[
  {
    "path": ".gitignore",
    "content": "# Ignore __pycache__ directories\n__pycache__/\n\n# Ignore .egg-info directories\n*.egg-info/\n\n# Ignore outputs directories\nscripts/outputs/\n\n# Ignore wandb directories\nscripts/wandb/\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "<h3 align=\"center\">\n    🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉<br>\n     <font color=\"red\"><b>Check out our latest progress</b></font> of new offline RL algorithm for Android <a href=\"https://digirl-agent.github.io/DigiQ-agent.github.io/\"><b>DigiQ</b></a> and autonomous skill discovery for web agents <a href=\"https://yanqval.github.io/PAE/\"><b>Proposer-Agent-Evaluator</b></a>. <br>\n    🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉\n</h3>\n\n<p align=\"center\">\n    <img src=\"./assets/digirl-logo-text.png\" alt=\"logo\" width=\"50%\">\n</p>\n\n<h3 align=\"center\">\nDigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning\n<br>\n<b>Oral @ <a href=\"https://icml-fm-wild.github.io/\">FM Wild</a>, ICML</b>\n    <br>\n    <b>Neurips 2024</b>\n</h3>\n\n\n\n<p align=\"center\">\n| <a href=\"https://digirl-agent.github.io/\"><b>Website | Demo | Results</b></a> | <a href=\"https://arxiv.org/abs/2406.11896\"><b>Paper</b></a> | <a href=\"https://drive.google.com/drive/folders/14Iu6lAHePQ2qG0ghYkVG1RG6RUu7e2Hz?usp=sharing\"><b>Checkpoints | Data</b></a> |\n</p>\n\n---\n\nResearch Code for preprint \"DigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning\".\n\n[Hao Bai*](https://jackgethome.com), [Yifei Zhou*](https://<username>02.github.io/), [Mert Cemri](https://scholar.google.com/citations?user=sMEFwf8AAAAJ&hl=en), [Jiayi Pan](https://www.jiayipan.me/), [Alane Suhr](https://www.alanesuhr.com/), [Sergey Levine](https://people.eecs.berkeley.edu/~svlevine/), [Aviral Kumar](https://aviralkumar2907.github.io/)<br>\nUC Berkeley, UIUC, Google DeepMind\n<br>\n*Equal contribution, alphabetic order; work done at UC Berkeley\n\n<p align=\"center\">\n    <img src=\"./assets/digirl-diagram.png\" alt=\"digirl-diagram\" width=\"70%\">\n</p>\n\n## 🍩 Features\n\n### Environment Features\n\n- Auto-adaptive error handling support.\n- Multi-machine [emulation parallel](multimachine/README.md) support.\n- Checkpoint resuming support.\n- Trajectory video 
recording support.\n\n### Approach Features\n\n- Two training algorithms proposed in the paper\n  - DigiRL (automatic curriculum + doubly robust estimator filtering).\n  - Filtered Behavior Cloning (reward-based filtering).\n- Three training modes: \n  - Offline-only training: baseline approach - use the AutoUI checkpoint to collect data (we have this data ready for you), then train with these pre-collected sub-optimal trajectories. This mode only allows evaluation using the checkpoint.\n  - Online-only training: traditional RL approach - the AutoUI checkpoint simultaneously interacts with the environment and learns online. This mode allows interactive training.\n  - Offline-to-online training: the most powerful approach as evaluated in paper - the AutoUI checkpoint first learns the pre-collected data, then simultaneously interacts with the environment and does online learning starting from this checkpoint. This mode allows interactive training.\n- Two agents:\n  - [AutoUI](https://arxiv.org/abs/2309.11436): we support both training (2 algorithms x 3 paradigms) and evaluation.\n  - [CogAgent](https://arxiv.org/abs/2312.08914): currently only supports evaluation, no training pipeline is supported.\n\n- Two [Android-in-the-Wild](https://arxiv.org/abs/2307.10088) task sets:\n  - AitW General: general browsing, opening apps.\n  - AitW Web Shopping: shopping on popular shopping websites.\n  - It'll also be interesting to explore the [other AitW subsets](https://github.com/google-research/google-research/tree/master/android_in_the_wild) or other task sets; if you have good candidates, please propose one in the issue.\n- DDP Multi-GPU training:\n  - We support `accelerate` for multi-GPU training. You can turn off this feature if you only have 1 GPU. 
It only takes **12GB** of GPU memory for AutoUI running the DigiRL algorithm, but we provide this feature in case you want to play with something larger.\n\n\n## 🚀 Quick Start\n### Dependencies\n\nFirst, create a [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) environment and install all pip package requirements.\n\n```bash\nconda create -n digirl python==3.10\nconda activate digirl\n\ngit clone https://github.com/DigiRL-agent/digirl.git\ncd digirl\npip install -e .\n```\n\n### Environment Setup\n\nTo set up the Android environment for the DigiRL/filtered BC to interact with, refer to [the environment README](./env_setup/README.md). Before moving on, you should be able to view [this screenshot](./env_setup/screenshot.png) by running [this script](./env_setup/screenshot.py).\n\n### Model checkpoint and Datasets\n\nThe SFT checkpoint of the AutoUI model was released here and we use it:\n\n- [AutoUI SFT checkpoint](https://huggingface.co/cooelf/Auto-UI)\n\nSimply download `Auto-UI-Base.zip`, then unzip to a directory.\n\n```bash\ncd <path_to_autoui_dir>\nwget https://huggingface.co/cooelf/Auto-UI/resolve/main/Auto-UI-Base.zip\nunzip Auto-UI-Base.zip\n# wait...\nls Auto-UI-Base\n# config.json             pytorch_model.bin        tokenizer.json         training_args.bin\n# generation_config.json  special_tokens_map.json  tokenizer_config.json\n```\n\nWe provide the pre-collected trajectories using this SFT checkpoint:\n\n- [Trajectories of SFT'ed AutoUI](https://drive.google.com/drive/folders/1ud1XyzCfh0257CixxdgLjjpX59jYbhfU?usp=sharing)\n\nThe Google Drive folder contains 4 files, with stats below (you can use `gdown` to download the checkpoint you want):\n\n| File Name | #Trajectories | Horizon | File Size |\n|-----------|---------------|---------|-----------|\n| `general-off2on-zeroshot-trajectories.pt` | 608 | 10 | 95.5M |\n| `general-offline-zeroshot-trajectories.pt` | 1552 | 10 | 243.9M |\n| 
`webshop-off2on-zeroshot-trajectories.pt` | 528 | 20 | 115.2M |\n| `webshop-offline-zeroshot-trajectories.pt` | 1296 | 20 | 297.5M |\n\nwhere `general/webshop` mean the AitW General/Web Shopping subset, `off2on/offline` means whether the data is used for offline learning or offline-to-online learning. To make a fair comparison, offline learning should use a similar amount of data to what offline-to-online learning finally uses.\n\nStore these files into a directory:\n\n```bash\nmkdir ~/data && cd ~/data\n# copy the .pt file here\n```\n\nIf you want to use our final offline-to-online checkpoints to reproduce scores in the paper, you can also download from Google Drive. We release the first offline-to-online checkpoint (`run1` in paper) for each algorithm in each environment:\n\n\n- [AutoUI DigiRL & online filtered BC checkpoint](https://drive.google.com/drive/folders/13jkIgWQ6JCcaTsfG_AWdgxE1qO4c2imJ?usp=sharing)\n\nThe Google Drive folder also contains 4 files:\n\n| File Name | Index in Paper | Test Set Score | File Size |\n|-----------|---------------|---------|---------|\n| `general-off2on-digirl.zip` | `run1` | 70.8 | 1.9G |\n| `general-off2on-filteredbc.zip` | `run1` | 59.4 | 1.9G |\n| `webshop-off2on-digirl.zip` | `run1` | 75.0 | 1.9G |\n| `webshop-off2on-filteredbc.zip` | `run1` | 55.2 | 1.9G |\n\nYou can also access through [Huggingface](https://huggingface.co/collections/JackBAI/digirl-checkpoints-6682ea42bdfb5af9bfc5f29f).\n\nNote that these checkpoints only allow evaluation because we only release the AutoUI checkpoint, not the optimizer states.\n\n### Modify Configurations\n\nThen change the `huggingface_token`, `wandb_token`, `gemini_token`, etc. in `scripts/config/main/default.yaml`. Note that you need to specify **all entries** left blank or `<username>` for you in this file. 
This config is the default configuration - you also need to specify the subconfiguration - for example, if you want to run the online algorithm, you should also examine what to modify in `scripts/config/main/digirl_online`. Feel free to DIY your configs and play with the code!\n\n**Note: to load existing checkpoints, modify `save_path` instead of `policy_lm`.** That is, `policy_lm` should still be the path to the AutoUI checkpoint.\n\n### Run Experiments\n\nAfter modifying the config to what you like, you can now run experiments with the following commands:\n\n```bash\ncd scripts\npython run.py --config-path config/main --config-name digirl_online\n```\n\nThe file `run.py` is the entry point of the program, and you can pass the config name to run different experiments. The config files are in the `scripts/config/` directory.\n\n### Main Results Reproduction\n\nTo reproduce the results in Table 1 of our paper, first download the corresponding checkpoints as described above. As the results in the training set are obtained by randomly sampling tasks, we recommend reproducing the test results (which are obtained by sequentially sampling the first 96 trajectories).\n\nTo do this, modify the [`eval_only.yaml`](https://github.com/DigiRL-agent/digirl/blob/master/scripts/config/main/eval_only.yaml) config file and its parent [`default.yaml`](https://github.com/DigiRL-agent/digirl/blob/master/scripts/config/main/default.yaml) config file to experiment settings. For instance, you can modify these configs for reproduction:\n\n1. `default.yaml`\n    1. Set `task_split: \"test\"` and `eval_sample_mode: \"sequential\"`\n    2. Don't forget to increase `max_steps` to `20` if `task_set` is set to `webshop` (as the webshop tasks usually need more steps than the general tasks to complete).\n2. `eval_only.yaml`\n    1. Make sure `rollout_size` (in `default.yaml`) * `eval_iterations` (in `eval_only.yaml`) = 96. 
For example, `rollout_size (16) * eval_iterations (6) = 96`.\n\n### (Optional) CogAgent server\n\nThe way we set CogAgent up is using a Gradio-based API approach, which means that you need to set up a CogAgent inference service on a server, then use our code to query that API. To set up CogAgent, refer to the GitHub Page of project [AutoEval](https://github.com/Berkeley-NLP/Agent-Eval-Refine/blob/main/exps/android_exp/README.md) by [Jiayi Pan](https://www.jiayipan.me/). \n\nGrab the link and modify that in the `scripts/config/cogagent/default.yaml` file. You need at least one GPU with 48GB memory to host CogAgent for inference.\n\n### (Optional) Multi-machine Emulation Parallel\n\nIf you want to launch large-scale emulation (say more than 32 emulators running at the same time), you'll need multiple machines that collect trajectories at the same time. Refer to the [multimachine-training README](multimachine/README.md) for details.\n\n### (Optional) Multi-GPU DDP Training\n\nWe use `accelerate` for multi-GPU DDP training. To enable, you need to identify the number of GPUs on your machine in the [accelerate config](scripts/config/accelerate_config/default_config.yaml). If your model is extremely large, it's also possible to do multi-machine DDP training but we currently don't support it.\n\nTo enable this, the only thing you need to do is to replace `python run.py` with `accelerate launch --config_file <config_file> run.py`. An example below:\n\n```\naccelerate launch --config_file config/accelerate_config/default_config.yaml run.py --config-path config/main --config-name digirl_off2on\n```\n\nYou should be able to see a much faster learning speed if you've successfully set this up.\n\n## Trouble Shooting (IMPORTANT)\n\n1. If you frequently get the `Error in environment reset` error, you can try increasing the timeout at [this line](https://github.com/DigiRL-agent/digirl/blob/5b77663c3c3f19932cdb9ceb6fe0474c7b28a0b7/digirl/environment/env_utils.py#L59). \n2. 
If you frequently get the `409 resource exhausted` error, try adding a `sleep()` function within the `call_gemini()` function [here](https://github.com/DigiRL-agent/digirl/blob/3896fda9d2e31081234f8b716e9049f6a2d6a7f8/digirl/environment/android/evaluate.py#L161). FYI, a free-tier Gemini API fits `sleep(2)` very well.\n3. If you see AVD copying errors (started with `shutil.error`), you can safely ignore it unless the location copying to is empty.\n\n## 🌟 Contribution\n\nWe welcome the open-source community to contribute to this project. If you invented an algorithm, or you support other types of base models, please propose a PR or issue. Example topics:\n\n- [ ] Other algorithms like PPO or any algorithm you invented.\n- [ ] Other base models like LLaVA.\n- [ ] Other task sets like WebArena.\n- [ ] Potential sub-optimal implementations.\n\n## 📄 License\n\nAll content of this work is under [Apache License v2.0](https://github.com/DigiRL-agent/digirl/blob/master/LICENSE), including codebase, data, and model checkpoints.\n\n## 📚 Citation\n\nConsider citing our paper!\n\n```\n@article{bai2024digirl,\n  title={DigiRL: Training In-The-Wild Device-Control Agents with Autonomous Reinforcement Learning},\n  author={Bai, Hao and Zhou, Yifei and Cemri, Mert and Pan, Jiayi and Suhr, Alane and Levine, Sergey and Kumar, Aviral},\n  journal={arXiv preprint arXiv:2406.11896},\n  year={2024}\n}\n```\n"
  },
  {
    "path": "__init__.py",
    "content": ""
  },
  {
    "path": "digirl/algorithms/__init__.py",
    "content": "from .offpolicy_train_loop import offpolicy_train_loop\nfrom .eval_loop import eval_loop\nfrom .worker_collect_loop import worker_collect_loop\nfrom .parallel_utils import remote_collect_trajectories"
  },
  {
    "path": "digirl/algorithms/digirl/__init__.py",
    "content": "from .trainer import DigiRLTrainer"
  },
  {
    "path": "digirl/algorithms/digirl/trainer.py",
    "content": "import torch\nfrom tqdm import tqdm\nfrom torch.utils.data import DataLoader\nfrom digirl.data import DummyDataset\nimport random\n\ndef dict_mean(dict_list):\n    mean_dict = {}\n    if len(dict_list) > 0:\n        for key in dict_list[0].keys():\n            if \"min\" in key:\n                mean_dict[key] = min(d[key] for d in dict_list)\n            elif \"max\" in key:\n                mean_dict[key] = max(d[key] for d in dict_list)\n            else:\n                mean_dict[key] = sum(d[key] for d in dict_list) / len(dict_list)\n    return mean_dict\n\nclass DigiRLTrainer():\n    def __init__(self, agent,\\\n                 accelerator,\\\n                    tokenizer,\\\n                    critic_lr: float = 1e-3,\\\n                    lm_lr: float = 1e-5,\\\n                    grad_accum_steps: int = 8,\\\n                    gamma: float = 0.9,\n                    tau: float = 0.1,\n                    epochs: int = 3,\n                    max_grad_norm: float=0.01,\n                    actor_epochs: int = 3,\n                    trajectory_critic_epochs: int = 3,):\n        \"\"\"\n        beta: coefficient for the bc loss\n        \"\"\"\n        super().__init__()\n        self.agent = agent\n        self.tokenizer = tokenizer\n        self.lm_optimizer = torch.optim.Adam(agent.model.parameters(), lr = lm_lr)\n        self.critic_optimizer = torch.optim.Adam(agent.critic.parameters(), lr = critic_lr)\n        self.trajectory_critic_optimizer = torch.optim.Adam(agent.trajectory_critic.parameters(), lr = critic_lr)\n        self.criterion = torch.nn.CrossEntropyLoss()\n        self.grad_accum_steps = grad_accum_steps\n        self.actor_epochs = actor_epochs\n        self.gamma = gamma\n        self.epochs = epochs\n        self.trajectory_critic_epochs = trajectory_critic_epochs\n        self.step = 0\n        self.tau = tau\n        self.max_grad_norm = max_grad_norm\n        self.accelerator = accelerator\n        
self.softmax = torch.nn.Softmax(dim = -1)\n\n    def prepare(self):\n        self.lm_optimizer = self.accelerator.prepare(self.lm_optimizer)\n        self.critic_optimizer = self.accelerator.prepare(self.critic_optimizer)\n        self.trajectory_critic_optimizer = self.accelerator.prepare(self.trajectory_critic_optimizer)\n    \n    def trajectory_critic_loss(self, observation, mc_return, validation = False, **kwargs):\n        with torch.autograd.set_detect_anomaly(True):\n            mc_return = torch.Tensor(mc_return).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()\n            v = self.agent.trajectory_critic(observation, detach_model=False)\n            regression_target = (mc_return > 0).long()\n            v_loss = self.criterion(v, regression_target)\n            v_acc = (v.argmax(dim = 1) == regression_target).float().mean()\n            if not validation:\n                self.accelerator.backward(v_loss)\n            v_loss = v_loss.detach().cpu()\n            v_acc = v_acc.detach().cpu()\n            mc_return = mc_return.detach().cpu()\n            v = self.softmax(v)[:, 1]\n        info = {\"trajectory.v1.loss\": v_loss,\\\n                \"trajectory.v1.acc\": v_acc,\\\n                \"trajectory.v1.mean\": torch.mean(v),\\\n                \"trajectory.v1.min\": torch.min(v),\\\n                \"trajectory.v1.max\": torch.max(v),\\\n                \"trajectory.v1.std\": torch.std(v),\\\n                \"mc_return.mean\": torch.mean(mc_return),\n                \"mc_return.max\": torch.max(mc_return),\n                \"mc_return.min\": torch.min(mc_return),\n                \"mc_return.std\": torch.std(mc_return),\n                }\n        if validation:\n            validation_info = {}\n            for k,v in info.items():\n                validation_info[\"validation.\"+k] = v\n            return validation_info\n        return info\n\n    def 
critic_loss(self, observation, image_features, action, reward, next_observation, next_image_features,done, mc_return,\n                    validation = False, **kwargs):\n        reward = torch.Tensor(reward).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()\n        done = torch.Tensor(done).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()\n        mc_return = torch.Tensor(mc_return).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()\n        v1, v2 = self.agent.critic(observation, image_features, action, detach_model=False)\n        nv1, nv2 = self.agent.critic(next_observation, next_image_features, action, detach_model=False)\n\n        v1 = v1.reshape(-1, 2)\n        v2 = v2.reshape(-1, 2)\n        nv1 = nv1.reshape(-1, 2)\n        nv2 = nv2.reshape(-1, 2)\n        regression_target = (mc_return > 0).long()\n        v1_loss = self.criterion(v1, regression_target)\n        v1_acc = (v1.argmax(dim = 1) == regression_target).float().mean()\n        v2_loss = self.criterion(v2, regression_target)\n        v2_acc = (v2.argmax(dim = 1) == regression_target).float().mean()\n        nv1_loss = self.criterion(nv1, regression_target)\n        nv2_loss = self.criterion(nv2, regression_target)\n        if not validation:\n            self.accelerator.backward(v1_loss + v2_loss + nv1_loss + nv2_loss)\n        v1_loss, v2_loss = v1_loss.detach().cpu(), v2_loss.detach().cpu()\n        v1_acc, v2_acc = v1_acc.detach().cpu(), v2_acc.detach().cpu()\n\n        #calculate the probability for logging purpose\n        v1 = self.softmax(v1)[:, 1]\n        v2 = self.softmax(v2)[:, 1]\n        info = {\"v1.loss\": v1_loss,\\\n                \"v2.loss\": v2_loss,\\\n                \"v1.acc\": v1_acc,\\\n                \"v2.acc\": v2_acc,\\\n  
              \"v1.mean\": torch.mean(v1),\\\n                \"v1.min\": torch.min(v1),\\\n                \"v1.max\": torch.max(v1),\\\n                \"v1.std\": torch.std(v1),\n                \"v2.mean\": torch.mean(v2),\n                \"v2.max\": torch.max(v2),\n                \"v2.min\": torch.min(v2),\n                \"v2.std\": torch.std(v2),\n                }\n        if validation:\n            validation_info = {}\n            for k,v in info.items():\n                validation_info[\"validation.\"+k] = v\n            return validation_info\n        return info\n\n    def actor_loss(self, observation, action, image_features, next_observation, next_image_features, mc_return, pi_action, advantage, reward,\n                   validation=False,**kwargs):\n        mc_return = torch.Tensor(mc_return).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()\n        reward = torch.Tensor(reward).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype).flatten()\n        with torch.no_grad():\n            v1, v2 = self.agent.critic(observation, image_features, action, detach_model=False)\n            nv1, nv2 = self.agent.critic(next_observation, next_image_features, action, detach_model=False)\n        v1 = self.softmax(v1)[:, 1]\n        v2 = self.softmax(v2)[:, 1]\n        nv1 = self.softmax(nv1)[:, 1]\n        nv2 = self.softmax(nv2)[:, 1]\n        v = torch.minimum(v1, v2).flatten()\n        nv = torch.minimum(nv1, nv2).flatten()\n        #TODO: set +1 so that the advantage is always positive\n        advantage = nv - v - 0.05 + reward + mc_return\n        advantage = torch.clamp(advantage, 0, 1)\n        advantage = (advantage > 0).to(dtype = self.accelerator.unwrap_model(self.agent.model).dtype)\n        image_features = image_features.to(self.agent.device)\n        log_prob = 
self.agent.get_log_prob(observation, image_features, action).sum(dim = 1).flatten()\n        advantage = torch.Tensor(advantage).to(self.accelerator.unwrap_model(self.agent.model).device, dtype = self.accelerator.unwrap_model(self.agent.model).dtype)\n        advantages = advantage.flatten()\n        values = torch.zeros_like(advantages)\n        residual_advantage = torch.zeros_like(advantages)\n        pg_loss = -torch.mean(log_prob.flatten()*advantages)\n        value_loss = torch.zeros_like(pg_loss)\n        if not validation:\n            self.accelerator.backward(pg_loss+value_loss)\n        advantages = advantages.detach().cpu()\n        info =  {\"pg.loss\": pg_loss.detach().cpu().item(),\n                \"values.loss\": value_loss.detach().cpu().item(),\n                \"values.mean\": values.mean(),\n                \"values.max\": torch.max(values),\n                \"values.min\": torch.min(values),\n                \"values.std\": torch.std(values),\n                \"advantages.mean\": advantages.mean(),\n                \"advantages.max\": torch.max(advantages),\n                \"advantages.min\": torch.min(advantages),\n                \"advantages.std\": torch.std(advantages),\n                \"residual_advantages.mean\": residual_advantage.mean(),\n                \"residual_advantages.max\": torch.max(residual_advantage),\n                \"residual_advantages.min\": torch.min(residual_advantage),\n                \"residual_advantages.std\": torch.std(residual_advantage),}\n        if validation:\n            validation_info = {}\n            for k,v in info.items():\n                validation_info[\"validation.\"+k] = v\n            return validation_info\n        return info\n\n    def update_trajectory_critic(self, trajectories, validation_trajectories = None):\n        info = {}\n        info_list = []\n        batch_size = 8\n        with torch.autograd.set_detect_anomaly(True):\n            for _ in 
tqdm(range(self.trajectory_critic_epochs), disable= not self.accelerator.is_main_process):\n                data = [{\"observation\": traj[0][\"observation\"], \"mc_return\": traj[-1][\"mc_return\"]} for traj in trajectories]\n                data = [random.sample(data, 1)[0] for _ in range(self.grad_accum_steps*batch_size)]\n                dataloader = DataLoader(DummyDataset(data), batch_size=batch_size)\n                dataloader = self.accelerator.prepare(dataloader)\n                self.trajectory_critic_optimizer.zero_grad()\n                for batch in tqdm(dataloader, disable=True):\n                    info_list.append(self.trajectory_critic_loss(**batch))\n                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)\n                self.trajectory_critic_optimizer.step()\n        info.update(dict_mean(info_list))\n        if validation_trajectories is not None:\n            info_list = []\n            data = [{\"observation\": traj[0][\"observation\"], \"mc_return\": traj[-1][\"mc_return\"]} for traj in validation_trajectories]\n            data = [random.sample(data, 1)[0] for _ in range(self.grad_accum_steps*batch_size)]\n            dataloader = DataLoader(DummyDataset(data), batch_size=batch_size)\n            dataloader = self.accelerator.prepare(dataloader)\n            with torch.no_grad():\n                for batch in tqdm(dataloader, disable=True):\n                    info_list.append(self.trajectory_critic_loss(validation=True, **batch))\n            info.update(dict_mean(info_list))\n        return info\n\n    def update_critic(self, replay_buffer, validation_buffer = None):\n        self.step += 1\n        info = {}\n        info_list = []\n        with torch.autograd.set_detect_anomaly(True):\n            for _ in tqdm(range(self.epochs), disable= not self.accelerator.is_main_process):\n                data = [replay_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]\n      
          for d in data:\n                    for k,v in d.items():\n                        d[k] = v[0]\n                dataloader = DataLoader(DummyDataset(data), batch_size=replay_buffer.batch_size)\n                dataloader = self.accelerator.prepare(dataloader)\n                self.critic_optimizer.zero_grad()\n                for batch in tqdm(dataloader, disable=True):\n                    info_list.append(self.critic_loss(**batch))\n                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)\n                self.critic_optimizer.step()\n        info.update(dict_mean(info_list))\n        if validation_buffer is not None:\n            info_list = []\n            data = [validation_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]\n            for d in data:\n                for k,v in d.items():\n                    d[k] = v[0]\n            dataloader = DataLoader(DummyDataset(data), batch_size=replay_buffer.batch_size)\n            dataloader = self.accelerator.prepare(dataloader)\n            with torch.no_grad():\n                for batch in tqdm(dataloader, disable=True):\n                    info_list.append(self.critic_loss(validation=True, **batch))\n            info.update(dict_mean(info_list))\n        return info\n        \n        \n    def update_policy(self, replay_buffer, validation_buffer = None, no_update_actor=False):\n        self.step += 1\n        info = {}\n        info_list = []\n        action_bsize = 2 if 'mistral' in self.agent.policy_lm else replay_buffer.batch_size\n        #update actor\n        if not no_update_actor:\n            print(\">>>updating actor\")\n            #batchsize for the actor set to 1 for mistral due to memory concern\n            # action_bsize = 2 if 'mistral' in self.agent.policy_lm else replay_buffer.batch_size\n            #action_bsize = replay_buffer.batch_size\n            for _ in tqdm(range(self.actor_epochs), disable= not 
self.accelerator.is_main_process):\n                data = [replay_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]\n                grad_index = 0\n                for d in data:\n                    for k,v in d.items():\n                        d[k] = v[0]\n                dataloader = DataLoader(DummyDataset(data), batch_size=action_bsize, shuffle=False)\n                all_pi_actions = []\n                all_advantages = []\n                # import IPython; IPython.embed()\n                dataloader = self.accelerator.prepare(dataloader)\n                self.lm_optimizer.zero_grad()\n                for batch in dataloader:\n                    pi_action = None\n                    advantages = None\n                    info_list.append(self.actor_loss(**batch, pi_action=pi_action, advantage=advantages))\n                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)\n                self.lm_optimizer.step()\n        info.update(dict_mean(info_list))\n        if validation_buffer is not None:\n            info_list = []\n            data = [validation_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]\n            for d in data:\n                for k,v in d.items():\n                    d[k] = v[0]\n            dataloader = DataLoader(DummyDataset(data), batch_size=action_bsize, shuffle=False)\n            dataloader = self.accelerator.prepare(dataloader)\n            with torch.no_grad():\n                for batch in tqdm(dataloader, disable=True):\n                    info_list.append(self.actor_loss(validation=True, pi_action=None, advantage=None, **batch))\n            info.update(dict_mean(info_list))\n            return info\n        return info\n\n    def update(self, replay_buffer, validation_buffer = None, filtered_buffer = None, filtered_validation_buffer = None,no_update_actor=False):\n        if filtered_validation_buffer is None:\n            
filtered_validation_buffer = validation_buffer\n        if filtered_buffer is None:\n            filtered_buffer = replay_buffer\n        info = {}\n        info.update(self.update_critic(replay_buffer, validation_buffer))\n        info.update(self.update_policy(filtered_buffer, filtered_validation_buffer,no_update_actor=no_update_actor))\n        return info\n\n    def save(self, path):\n        self.accelerator.save_state(path, safe_serialization=False)\n\n    def load(self, path):\n        self.accelerator.load_state(path)\n        \n        "
  },
  {
    "path": "digirl/algorithms/eval_loop.py",
    "content": "from digirl.environment import batch_interact_environment\nfrom digirl.algorithms.digirl import DigiRLTrainer\nfrom digirl.algorithms.filteredbc import BCTrainer\nimport numpy as np\nfrom digirl.misc import colorful_print\nimport wandb\nimport os\nimport torch\nimport time\n\ndef eval_loop(env,\\\n                agent,\\\n                accelerator,\\\n                tokenizer,\\\n                critic_lr,\\\n                lm_lr,\\\n                tau,\\\n                epochs,\\\n                actor_epochs,\\\n                grad_accum_steps,\\\n                max_grad_norm,\n                trajectory_critic_epochs,\n                gamma=None,\\\n                train_algorithm=None,\\\n                rollout_size: int = 50,\\\n                eval_iterations: int = 10,\\\n                use_wandb: bool = False,\n                save_path: str = None,\n                decode_f: callable = lambda x: x,\n                **kwargs):\n    if train_algorithm == \"digirl\":\n        print(\">>> Using DigiRL trainer\")\n        trainer = DigiRLTrainer(agent=agent,\\\n                            accelerator=accelerator,\\\n                                tokenizer=tokenizer,\\\n                                critic_lr = critic_lr,\\\n                                lm_lr = lm_lr,\\\n                                gamma = gamma,\\\n                                tau = tau,\\\n                                epochs = epochs,\\\n                                actor_epochs = actor_epochs,\n                                grad_accum_steps=grad_accum_steps,\n                                max_grad_norm=max_grad_norm,\n                                trajectory_critic_epochs = trajectory_critic_epochs)\n    elif train_algorithm == \"filteredbc\":\n        print(\">>> Using Filtered BC trainer\")\n        trainer = BCTrainer(agent=agent,\\\n                                tokenizer=tokenizer,\\\n                                
accelerator=accelerator,\n                                lm_lr = lm_lr,\\\n                                epochs = actor_epochs,\\\n                                grad_accum_steps=grad_accum_steps,\n                                max_grad_norm=max_grad_norm)\n\n    agent.prepare()\n    # evaluation does not require optimizer\n    # trainer.prepare()\n\n    if os.path.exists(os.path.join(save_path, 'trainer.pt')):\n        print(\">>> Loading from previous checkpoint\")\n        trainer.load(os.path.join(save_path, 'trainer.pt'))\n    else:\n        print(\">>> No previous checkpoint found\")\n\n    colorful_print(\">>> Evaluating Agent\", fg='blue')\n    \n    all_trajectories = []\n    for i in range(eval_iterations):\n        trajectories = batch_interact_environment(agent = agent,\\\n                                            env = env,\\\n                                            num_trajectories= rollout_size,\\\n                                            accelerator = accelerator,\\\n                                            use_tqdm=False,\n                                            decode_f = decode_f,\n                                            gamma = gamma,\n                                            iter=i)\n        if accelerator.is_main_process:\n            info = {\"iteration\": i,\\\n                    \"rollout.mean\": np.mean([d[0][\"trajectory_reward\"] if len(d) > 0 else 0 for d in trajectories]),\\\n                    \"rollout.max\": np.max([d[0][\"trajectory_reward\"] if len(d) > 0 else 0 for d in trajectories]),\\\n                    \"rollout.min\": np.min([d[0][\"trajectory_reward\"] if len(d) > 0 else 0 for d in trajectories]),\\\n                    \"walltime\": time.time()}\n            all_trajectories += trajectories\n            \n            torch.save(all_trajectories, os.path.join(save_path, 'trajectories_eval.pt'))\n            time.sleep(15)\n        else:\n            info = {}\n        
accelerator.wait_for_everyone()\n        all_trajectories = torch.load(os.path.join(save_path, 'trajectories_eval.pt'))\n        if use_wandb and accelerator.is_main_process:\n            wandb.log(info)\n            "
  },
  {
    "path": "digirl/algorithms/filteredbc/__init__.py",
    "content": "from .trainer import BCTrainer"
  },
  {
    "path": "digirl/algorithms/filteredbc/trainer.py",
    "content": "import torch\nimport transformers\nfrom tqdm import tqdm\nimport copy\nimport random\nfrom torch.utils.data import DataLoader\nfrom digirl.data import DummyDataset\ndef dict_mean(dict_list):\n    mean_dict = {}\n    if len(dict_list) > 0:\n        for key in dict_list[0].keys():\n            mean_dict[key] = sum(d[key] for d in dict_list) / len(dict_list)\n    return mean_dict\nclass BCTrainer():\n    def __init__(self, agent,\\\n                    tokenizer,\\\n                    accelerator,\\\n                    lm_lr: float = 1e-5,\\\n                    epochs: int = 3,\n                    max_grad_norm: float=0.01,\n                    grad_accum_steps: int = 8):\n        \"\"\"\n        beta: coefficient for the bc loss\n        \"\"\"\n        super().__init__()\n        self.agent = agent\n        self.tokenizer = tokenizer\n        self.lm_optimizer = torch.optim.Adam(agent.model.parameters(), lr = lm_lr)\n        self.criterion = torch.nn.MSELoss()\n        self.grad_accum_steps = grad_accum_steps\n        self.epochs = epochs\n        self.step = 0\n        self.max_grad_norm = max_grad_norm\n        self.accelerator = accelerator\n    \n    def prepare(self):\n        self.lm_optimizer = self.accelerator.prepare(self.lm_optimizer)\n\n    def actor_loss(self, observation, image_features, action, **kwargs):\n        # loss = plain_bc_loss(self.agent.model, self.tokenizer, observation, action)\n        image_features = image_features.to(self.agent.device)\n        loss = -self.agent.get_log_prob(observation, image_features, action).sum(dim = 1).mean()\n        self.accelerator.backward(loss)\n        return {\"bc.loss\": loss.detach().cpu().item()}\n\n    def update(self, replay_buffer, no_update_actor=False):\n        self.step += 1\n        info = {}\n        info_list = []\n        #update actor\n        if  not no_update_actor:\n            action_bsize = 1 if 'llama' in self.agent.policy_lm else replay_buffer.batch_size\n          
  for _ in range(self.epochs):\n                self.lm_optimizer.zero_grad()\n                data = [replay_buffer.sample(1) for _ in range(self.grad_accum_steps*replay_buffer.batch_size)]\n                grad_index = 0\n                for d in data:\n                    for k,v in d.items():\n                        d[k] = v[0]\n                dataloader = DataLoader(DummyDataset(data), batch_size=action_bsize, shuffle=False)\n                dataloader = self.accelerator.prepare(dataloader)\n                for batch in dataloader:\n                    info_list.append(self.actor_loss(**batch))\n                self.accelerator.clip_grad_norm_(self.agent.parameters(), self.max_grad_norm)\n                self.lm_optimizer.step()\n        info.update(dict_mean(info_list))\n        return info\n\n    def save(self, path):\n        self.accelerator.save_state(path, safe_serialization=False)\n        # torch.save({'model_state_dict': self.accelerator.unwrap_model(self.agent.model).state_dict(),\n        #             'critic_state_dict': self.accelerator.unwrap_model(self.agent.critic).state_dict(),\n        #             'target_critic_state_dict': self.accelerator.unwrap_model(self.agent.target_critic).state_dict(),\n        #             'critic_optimizer_state_dict': self.critic_optimizer.state_dict(),\n        #             'lm_optimizer_state_dict': self.lm_optimizer.state_dict()}, path)\n\n    def load(self, path):\n        self.accelerator.load_state(path)\n"
  },
  {
    "path": "digirl/algorithms/offpolicy_train_loop.py",
    "content": "from digirl.environment import batch_interact_environment\nfrom digirl.data import ReplayBuffer\nimport numpy as np\nfrom tqdm import tqdm\nfrom digirl.algorithms.digirl import DigiRLTrainer\nfrom digirl.algorithms.filteredbc import BCTrainer\nfrom digirl.misc import colorful_print\nimport wandb\nimport os\nimport torch\nimport time\nimport copy\nfrom digirl.environment.env_utils import add_mc_return\nfrom digirl.algorithms.parallel_utils import remote_collect_trajectories\n\ndef label_trajectories(trajectories, agent):\n    print(\"Labeling Trajectories\")\n    baselines = []\n    for i in range(0, len(trajectories), 16):\n        observations = [t[0][\"observation\"] for t in trajectories[i:i+16]]\n        with torch.no_grad():\n            v = agent.trajectory_critic(observations)\n            v = torch.nn.Softmax(dim = -1)(v)[:,1]\n            baselines.append(v.flatten())\n    baselines = torch.cat(baselines, dim = -1)\n    print(\"Done Labeling Trajectories\")\n    return torch.clamp(baselines.cpu(), 1e-4, 1-1e-4)\n\ndef framestack(all_trajectories):\n    new_trajectories = copy.deepcopy(all_trajectories)\n    for trajectory, new_trajectory in zip(all_trajectories, new_trajectories):\n        for i,(t, nt) in enumerate(zip(trajectory, new_trajectory)):\n            if i  == 0:\n                nt[\"image_features\"] = np.concatenate([t[\"image_features\"], t[\"image_features\"]], axis = -1)\n            else:\n                nt[\"image_features\"] = np.concatenate([trajectory[i-1][\"image_features\"], t[\"image_features\"]], axis = -1)\n            nt[\"next_image_features\"] = np.concatenate([t[\"image_features\"], t[\"next_image_features\"]], axis = -1)\n    return new_trajectories\n\ndef filterbc_buffer(all_trajectories, batch_size, capacity, agent):\n    trajectory_rewards = np.array([t[0][\"trajectory_reward\"] if len(t) > 0 else 0 for t in all_trajectories]).flatten()\n    cutoff = np.quantile(trajectory_rewards, 1 - 0.1)\n    top10 = 
np.argsort(trajectory_rewards)[-10:]\n    print(\"Top 10 Trajectories: \")\n    for i in top10:\n        print(all_trajectories[i][0][\"observation\"])\n        print(trajectory_rewards[i])\n    print(\"Cutoff: \", cutoff)\n    filtered_trajectories = []\n    for t, b in zip(all_trajectories, trajectory_rewards):\n        if b >= cutoff:\n            filtered_trajectories.append(t)\n    data = sum(filtered_trajectories, [])\n    filtered_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity)\n    for d in data:\n        filtered_buffer.insert(**d)\n    return filtered_buffer\n\n\ndef filter_buffer(all_trajectories, batch_size, capacity, agent):\n    baselines = label_trajectories(all_trajectories, agent).numpy().flatten()\n    trajectory_rewards = np.array([t[0][\"trajectory_reward\"] if len(t) > 0 else 0 for t in all_trajectories]).flatten()\n    baselines = trajectory_rewards - baselines\n    cutoff = np.quantile(baselines, 1 - 0.1)\n    top10 = np.argsort(baselines)[-10:]\n    print(\"Top 10 Trajectories: \")\n    for i in top10:\n        print(all_trajectories[i][0][\"observation\"])\n        print(baselines[i])\n    print(\"Cutoff: \", cutoff)\n    filtered_trajectories = []\n    for t, b in zip(all_trajectories, baselines):\n        if b >= cutoff:\n            filtered_trajectories.append(t)\n    data = sum(filtered_trajectories, [])\n    filtered_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity)\n    for d in data:\n        filtered_buffer.insert(**d)\n    return filtered_buffer\n\ndef offpolicy_train_loop(env,\\\n                agent,\\\n                tokenizer,\\\n                accelerator,\\\n                warmup_iter: int = 20,\n                rollout_size: int = 50,\\\n                batch_size: int = 2,\n                capacity: int = 500000,\n                train_iterations: int = 10,\\\n                epochs:int = 3, \\\n                grad_accum_steps: int = 1,\\\n                critic_lr: float= 1e-3,\\\n       
         lm_lr: float = 1e-5,\\\n                gamma: float = 0.9,\n                tau: float = 0.1,\n                use_wandb: bool = False,\n                actor_epochs: int = 3,\n                train_mode: str = None,\n                max_grad_norm: float = 0.01,\n                save_path: str = None,\n                save_freq: int = 25,\n                train_algorithm: str = \"digirl\",\n                decode_f: callable = lambda x: x,\n                offline_data_path: str = None,\n                offline_actor_iterations: int = 20,\n                offline_critic_iterations: int = 20,\n                offline_trajectory_critic_iterations: int = 20,\n                trajectory_critic_epochs: int = 5,\n                parallel: str = 'single',\n                worker_temp_path=None, \n                worker_run_path=None,\n                worker_ips=[], \n                worker_username=None,\n                **kwargs):\n\n    if train_algorithm == \"digirl\":\n        trainer = DigiRLTrainer(agent=agent,\\\n                            accelerator=accelerator,\\\n                                tokenizer=tokenizer,\\\n                                critic_lr = critic_lr,\\\n                                lm_lr = lm_lr,\\\n                                gamma = gamma,\\\n                                tau = tau,\\\n                                epochs = epochs,\\\n                                actor_epochs = actor_epochs,\n                                grad_accum_steps=grad_accum_steps,\n                                max_grad_norm=max_grad_norm,\n                                trajectory_critic_epochs = trajectory_critic_epochs)\n    elif train_algorithm == \"filteredbc\":\n        trainer = BCTrainer(agent=agent,\\\n                                tokenizer=tokenizer,\\\n                                accelerator=accelerator,\n                                lm_lr = lm_lr,\\\n                                epochs = actor_epochs,\\\n    
                            grad_accum_steps=grad_accum_steps,\n                                max_grad_norm=max_grad_norm)\n    replay_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity)\n    all_trajectories = []\n    \n    # prepare the model\n    agent.prepare()\n    # prepare the optimizers\n    trainer.prepare()\n\n    loaded_trajs = False\n    \n    # off-to-on\n    # no offline ckpt, no online ckpt -> offline training\n    # offline ckpt, no online ckpt -> online training\n    # offline ckpt, online ckpt -> resume online training\n    \n    # offline\n    # no resume supported\n    \n    # online\n    # no online ckpt -> online training\n    # online ckpt -> resume online training\n    \n    # omit this for online training\n    if offline_data_path is not None and train_mode != \"online\":\n        all_trajectories = torch.load(offline_data_path)\n        all_trajectories = framestack(all_trajectories)\n        print(f\"The number of offline trajectories is {len(all_trajectories)}\")\n        all_trajectories = [add_mc_return(t, gamma=gamma) for t in all_trajectories]\n        train_trajectories = all_trajectories[:int(len(all_trajectories)*0.8)]\n        val_trajectories = all_trajectories[int(len(all_trajectories)*0.8):]\n        loaded_trajs = 'scratch'\n        \n    # resume training from the saved checkpoint\n    if os.path.exists(os.path.join(save_path, 'trainer.pt')):\n        assert train_mode != \"offline\", \"Only online/off2on training can be resumed\"\n        trainer.load(os.path.join(save_path, 'trainer.pt'))\n        replay_buffer = torch.load(os.path.join(save_path, 'replay_buffer.pt'))\n        all_trajectories = torch.load(os.path.join(save_path, 'trajectories.pt'))\n        train_trajectories = torch.load(os.path.join(save_path, 'train_trajectories.pt'))\n        val_trajectories = torch.load(os.path.join(save_path, 'val_trajectories.pt'))\n        print(f\"The number of online trajectories is {len(all_trajectories)}\")\n    
    if use_wandb and accelerator.is_main_process:\n            print(\"Loading from checkpoint\")\n        loaded_trajs = 'resume'\n            \n    if not loaded_trajs:\n        train_trajectories = []\n        val_trajectories = []\n        all_trajectories = []\n\n    replay_buffer = ReplayBuffer(batch_size= batch_size, capacity=capacity)\n    validation_buffer = ReplayBuffer(batch_size= batch_size, capacity=capacity)\n\n    data = sum(train_trajectories, [])\n    val_data = sum(val_trajectories, [])\n    for d in data:\n        replay_buffer.insert(**d)\n    for d in val_data:\n        validation_buffer.insert(**d)\n    # offline training\n    if not os.path.exists(os.path.join(save_path, 'trainer.pt')):\n        #if nothing in the trainer only the offline trainer is saved\n        if os.path.exists(os.path.join(save_path, 'trainer_offline.pt')):\n            trainer.load(os.path.join(save_path, 'trainer_offline.pt'))\n            print(\"Loading from offline trainer\")\n        else:\n            if offline_data_path is not None and train_mode != \"online\":\n                print(\">>>Training Offline\")\n                info = {}\n                # offline training will never use the trajectory-level critic filter, so please use filterbc_buffer\n                filtered_buffer = filterbc_buffer(train_trajectories, batch_size, capacity, agent)\n                filtered_validation_buffer = filterbc_buffer(val_trajectories, batch_size, capacity, agent)\n                \n                if train_algorithm == \"filteredbc\":\n                    # filtered BC training phase\n                    for i in tqdm(range(offline_actor_iterations), disable=not accelerator.is_main_process):\n                        info.update(trainer.update(filtered_buffer))\n                        if use_wandb and accelerator.is_main_process:\n                            wandb.log(info)\n                elif train_algorithm == \"digirl\":\n                    # digirl training 
phase\n                    for i in tqdm(range(offline_trajectory_critic_iterations), disable=not accelerator.is_main_process):\n                        info.update(trainer.update_trajectory_critic(train_trajectories, val_trajectories))\n                        if use_wandb and accelerator.is_main_process:\n                            wandb.log(info)\n                    for i in tqdm(range(offline_critic_iterations), disable=not accelerator.is_main_process):\n                        info.update(trainer.update_critic(replay_buffer, validation_buffer))\n                        if use_wandb and accelerator.is_main_process:\n                            wandb.log(info)\n\n                    print(\">>>Training Policy\")\n                    for i in tqdm(range(offline_actor_iterations), disable=not accelerator.is_main_process):\n                        info.update(trainer.update_policy(filtered_buffer, filtered_validation_buffer))\n                        if use_wandb and accelerator.is_main_process:\n                            wandb.log(info)\n                if accelerator.is_main_process:\n                    trainer.save(os.path.join(save_path, 'trainer_offline.pt'))\n\n    if accelerator.is_main_process:\n        print(\">>>start iterations\")\n    if loaded_trajs == \"resume\":\n        resume_iter = len(all_trajectories) // rollout_size\n    else:\n        resume_iter = 0\n    \n    progress_bar = tqdm(total=train_iterations, initial=resume_iter)\n    \n    for i in range(resume_iter, train_iterations):\n        assert train_mode != \"offline\", \"Only online/off2on need to iteractively train; offline should directly go to eval loop after training\"\n        if parallel == 'single':\n            trajectories = batch_interact_environment(agent = agent,\\\n                                                env = env,\\\n                                                num_trajectories= rollout_size,\\\n                                                accelerator = 
accelerator,\\\n                                                use_tqdm=False,\n                                                decode_f = decode_f,\n                                                gamma = gamma,\n                                                iter=i)\n        elif parallel == 'host':\n            if i == 0:\n                if not os.path.exists(save_path):\n                    os.makedirs(save_path)\n            trajectories = remote_collect_trajectories(save_path=save_path, \n                                                       worker_temp_path=worker_temp_path, \n                                                       worker_run_path=worker_run_path,\n                                                       worker_ips=worker_ips, \n                                                       worker_username=worker_username,\n                                                       trainer=trainer)\n        \n        trajectories = framestack(trajectories)\n        if accelerator.is_main_process:\n            info = {\"iteration\": i,\\\n                    \"rollout.mean\": np.mean([d[0][\"trajectory_reward\"] if len(d) > 0 else 0 for d in trajectories]),\\\n                    \"rollout.max\": np.max([d[0][\"trajectory_reward\"] if len(d) > 0 else 0 for d in trajectories]),\\\n                    \"rollout.min\": np.min([d[0][\"trajectory_reward\"] if len(d) > 0 else 0 for d in trajectories]),\\\n                    \"walltime\": time.time()}\n            all_trajectories += trajectories\n            colorful_print(f\">>> length of all_trajectories: {len(trajectories)}\", fg='green')\n            new_train_trajectories = trajectories[:int(len(trajectories)*0.8)]\n            new_val_trajectories = trajectories[int(len(trajectories)*0.8):]\n            train_trajectories += new_train_trajectories\n            val_trajectories += new_val_trajectories\n            data = sum(new_train_trajectories, [])\n            val_data = sum(new_val_trajectories, 
[])\n            for d in data:\n                replay_buffer.insert(**d)\n            for d in val_data:\n                validation_buffer.insert(**d)\n        \n            info.update({\"rollout.reward.mean\": np.mean([d[\"reward\"] for d in data]),\\\n                    \"rollout.reward.max\": np.max([d[\"reward\"] for d in data]),\\\n                    \"rollout.reward.min\": np.min([d[\"reward\"] for d in data])})\n            print(\">>> Saving Replay Buffer\")\n            torch.save(replay_buffer, os.path.join(save_path, 'replay_buffer.pt'))\n            torch.save(all_trajectories, os.path.join(save_path, 'trajectories.pt'))\n            torch.save(train_trajectories, os.path.join(save_path, 'train_trajectories.pt'))\n            torch.save(val_trajectories, os.path.join(save_path, 'val_trajectories.pt'))\n            print(\">>> Saved Replay Buffer\")\n            time.sleep(15)\n        else:\n            info = {}\n        accelerator.wait_for_everyone()\n        \n        train_trajectories = torch.load(os.path.join(save_path, 'train_trajectories.pt'))\n        val_trajectories = torch.load(os.path.join(save_path, 'val_trajectories.pt'))\n        all_trajectories = torch.load(os.path.join(save_path, 'trajectories.pt'))\n        replay_buffer = torch.load(os.path.join(save_path, 'replay_buffer.pt'))\n\n        assert train_algorithm in ['digirl', 'filteredbc'], \"Only digirl and filteredbc are supported\"\n        if train_algorithm == \"filteredbc\":\n            filtered_buffer = filterbc_buffer(train_trajectories, batch_size, capacity, agent)\n            filtered_validation_buffer = filterbc_buffer(val_trajectories, batch_size, capacity, agent)\n        elif train_algorithm == 'digirl':\n            filtered_buffer = filter_buffer(train_trajectories, batch_size, capacity, agent)\n            filtered_validation_buffer = filter_buffer(val_trajectories, batch_size, capacity, agent)\n        \n        print(\"Training\")\n        if 'filtered' in 
train_algorithm:\n            info.update(trainer.update(filtered_buffer, no_update_actor = (i < warmup_iter)))\n            del filtered_buffer\n        else:\n            info.update(trainer.update_trajectory_critic(train_trajectories, val_trajectories))\n            info.update(trainer.update(replay_buffer, validation_buffer, filtered_buffer, filtered_validation_buffer, no_update_actor = (i < warmup_iter)))\n    \n        if use_wandb and accelerator.is_main_process:\n            wandb.log(info)\n        if (i+1) % save_freq == 0 and save_path is not None and accelerator.is_main_process:\n            print(\"Saving\")\n            trainer.save(os.path.join(save_path, 'trainer.pt'))\n            torch.save(replay_buffer, os.path.join(save_path, 'replay_buffer.pt'))\n            \n        if accelerator.is_main_process:\n            progress_bar.update(1)\n        \n"
  },
  {
    "path": "digirl/algorithms/parallel_utils.py",
    "content": "from digirl.misc import colorful_print\nimport threading\nimport os\nimport torch\nimport time\n\ndef remote_collect_trajectories(save_path, \n                                worker_temp_path, \n                                worker_run_path, \n                                worker_ips, \n                                worker_username, \n                                trainer):\n    # add all workers into known hosts if not already\n    colorful_print(\"Adding all workers to known hosts\", fg='green')\n    for worker_ip in worker_ips:\n        print(\"worker_ip\", worker_ip)\n        os.system(f\"ssh-keyscan -H {worker_ip} >> ~/.ssh/known_hosts\")\n    # kill all processes\n    for worker_ip in worker_ips:\n        os.system(f\"ssh {worker_username}@{worker_ip} 'pkill -U {worker_username}'\")\n    time.sleep(5)\n    for worker_ip in worker_ips:\n        os.system(f\"ssh {worker_username}@{worker_ip} 'skill -u {worker_username}'\")\n    time.sleep(5)\n    \n    # copying the agent to all remote workers\n    # save the current trainer, NO MATTER it's zero-shot or offline or online\n    colorful_print(\"Saving the current trainer\", fg='green')\n    trainer.save(os.path.join(save_path, \"trainer_current.pt\"))\n    colorful_print(\"Copying the current trainer to all workers\", fg='green')\n\n    command = f\"rm -rf {worker_temp_path} && mkdir -p {worker_temp_path} && exit\"\n    # parallely execute this command in all remote workser and wait for the command to finish\n    threads = []\n    colorful_print(\"Starting all trajectory collections\", fg='green')\n    for worker_ip in worker_ips:\n        t = threading.Thread(target=os.system, args=(f\"\"\"ssh -tt {worker_username}@{worker_ip} << EOF \n{command}\nEOF\n\"\"\",))\n        threads.append(t)\n        t.start()\n    for t in threads:\n        t.join()\n        colorful_print(\"Trajectory collection finished\", fg='green')\n\n    for worker_ip in worker_ips:\n        command = f\"scp -r 
{save_path}/trainer_current.pt {worker_username}@{worker_ip}:{worker_temp_path}\"\n        os.system(command)\n\n    command = f\"conda activate digirl && cd {worker_run_path} && python run.py --config-path config/multimachine --config-name worker && exit\"\n    for worker_ip in worker_ips:\n        t = threading.Thread(target=os.system, args=(f\"\"\"ssh -tt {worker_username}@{worker_ip} << EOF \n{command}\nEOF\n\"\"\",))\n        threads.append(t)\n        t.start()\n    for t in threads:\n        t.join()\n        colorful_print(\"Trajectory collection finished\", fg='green')\n    \n    for worker_ip in worker_ips:\n        os.system(f\"scp {worker_username}@{worker_ip}:{worker_temp_path}/trajectories.pt {save_path}/{worker_ip}\")\n    # wait for all trajs to be scp'ed to this host machine\n    while True:\n        if all([os.path.exists(f\"{save_path}/{worker_ip}\") for worker_ip in worker_ips]):\n            break\n        time.sleep(5)\n\n    # load all trajs in the remote machine\n    trajectories_list = [torch.load(f\"{save_path}/{worker_ip}\") for worker_ip in worker_ips]\n    # aggregate all trajs\n    trajectories = []\n    for traj_list in trajectories_list:\n        for traj in traj_list:\n            trajectories.append(traj)\n    return trajectories\n"
  },
  {
    "path": "digirl/algorithms/worker_collect_loop.py",
    "content": "from digirl.environment import batch_interact_environment\nfrom digirl.data import ReplayBuffer\nfrom digirl.algorithms.digirl import DigiRLTrainer\nfrom digirl.algorithms.filteredbc import BCTrainer\nfrom digirl.misc import colorful_print\nimport os\nimport torch\n\ndef worker_collect_loop(env,\\\n                agent,\\\n                tokenizer,\\\n                accelerator,\\\n                warmup_iter: int = 20,\n                rollout_size: int = 50,\\\n                batch_size: int = 2,\n                capacity: int = 500000,\n                train_iterations: int = 1,\\\n                epochs:int = 3, \\\n                grad_accum_steps: int = 1,\\\n                do_sample: bool = False,\\\n                temperature: float = 2.0,\\\n                critic_lr: float= 1e-3,\\\n                lm_lr: float = 1e-5,\\\n                gamma: float = 0.9,\n                tau: float = 0.1,\n                use_wandb: bool = False,\n                env_load_path: str = '',\n                actor_epochs: int = 3,\n                max_grad_norm: float = 0.01,\n                save_path: str = None,\n                save_freq: int = 25,\n                train_algorithm: str = \"digirl\",\n                decode_f: callable = lambda x: x,\n                offline_data_path: str = None,\n                offline_actor_iterations: int = 20,\n                offline_critic_iterations: int = 20,\n                offline_trajectory_critic_iterations: int = 20,\n                trajectory_critic_epochs: int = 5,\n                **kwargs):\n    if train_algorithm == \"digirl\":\n        trainer = DigiRLTrainer(agent=agent,\\\n                            accelerator=accelerator,\\\n                                tokenizer=tokenizer,\\\n                                critic_lr = critic_lr,\\\n                                lm_lr = lm_lr,\\\n                                gamma = gamma,\\\n                                tau = tau,\\\n        
                        epochs = epochs,\\\n                                actor_epochs = actor_epochs,\n                                grad_accum_steps=grad_accum_steps,\n                                max_grad_norm=max_grad_norm,\n                                trajectory_critic_epochs = trajectory_critic_epochs)\n    elif train_algorithm == \"filteredbc\":\n        trainer = BCTrainer(agent=agent,\\\n                                tokenizer=tokenizer,\\\n                                accelerator=accelerator,\n                                lm_lr = lm_lr,\\\n                                epochs = actor_epochs,\\\n                                grad_accum_steps=grad_accum_steps,\n                                max_grad_norm=max_grad_norm)\n    replay_buffer= ReplayBuffer(batch_size= batch_size, capacity=capacity)\n    all_trajectories = []\n    #prepare the model and optimizers\n    agent.prepare()\n    trainer.prepare()\n\n    colorful_print(\">>> Loading Current Trainer from Host\", fg='blue')\n    trainer.load(os.path.join(save_path, 'trainer_current.pt'))\n\n    colorful_print(\">>> Worker Collecting Online Data\", fg='blue')\n    \n    for i in range(train_iterations):\n        trajectories = batch_interact_environment(agent = agent,\\\n                                            env = env,\\\n                                            num_trajectories= rollout_size,\\\n                                            accelerator = accelerator,\\\n                                            use_tqdm=False,\n                                            decode_f = decode_f,\n                                            gamma = gamma,\n                                            iter=i)\n\n        torch.save(trajectories, os.path.join(save_path, 'trajectories.pt'))\n\n            "
  },
  {
    "path": "digirl/data/__init__.py",
    "content": "from .utils import DummyDataset, ReplayBuffer"
  },
  {
    "path": "digirl/data/utils.py",
    "content": "from torch.utils.data import Dataset, DataLoader\nimport numpy as np\nclass DummyDataset(Dataset):\n    def __init__(self, buffer):\n        self.buffer = buffer\n\n    def __len__(self):\n        return len(self.buffer)\n\n    def __getitem__(self, idx):\n        return self.buffer[idx]\n\n\nclass ReplayBuffer:\n    def __init__(self, batch_size=2, capacity=10000):\n        self.max_size = capacity\n        self.size = 0\n        self.observations = None\n        self.rewards = None\n        self.next_observations = None\n        self.dones = None\n        self.batch_size = batch_size\n        self.actions = None\n        self.mc_returns = None\n        self.image_features = None\n        self.next_image_features = None\n\n    def sample(self, batch_size=None):\n        if batch_size is None:\n            batch_size = self.batch_size\n        rand_indices = np.random.randint(0, self.size, size=(batch_size,)) % self.max_size\n        return {\n            \"observation\": self.observations[rand_indices],\n            \"action\": self.actions[rand_indices],\n            \"image_features\": self.image_features[rand_indices],\n            \"next_image_features\": self.next_image_features[rand_indices],\n            \"reward\": self.rewards[rand_indices],\n            \"next_observation\": self.next_observations[rand_indices],\n            \"done\": self.dones[rand_indices],\n            \"mc_return\": self.mc_returns[rand_indices],\n        }\n\n    def __len__(self):\n        return self.size\n\n    def insert(\n        self,\n        /,\n        observation,\n        action,\n        image_features: np.ndarray,\n        next_image_features: np.ndarray,\n        reward: np.ndarray,\n        next_observation,\n        done: np.ndarray,\n        mc_return,\n        **kwargs\n    ):\n        \"\"\"\n        Insert a single transition into the replay buffer.\n\n        Use like:\n            replay_buffer.insert(\n                
observation=observation,\n                action=action,\n                reward=reward,\n                next_observation=next_observation,\n                done=done,\n            )\n        \"\"\"\n        if isinstance(reward, (float, int)):\n            reward = np.array(reward)\n        if isinstance(mc_return, (float, int)):\n            mc_return = np.array(mc_return)\n        if isinstance(done, bool):\n            done = np.array(done)\n        # print(next_observation)\n        # if isinstance(prompt_actionaction, int):\n        #     action = np.array(action, dtype=np.int64)\n\n        if self.observations is None:\n            self.observations = np.array(['']*self.max_size, dtype = 'object')\n            self.actions = np.array(['']*self.max_size, dtype = 'object')\n            self.image_features = np.empty((self.max_size, *image_features.shape), dtype=image_features.dtype)\n            self.next_image_features = np.empty((self.max_size, *next_image_features.shape), dtype=next_image_features.dtype)\n            self.rewards = np.empty((self.max_size, *reward.shape), dtype=reward.dtype)\n            self.next_observations = np.array(['']*self.max_size, dtype = 'object')\n            self.dones = np.empty((self.max_size, *done.shape), dtype=done.dtype)\n            self.mc_returns = np.empty((self.max_size, *mc_return.shape), dtype=mc_return.dtype)\n\n        assert reward.shape == ()\n        assert done.shape == ()\n\n        self.observations[self.size % self.max_size] = observation\n        self.image_features[self.size % self.max_size] = image_features\n        self.next_image_features[self.size % self.max_size] = next_image_features\n        self.actions[self.size % self.max_size] = action\n        self.rewards[self.size % self.max_size] = reward\n        self.next_observations[self.size % self.max_size] = next_observation\n        self.dones[self.size % self.max_size] = done\n        self.mc_returns[self.size % self.max_size] = mc_return\n\n     
   self.size += 1"
  },
  {
    "path": "digirl/environment/__init__.py",
    "content": "from .env_utils import batch_interact_environment\nfrom .android import BatchedAndroidEnv\n"
  },
  {
    "path": "digirl/environment/android/__init__.py",
    "content": "from .env import BatchedAndroidEnv\nfrom .evaluate import EndResultEvaluator\nfrom .autoui_utils import cogagent_translate_action, autoui_translate_action, autoui_prepare_prompt"
  },
  {
    "path": "digirl/environment/android/assets/task_set/general_test.txt",
    "content": "Search for hotels in Washington DC\nWhat's the news in India?\nHow much does a 2 bedroom apartment rent for in Philadelphia?\nOpen a new tab in Chrome\nSet an alarm for 6pm\nSearch for flights from Sydney to Helsinki\nWhat's the news in Japan?\nOpen the clock\nHow do I get to the nearest Lowe's?\nSearch for hotels in Philadelphia\nWhat's the latest video from GameSpot News?\nWhat's a good restaurant in New Jersey?\nWhat's the weather like in New York?\nWhat's a good restaurant in Seattle?\nWhat's on the menu at Burger King?\nInstall the Calendar app\nWhat's a good restaurant in New York?\nPlay some music on YouTube\nSearch for hotels in Atlanta\nOpen a new Chrome incognito tab\nWhat's the latest news in space exploration?\nWhat's the news in the Dominican Republic?\nSearch for flights from NYC to Mexico city\nSearch for flights from Seoul to Barcelona\nWhat's the latest news in astrophysics?\nWhat's the news in Jamaica?\nWhat is the capital of Switzerland?\nWhat's a good restaurant in Sacramento?\nWhere can I buy a nice beach towel?\nSearch for a new perfume\nShow me some nice wallpapers for my tablet\nSearch for vegetarian restaurants on Maps\nSearch for flights from Buenos aires to Tokyo\nSearch for a new blush on Sephora\nWhat's the price of the Hisense TV?\nOpen a new Chrome private window\nSearch for hotels in Chicago\nSearch for hotels in Austin\nSet an alarm for 4pm\nInstall the Reddit app\nHow much does a 2x4x8 board cost at Lowes?\nCheck the settings for the YouTube app\nSearch for good Greek restaurants\nPlay the new Drake video on YouTube\nOpen the files app\nCheck the settings for the Google Maps app\nWho is the prime minister of the United Kingdom?\nFind the nearest grocery store\nSearch for hotels in Paris\nHow much does a 3 bedroom apartment rent for in Dallas?\nHow much does a 2 bedroom apartment rent for in Miami?\nFind the nearest electronics store that's open tomorrow\nOpen a new incognito window in the chrome app\nSearch for good 
Korean restaurants\nSearch for flights from London to Paris\nWhat's the news in Sri Lanka?\nWhat are the new products by Samsung?\nWhat is the capital of Sweden?\nHow much does the LG TV cost?\nSearch for flights from NYC to Buenos aires\nWhat's a good restaurant in Las Vegas?\nWhat is the capital of Norway?\nWhat is the capital of Italy?\nWhat is the speed of a rocket?\nHow do I get to the nearest Target?\nShow me some nice wallpapers for my phone\nWhat time is it in New York?\nSearch for flights from NYC to Tokyo\nCheck the settings for the Amazon Alexa app\nWhat's the weather like inToronto?\nPlay the new Bruno Mars video on YouTube\nWhat's a good restaurant in San Diego?\nWhat's the price of the LG TV?\nGoogle the capital of Argentina\nGoogle the capital of the United States\nWhat's the news in Argentina?\nSearch for flights from NYC to Sydney\nWhere can I buy a nice beach umbrella?\nShow me some nice wallpapers for my computer\nOpen a new incognito tab in the chrome app\nSet an alarm for 7pm\nSearch for flights from Barcelona to Boston\nHow much does a 3 bedroom apartment rent for in Miami?\nWhat's a good restaurant in Los Angeles?\nGoogle the capital of Panama\nSearch for a new eyeshadow\nHow much does a 3 bedroom apartment rent for in Washington DC?\nWhat's the weather like in London?\nWhat's the news in Chile?\nPlay the latest video from the BBC\nCheck the settings for the Twitter app\nHow do I get to the nearest Verizon Store?\nOpen Reddit\nSearch for a new mascara on Sephora\nSearch for top rated sushi restaurants on Maps\nWhat time is it in Moscow?\nHow much does a 3 bedroom apartment rent for in New York?\nHow much does a 2 bedroom apartment rent for in Denver?\nCheck the settings for the Google Play Music app\nWhat's on the menu at McDonalds?\nWhat's US dollar exchange rate against the British Pound?\nWhat is the capital of Canada?\nWhat's on the menu at Panera?\nHow much does a 3 bedroom apartment rent for in Seattle?\nSearch for a new mascara\nWhat 
is the capital of Spain?\nWhat is the speed of a jet?\nWhat's the news about the US dollar exchange rate?\nInstall the Yelp app\nWhat is the capital of the United Kingdom?\nWhat's on my calendar for the rest of the month?\nShow me some nice wallpapers for my desktop\nCheck the settings for the Amazon Music app\nInstall the Yahoo app\nHow much does a 2 bedroom apartment rent for in San Francisco?\nWhat's US dollar exchange rate against the South Korean Won?\nWhat's the top post on reddit?\nWhat is the speed of a plane?\nSearchfor good French restaurants\nHow much does the HisenseTV cost?\nSearch for a good pizza place on Maps\nSearch for flights from Helsinki to Tokyo\nWhat are the best selling refrigerators at Home Depot?\nWhat is the speed of a tiger?\nInstall the Pandora app\nOpen the SMS app\nWhat's the price of the Vizio TV?\nHow much does a 2 bedroom apartment rent for in New York?\nWhat's the price of the Samsung TV?\nFind the nearest electronics store that's open today\nCheck the settings for the Amazon Prime Video app\nWhere can I buy a nice beach tote?\nWhat's the news in Paraguay?\nSearch for a new foundation (skincare) product\nWho is the president of the United States?\nWhat time is it in Sydney?\nSearch for good Italian restaurants\nOpen the calendar and show me this week's events?\nHow big is the universe?\nSearch for flights from Mexico city to Sydney\nSearch for flights from Helsinki to Seoul\nSearch for top rated burger restaurants on Maps\nInstall the Facebook app\nHow big is the earth?\nWhat's the price of the Sony TV?\nOpen a new private tab in Chrome\nWhat's on the menu at Denny's?\nHow do I get to the nearest electronics store?\nHow much does a 3 bedroom apartment rent for in Boston?\nWhat is the speed of sound?\nOpen the calculator\nWhat's the price of the 1000-Watt EGO Power+ Snow Blower?\nSearch for good Indian restaurants\nWhat's the latest news in space science?\nInstall the Spotify app\nOpen a new Chrome incognito window\nHow much does a 
2 bedroom apartment rent for in Chicago?\nWhat's a good restaurant in Philadelphia?\nWhat's the weather like in Chicago?\nWhat's a good restaurant in Portland?\nWhat's a good restaurant in San Francisco?\nInstall the Weather app\nSearch for flights from NYC to Paris\nSearch for hotels in Las Vegas\nPlay the latest video from the Wall Street Journal\nSearch for flights from Zurich to Buenos aires\nWhat's the news in China?\nInstall the Uber app\nWhere can I buy a nice beach tent?\nCheck the settings for the Google Play Books app\nWhat's the latest technology news?\nWhat's the news in Ecuador?\nSearch for a new skincare product\nWhat's on my calendar for the rest of the week?\nCheck the settings for the Google Chrome app\nOpen the contacts\nGoogle the capital of Paraguay\nSearch for flights from Mexico city to Boston\nSearch for top rated seafood restaurants on Google Maps\nHow much does a3 bedroom apartment rent for in Portland?\nWhat's the news about the US economy?\nWhere can I buy a nice beach sandals?\nSearch for a new eyeliner\nWhat's the latest video from GameXplain?\nWhere can I buy a nice beach chair?\nWhat's the news about the US dollar?\nPlay the new Katy Perry video on YouTube\nOpen a new incognito window in Chrome\nSearch for hotels in Sydney\nHow big is the moon?\nWhat's on the menu at Taco Bell?\nWhat is the capital of France?\nPlay the latest video from the Washington Post\nSearch for the best pizza restaurants on Maps\nHow do I get to the nearest McDonalds?\nSearch for hotels in New York\nWhat's the news in the Bahamas?\nWhat's the latest video from GameSpot Reviews?\nWhat's the news in Singapore?\nCheck my email\nCheck the settings for the Spotify app\nHow much does the new iPad cost on eBay?\nWhat's the weather like in Beijing?\nTurn on notifications for the Google Maps app\nOpen the camera\nHow do I get to the nearest Best Buy?\nWhat is the speed of a train?\nHow do I get to the nearest Nordstrom?\nHow big is the sun?\nWho is the president of 
France?\nWhat's the price of the 2x4x8 boards at Home Depot?\nWhat's the time in San Francisco?\nWhat's the weather like in Moscow?\nCheck the settings for the Instagram app\nWhat's the weather like in Mexico City?\nWhat time is it in London?\nWhat's on the menu at In-N-Out?\nWhat's the news in Barbados?\nOpen the calculator app\nWhat's the news in South Korea?\nWhat's the weather like in Rio de Janeiro?\nWhat is the speed of a bicycle?\nWhat time is it in Beijing?\nWhat's the news this afternoon?\nHow much does a 2 bedroom apartment rent for in Seattle?\nCheck the settings for the Lyft app\nWhat's a good restaurant near me?\nWhat's the price of the new iPhone on eBay?\nHow much does a 2 bedroom apartment rent for in Washington DC?\nWhat's US dollar exchange rateagainst the Mexican Peso?\nWhat's the price of the Galaxy phone on eBay?\nWhat's the news in Suriname?\nHow do I get to the nearest IKEA?\nHow much does the new iPad cost?\nOpen the Google play store app\nCheck the settings for the Amazon Shopping app\nWhat's the news in the Philippines?\nWhat's a good restaurant in Miami?\nSearch for hotels in San Francisco\nSet an alarm for 6am\nOpen the settings\nHow big is a dinosaur?\nSearch for good Chinese restaurants\nWhat is the capital of Japan?\nWhat's the top post on reddit right now?\nSearch for good Italian restaurants on Maps\nWhat's the news in Pakistan?\nWhat is the capital of Brazil?\nWhat's the news in Cambodia?\nGoogle the capital of Bolivia\nSearch for a new blush\nWhat is the speed of light?\nWhat's on the menu at Domino's?\nWhat's the top post on reddit today?\nHow much does the TCL TV cost?\nGoogle the capital of Uruguay\nSearch for hotels in Buenos aires\nSearch for flights from Sydney to Zurich\nWhat are the best selling refrigerators at Lowes?\nSearch for hotels in Orlando\nWhat is the capital of Germany?\nOpen a new window in the chrome app\nOpen a new Chrome tab\nSearch for flights from Barcelona to Mexico city\nWhat's a good restaurant in 
Atlanta?\nGoogle the capital of Chile\nPlay the new Beyonce video on YouTube\nWhat's on the menu at IHOP?\nSearch for flights from Buenos aires to Seoul\nOpen a new tab in the chrome app\nHow much does a 3 bedroom apartment rent for in Austin?\nWhat is the capital of India?\nHow much does a 3 bedroom apartment rent for in Los Angeles?\nHow do I get to the nearest Home Depot?\nCheck the settings for the Google Play Movies app\nSearch for hotels in NYC\nSet an alarm for 3pm\nSearch for good Japanese restaurants\nWhat's the news in Brunei?\nSet an alarm for 11am\nInstall the eBay app\nGoogle the capital of Canada\nSearch for flights from Tokyo to NYC\nWhat's the price of the TCL TV?\nWhat's the weather like in San Francisco?\nHow do I get to the nearest Starbucks?\nHow much does a 3 bedroom apartment rent for in Atlanta?\nCheck my gmail\nPlay the new Ariana Grande video on YouTube\nHow much does a 2 bedroom apartment rent for in Atlanta?\nWhat's the price of the new iPhone\nFind the nearest electronics store that's open\nShow me my notifications\nSearch for flights from NYC to San Diego\nWhat's the weather like in Seoul?\nWhat's the news this morning?\nGoogle the capital of Peru\nWhat's a good restaurant in Denver?\nGoogle the capital of Mexico\nSearch for top rated sushi restaurant\nWhat's the weather like in Tokyo?\nSearch for top rated pizza restaurants on Maps\nSearch for a new highlighter\nHow do I get to the nearest JCPenney?\nWhat's the news in Thailand?\nWhat's the news this month?\nSet an alarm for 8pm\nWhat's the news this week?\nSearch for a new hair product\nWhat's the news in Taiwan?\nHow old is the earth?\nCheck the settings for the Netflix app\nWhat's the weather like in Johannesburg?\nHow do I get to the nearest Burger King?\nWhat is the capital of Argentina?\nHow big is a giraffe?\nOpen a new Chrome private tab\nWhat's the US dollar exchange rate against the Canadian Dollar?\nPlay the new Justin Bieber video on YouTube\nHow much does the Vizio TV 
cost?\nSet an alarm for 1pm\nHow much does a 2 bedroom apartment rent for in Austin?\nWhat's on the menu at Papa Murphy's?\nSearch for flights from Buenos aires to Helsinki\nWhat's the latest news in space technology?\nFind coffee shops on Maps\nInstall the Wikipedia app\nWhat's the news in Nepal?\nSet an alarm for 2pm\nWhat's the news this evening?\nWhat's the weather like in Paris?\nWhat's the news in Uruguay?\nWhat's on the menu at Subway?\nHow do I get to the nearest Sprint Store?\nWhat time is it in Berlin?\nWhat time is it?\nHow big is a lion?\nSearch for flights from Chicago to London\nSearch for hotels in Tokyo\nWhat's the time in New York?\nOpen the clock app\nOpen the downloads\nWhat's the news in Puerto Rico?\nWhat time is it in Tokyo?\nWhat's the news in Bangladesh?\nWhat time is it in San Francisco?\nSearch for hotels in London\nSearch for flights from Tokyo to Seoul\nWhat's on the menu at Cheesecake Factory?\nSearch for flights from Boston to Zurich\nWhat's the news in French Guiana?\nWhat's the speed of light?\nHow much does the Samsung TV cost?\nOpen a new Chrome window\nWhat's the news about the US?\nOpen the music app\nPlay the new Taylor Swift video on YouTube\nWhat's the latest news in planetary science?\nWhat's the news in Laos?\nSet an alarm for 8am\nSearch for hotels in Seattle\nWhat's on the menu at Five Guys?\nGoogle the capital of Brazil\nWhat's the news in Guyana?\nWhat's a good restaurant in San Jose?\nHow much does a 3 bedroom apartment rent for in San Francisco?\nPlay the latest video from the Huffington Post\nSearch for a new bronzer\nSearch for hotels in Zurich\nInstall the Google app\nWhat's US dollar exchange rate against the Chinese Yuan?\nInstall the Starbucks app\nWhat's a good restaurant in Dallas?\nWhat's on Reddit this week\nFind a good burger place on Maps\nHow much does a 2 bedroom apartment rent for in Los Angeles?\nSet an alarmfor 5pm\nWhat's the news in Peru?\nWhat is the capital of China?\nWhat's the news in 
Indonesia?\nHow much does a 3 bedroom apartment rent for in Houston?\nHow much does a 2 bedroom apartment rent for in Portland?\nWhat's the latest video from GameSpot Trailers?\nWhat's on Reddit\nInstall the News app\nWhat's the weather like in Sydney?\nWhat's a good restaurant in Chicago?\nSearch for hotels in Denver\nTurn off notifications for the Google Maps app\nSearch for flights from Sydney to Buenos aires\nCheck the settings for the Facebook app\nGo to Reddit\nWhat's on the menu at Chick-fil-A?\nWhat does the iPhone 8 look like on eBay?\nWhat's the price of the Galaxy phone?\nSearch for the best burger restaurants on Maps\nSearch for hotels in Miami\nHow much does a 2 bedroom apartment rent for in Boston?\nSearch for a new foundation on Sephora\nGoogle the capital of Colombia\nWhat's the news in Malaysia?\nWhat's on the menu at Olive Garden?\nWhat's the latest video from GameTrailers?\nWhat's the latest news in space?\nWhat's the weather like in Singapore?\nSearch for flights from NYC to London\nOpen the play store\nWhat's the weather like in Los Angeles?\nHow much does a 3 bedroom apartment rent for in Philadelphia?\nCheck the settings for the Google Photos app\nOpen the calendar\nWhat's a goodrestaurant in Austin?\nOpen a new private window in Chrome\nFind the nearest electronics store that's open now\nInstall the Twitter app\nOpen the contacts app\nWhat's the news in Vietnam?\nWhat's a good restaurant in Houston?\nWhat's the latest news in cosmology?\nSearch for hotels in Boston\nSearch for flights from San Francisco to Tokyo\nWhat is the speed of a skateboard?\nSearch for a new nail polish\nWhat's the latest video from GameSpot?\nWhere can I buy a nice beach hat?\nWhat's the news in theFalkland Islands?\nSearch for a new lipstick on Sephora\nWhat does the iPhone 8 look like?\nInstall the CNN app\nPlay the latest video from the New York Times\nWhat is the capital of Mexico?\nGoogle the capital of Venezuela\n"
  },
  {
    "path": "digirl/environment/android/assets/task_set/general_train.txt",
    "content": "Check the settings for the Pandora app\nWhat's a good restaurant in Los Angeles?\nShow me some nice wallpapers for my computer\nPlay the new Bruno Mars video on YouTube\nHow do I get to the nearest Verizon Store?\nWhat's the latest video from GameSpot Reviews?\nHow much does the Samsung TV cost?\nCheck the settings for the YouTube app\nWhat's a good restaurant in Denver?\nSearch for 5 star sushi restaurants on Maps\nWhat's a good restaurant in New Jersey?\nCheck the settings for the Facebook app\nWhere can I buy a nice beach hat?\nPlay the latest video from the Wall Street Journal\nGoogle the capital of Colombia\nPlay the new Taylor Swift video on YouTube\nWhat's on the menu at Cheesecake Factory?\nHow do I get to the nearest grocery store?\nWhat's US dollar exchange rate against the Chinese Yuan?\nWhat's the weather like in Hong Kong?\nWhat is the price of a 12' ladder at Home Depot?\nWhat's the news this week?\nHow do I get to the nearest IKEA?\nCheck the settings for the Google Play Movies app\nWhat's the news in Malaysia?\nWhat's the weather like in Johannesburg?\nSearch for good Indian restaurants\nSearch for flights from NYC to Mexico city\nHow big is the sun?\nOpen Reddit\nWhat is the capital of Germany?\nSearch for flights from NYC to San Francisco\nSearch for flights from Sydney to Buenos aires\nWhat's a good restaurant near me?\nWhat's the latest video from GameSpot Trailers?\nWhat's a good restaurant in Seattle?\nWhat's the latest tech news?\nWhat's US dollar exchange rate against the South Korean Won?\nInstall the News app\nCheck my email\nSearch for flights from Zurich to Helsinki\nSearch for top rated sushi restaurant\nPlay the latest video from the New York Times\nWhat is the average speed of a car?\nWhat's the price of the Galaxy phone on eBay?\nFind a nice sofa on eBay\nWhat is the capital of China?\nInstall the Pandora app\nHow much does a 2 bedroom apartment rent for in Washington DC?\nSearch for a new lipgloss\nHow big is a blue 
whale?\nCheck the settings for the Spotify app\nSearch for flights from Barcelona to Mexico city\nHow do I get to the nearest Apple Store?\nWhat's US dollar exchange rate against the British Pound?\nSearch for flights from Tokyo to Mexico city\nHow much does a 3 bedroom apartment rent for in Atlanta?\nOpen a new incognito window in the chrome app\nCheck the settings for the Google Play Store app\nInstall the Calendar app\nWhat's the latest video from GameXplain?\nSearch for top rated sushi restaurants on Maps\nWhat's the news in Indonesia?\nSet an alarm for 3pm\nShow me some nice wallpapers for my phone\nSearch for hotels in Las Vegas\nWhat's the price of the TCL TV?\nHow big is the earth?\nGoogle the capital of Uruguay\nOpen the Google play store app\nWhat's the news in Myanmar?\nHow far is the moon?\nWhat's the news today?\nHow much does a 3 bedroom apartment rent for in Chicago?\nWhat is the speed of a skateboard?\nWhat's the price of the new iPhone on eBay?\nWhat's the top post on reddit today?\nWhat's the news in Puerto Rico?\nOpen the play store\nPlay the latest video from the Washington Post\nSearch for flights from Helsinki to Seoul\nSearch for flights from Seoul to Mexico city\nWhat's the weather like in Moscow?\nHow do I get to the nearest JCPenney?\nWhat's on the menu at Domino's?\nWhat's a good restaurant in Atlanta?\nSearch for good Thai restaurants\nSearch for a new blush\nHow much does a 3 bedroom apartment rent for in Seattle?\nInstall the Twitter app\nWhat's the news this afternoon?\nSearch for a new foundation (skincare) product\nOpen a new Chrome incognito tab\nSearch for good Italian restaurants on Maps\nPlay the new Beyonce video on YouTube\nSearch for flights from NYC to Tokyo\nFind the nearest electronics store that's open now\nWhat's the weather like in Seoul?\nShow me my notifications\nInstall the Instagram app\nWhat's the US dollar exchange rate against the Brazilian Real?\nWhat's a good restaurant in San Francisco?\nHow do I get to the 
nearest Home Depot?\nInstall the CNN app\nSearch for a new eyeshadow\nFind coffee shops on Maps\nSearch for top rated seafood restaurants on Google Maps\nSearch for the best Mexican restaurants\nSet an alarm for 2pm\nOpen a new incognito window in Chrome\nWhat are the best selling refrigerators at Home Depot?\nWhat's the news in Singapore?\nHow much does a 2 bedroom apartment rent for in Houston?\nSearch for hotels in Paris\nInstall the ESPN app\nSearch for good Chinese restaurants\nWhat's the latest news in space science?\nWhat's the time in New York?\nWhat's on the menu at Red Lobster?\nWhat time is it in Moscow?\nGoogle the capital of Panama\nShow me some nice wallpapers for my laptop\nWhat's a good restaurant in Boston?\nWhat time is it in Beijing?\nWhat time is it in Sydney?\nHow much does a 2 bedroom apartment rent for in Denver?\nSearch for flights from Tokyo to NYC\nSearch for the best burger restaurants on Maps\nPlay the new Ariana Grande video on YouTube\nHow much does the Vizio TV cost?\nWhat's the price of the new iPhone\nSearch for good pizza restaurants on Maps\nHow much does a 2 bedroom apartment rent for in Chicago?\nWhat's the US dollar exchange rate against the Australian Dollar?\nSearch for a new eyeliner\nHow do I get to the nearest AT&T Store?\nWhat's the news in Paraguay?\nWhat's the latest news in tech?\nSearch for flights from Zurich to Buenos aires\nSearch for hotels in San Diego\nSet an alarm for 10am\nSearch for good Italian restaurants\nOpen a new Chrome incognito window\nOpen the contacts\nWhat's the weather like in Mexico City?\nFind the nearest electronics store that's open tomorrow\nSet an alarm for 8pm\nWhat's the news in Suriname?\nSearch for a new eyeshadow on Sephora\nWhat's the price of the 1000-Watt EGO Power+ Snow Blower?\nWhere can I buy a nice beach bag?\nOpen a new window in the chrome app\nWhat's the news in Trinidad and Tobago?\nWhat is the speed of a jet?\nCheck the settings for the Google Photos app\nWhat's the news 
this weekend?\nHow do I get to the nearest Best Buy?\nWhat's the news in the Philippines?\nWhat's the weather like inToronto?\nWhat's the latest video from GameSpot eSports?\nWhat is the capital of Spain?\nWhat is the capital of Japan?\nWhat's a goodrestaurant in Austin?\nSearch for flights from NYC to Barcelona\nCheck my email inbox\nSearch for hotels in Miami\nWhat's on the menu at Denny's?\nWhat is the capital of India?\nWhat's the news in Cambodia?\nWhat's on the menu at Panera?\nSearch for hotels in Buenos aires\nWhat is the capital of Brazil?\nGoogle the capital of Peru\nSearch for flights from Buenos aires to Tokyo\nSearch for flights from NYC to London\nWhat's the top post on reddit right now?\nWhat time is it?\nToggle notifications for the Google Photos app\nWhat's on the menu at Chipotle?\nSearch for flights from Helsinki to Tokyo\nOpen the calendar and show me this week's events?\nWhat's the latest news in space exploration?\nWhat's the latest video from IGN?\nWhat's the latest news in cosmology?\nWhat's the news in South Korea?\nWhat's on the menu at Chick-fil-A?\nSearch for flights from Mexico city to Seattle\nOpen a new private window in Chrome\nPlay the new Ed Sheeran video on YouTube\nOpen a new incognito tab in Chrome\nWhat's the latest news in astrophysics?\nWhat is the speed of sound?\nSearch for good BBQ restaurants\nSearch for hotels in Los Angeles\nWhat time is it in San Francisco?\nCheck the settings for the Amazon Prime Video app\nPlay the new Justin Bieber video on YouTube\nWhat is the capital of France?\nSearch for a new perfume\nWhat's the US dollar exchange rate against the Euro?\nHow big is a giraffe?\nWhat's on the menu at Subway?\nWhat's the latest technology news?\nWhat is the capital of Switzerland?\nWhat's the news in Venezuela?\nSearch for a new blush on Sephora\nWhat's the news in Bolivia?\nSearch for hotels in San Francisco\nCheck the settings for the Google Play Books app\nSearchfor good French restaurants\nWhat's the news 
about the US president?\nWhat's the news in Pakistan?\nWhat's the news in Argentina?\nSearch for good Greek restaurants\nHow do I get to the nearest Burger King?\nWhat's the news in Taiwan?\nWhat's a good restaurant in Sacramento?\nWhere can I buy a nice beach sandals?\nWhat time is it in London?\nWhat's the news this evening?\nHow do I get to the nearest Starbucks?\nHow much does a 3 bedroom apartment rent for in Denver?\nSearch for flights from Mexico city to Boston\nWhat's the latest video from Game Informer?\nSearch for hotels in New York\nCheck the settings for the Uber app\nWhat is the capital of England?\nSearch for hotels in Philadelphia\nWhere can I buy a nice beach blanket?\nOpen the settings\nWhat's the latest video from GameSpot?\nWhat's a good restaurant in Miami?\nHow much does a 3 bedroom apartment rent for in Austin?\nSearch for flights from San Francisco to Tokyo\nHow do I get to the nearest Subway?\nWhere can I buy a nice beach towel?\nWhat's US dollar exchange rate against the Indian Rupee?\nWhat's on the menu at Starbucks?\nWhat's a good restaurant in New York?\nToggle notifications for the Google Maps app\nSearch for hotels in Atlanta\nOpen the calendar app\nWhat's the news in Chile?\nInstall the eBay app\nWhere can I buy a nice beach chair?\nGoogle the capital of Chile\nWhat's a good restaurant in Chicago?\nInstall the Amazon app\nSearch for hotels in Sydney\nWhat's the time in San Francisco?\nOpen the downloads\nWhat's a good restaurant in Phoenix?\nSearch for flights from Chicago to London\nWhat's the weather like in Beijing?\nWhat's the news about the US stock market?\nPlay the new Maroon 5 video on YouTube\nSearch for flights from NYC to San Diego\nWhat's the news in the Bahamas?\nHow much does a 2 bedroom apartment rent for in Atlanta?\nWhat's the price of the Vizio TV?\nOpen the files app\nSearch for good Japanese restaurants\nInstall the Lyft app\nWhat's the news about the US economy?\nWhat's the news in Barbados?\nCheck the settings 
for the Lyft app\nFind the nearest grocery store\nWhat's the news in Brazil?\nWhat's the news in Colombia?\nHow do I get to the nearest Target?\nWhat's on the menu at Papa Murphy's?\nWhat's the news in Nepal?\nOpen a new tab in Chrome\nHow much does a 2x4x8 board cost at Lowes?\nWhat's on the menu at Five Guys?\nHow big is a tiger?\nWhat's on the menu at McDonalds?\nWhat's the news in Japan?\nWhat's the weather like in Los Angeles?\nCheckthe settings for the Amazon Prime Music app\nSearch for hotels in Mexico city\nPlay the new Selena Gomez video on YouTube\nInstall the Starbucks app\nWhat's the speed of light?\nSet an alarm for 1pm\nHow much does a 2 bedroom apartment rent for in New York?\nWhat's US dollar exchange rate against the Japanese Yen?\nOpen a new private tab in Chrome\nSearch for flights from San Diego to Seattle\nWhat's the news in Vietnam?\nWhat's the weather like in Tokyo?\nSearch for a new highlighter\nHow much does the HisenseTV cost?\nCheck the settings for the Amazon Shopping app\nHow much does a 3 bedroom apartment rent for in Dallas?\nWhat's the news in theFalkland Islands?\nHow do I get to the nearest T-Mobile Store?\nSearch for flights from Buenos aires to Helsinki\nHow much does a 2 bedroom apartment rent for in Boston?\nOpen a new private window in the chrome app\nWhat's on my calendar for the rest of the week?\nWHat are the new products by Samsung on eBay?\nWhat's the price of the 2x4x8 boards at Home Depot?\nSearch for the best pizza restaurants on Maps\nWhat's the latest news in astronomy?\nSearch for flights from NYC to Chicago\nSearch for hotels in Austin\nSearch for top rated pizza restaurants on Maps\nHow big is the moon?\nWhat's the latest video from GameSpot News?\nWhat's the weather like in London?\nCheck the settings for the Netflix app\nWhat's on the menu at In-N-Out?\nSet an alarm for 6am\nWhat time is it in New York?\nWhat's the price of the EGO 14-in 56-Volt Brushless Cordless Chainsaw?\nWhere can I buy a nice beach 
tote?\nSearch for flights from Mexico city to Sydney\nPlay the new Demi Lovato video on YouTube\nHow do I get to the nearest McDonalds?\nWhat's the news in Ecuador?\nSet an alarm for 7pm\nWhat are the best selling refrigerators at Lowes?\nSearch for flights from Buenos aires to Seoul\nSearch for flights from London to Paris\nInstall the Google app\nCheck my gmail\nOpen the calendar\nHow big is a lion?\nSearch for a new skincare product\nSearch for flights from NYC to Sydney\nWhat's the latest video from Gameranx?\nWhat is the speed of a plane?\nWhat's on the menu at Pizza Hut?\nSearch for hotels in Denver\nSearch for hotels in NYC\nWhat's the latest news in space technology?\nWho is the prime minister of Canada?\nWhat's on the menu at Burger King?\nInstall the Uber app\nOpen the calculator\nSearch for flights from Seoul to Barcelona\nCheck the settings for the Twitter app\nWhat's the weather like in Rio de Janeiro?\nOpen the music app\nWhat's the news in Bangladesh?\nHow much does a 2 bedroom apartment rent for in Miami?\nHow much does a 2 bedroom apartment rent for in Austin?\nGoogle the capital of Paraguay\nWhere can I buy a nice beach cooler?\nOpen a new Chrome tab\nSet an alarm for 7am\nSearch for flights from NYC to Paris\nWhat's the news this morning?\nHow much does a 3 bedroom apartment rent for in Boston?\nPlay the latest video from the BBC\nSet an alarmfor 5pm\nWhat's on the menu at Olive Garden?\nSearch for top rated burger restaurants on Google Maps\nHow do I get to the nearest Sprint Store?\nHow do I get to the nearest Chipotle?\nSearch for vegetarian restaurants on Maps\nWhat's the price of the Galaxy phone?\nGoogle the capital of the United States\nWhat's the weather like in Delhi?\nFind a good burger place on Maps\nWhat are the new products by Samsung?\nHow much does a 3 bedroom apartment rent for in Philadelphia?\nWhat's the news in India?\nWhere can I buy a nice beach umbrella?\nWhere can I buy a nice beach tent?\nWhat's a good restaurant in 
Portland?\nSearch for a good pizza place on Maps\nWhat's the price of the Hisense TV?\nSearch for flights from NYC to Buenos aires\nWhat's the US dollar exchange rateagainst the Swiss Franc?\nWhat is the capital of Mexico?\nHow do I get to the nearest Lowe's?\nInstall the Yahoo app\nWhat does the iPhone 8 look like?\nSearch for hotels in Boston\nWhat's the weather like in Paris?\nCheck the settings for the Google Play Music app\nWhat's a good restaurant in San Diego?\nSearch for a new mascara on Sephora\nWhat is the speed of a rocket?\nWhat is the price of a 12' ladder at Lowes?\nWhat's the weather like in Sydney?\nWhat is the speed of light?\nWhat's the price of the Samsung TV?\nWhat's the news in Thailand?\nWhat's on my calendar for the rest of the month?\nWhat's the news in Guyana?\nGoogle the capital of Brazil\nWhat's the latest news in planetary science?\nSearch for flights from Boston to Zurich\nWhat is the speed of a tiger?\nWhat is the speed of a train?\nCheck the settings for the Instagram app\nSet an alarm for 4pm\nWhat is the capital of Canada?\nGoogle the capital of Ecuador\nGoogle the capital of Mexico\nFind the nearest electronics store that's open\nSet an alarm for 8am\nWhat's the weather like in Singapore?\nSearch for flights from Barcelona to Boston\nSearch for a new hair product\nOpen a new Chrome private window\nSearch for flights from Mexico city to Zurich\nOpen the gallery\nWhat is the capital of the United Kingdom?\nSearch for hotels in Zurich\nHow do I get to the nearest Walmart?\nWhat's on the menu at Papa John's?\nSearch for a new nail polish\nWhat's the news in Peru?\nWhat's a good restaurant in Dallas?\nGo to Reddit\nWhat's US dollar exchange rateagainst the Mexican Peso?\nGoogle the capital of Venezuela\nHow much does a 3 bedroom apartment rent for in Washington DC?\nWhat's the top post on reddit?\nSearch for flights from Boston to Sydney\nGoogle the capital of Bolivia\nWhat's a good restaurant in San Jose?\nWhat time is it in 
Berlin?\nWhat's the news?\nOpen the calculator app\nOpen a new tab in the chrome app\nWhat time is it in Los Angeles?\nHow much does the new iPad cost?\nHow do I get to the nearest Macy's?\nWhat's on the menu at IHOP?\nHow much does a 3 bedroom apartment rent for in Los Angeles?\nWhat's the latest news in space?\nSearch for a new mascara\nWhat's on Reddit today\nSet an alarm for 6pm\nSearch for flights from Tokyo to Seoul\nOpen the camera\nCheck the settings for the Google Maps app\nWhat's the news in China?\nHow much does a 3 bedroom apartment rent for in San Francisco?\nWhat's the news about the US dollar?\nPlay the latest video from the Huffington Post\nOpen a new window in Chrome\nCheck my gmail inbox\nTurn off notifications for the Google Maps app\nWhat's on Reddit\nInstall the Weather app\nInstall the Wikipedia app\nInstall the Yelp app\nWhat's the US dollar exchange rate against the Canadian Dollar?\nWhat's a good restaurant in Houston?\nWhat is the capital of Italy?\nHow much does the new iPad cost on eBay?\nWhat's a good restaurant in Philadelphia?\nHow much does a 3 bedroom apartment rent for in Houston?\nWhat's the news in the Dominican Republic?\nHow much does the LG TV cost?\nSearch for hotels in Orlando\nSearch for a new foundation on Sephora\nPlay some music on YouTube\nHow much does a 2 bedroom apartment rent for in Los Angeles?\nOpen a new Chrome window\nHow big is the universe?\nWhat's the news about the US dollar exchange rate?\nWhat time is it in Tokyo?\nHow much does a3 bedroom apartment rent for in Portland?\nSearch for a new lipstick on Sephora\nWhat's the weather like in San Francisco?\nWhat's the news about the US?\nOpen the SMS app\nSearch for hotels in Chicago\nWhat's on the menu at Taco Bell?\nOpen the clock\nSearch for hotels in Washington DC\nWhat's the news in Laos?\nSearch for hotels in Seattle\nWhat is the capital of Argentina?\nWhat's the weather like in Chicago?\nInstall the Reddit app\nCheck the settings for the Amazon Alexa 
app\nWhat's the weather like in New York?\nSearch for hotels in Tokyo\nCheck the settings for the Google Chrome app\nTurn on notifications for the Google Maps app\nOpen the clock app\nInstall the Spotify app\nSet an alarm for 11am\nSet an alarm for 12pm\nHow do I get to the nearest Nordstrom?\nSearch for hotels in London\nWho is the president of France?\nHow much does a 3 bedroom apartment rent for in Miami?\nShow me some nice wallpapers for my desktop\nHow much does the Sony TV cost?\nHow much does the TCL TV cost?\nWhat's on Reddit this week\nWhat's the news in French Guiana?\nFind the nearest electronics store that's open today\nWhat's the news in Brunei?\nSearch for top rated burger restaurants on Maps\nShow me some nice wallpapers for my tablet\nWho is the president of the United States?\nWhat's the price of the Sony TV?\nInstall the Facebook app\nWhat's the time?\nWhat's a good restaurant in Las Vegas?\nWhat is the capital of Sweden?\nGoogle the capital of Argentina\nHow much does a 2 bedroom apartment rent for in Philadelphia?\nHow do I get to the nearest electronics store?\nHow much does a 3 bedroom apartment rent for in New York?\nSet an alarm for 9am\nCheck the settings for the Amazon Music app\nWhat's the news in Jamaica?\nOpen a new incognito tab in the chrome app\nSearch for good Korean restaurants\nWhat's the weather like in Mumbai?\nWhat's the latest video from GameTrailers?\nWhat time is it in Paris?\nHow much does a 2 bedroom apartment rent for in Portland?\nWhat does the iPhone 8 look like on eBay?\nPlay the new Drake video on YouTube\nWhat is the speed of a cheetah?\nOpen the contacts app\nSearch for flights from Sydney to Zurich\nWhat's the news in Uruguay?\nSearch for flights from Sydney to Helsinki\nSearch for a new bronzer\nWhat's the news this month?\nHow big is a dinosaur?\nWho is the prime minister of the United Kingdom?\nHow much does a 2 bedroom apartment rent for in Seattle?\nWhat's the news in Sri Lanka?\nOpen a new Chrome private 
tab\nHow old is the earth?\nWhat's the price of the LG TV?\nPlay the new Katy Perry video on YouTube\nHow much does a 2 bedroom apartment rent for in San Francisco?\nWhat is the capital of Norway?\nGoogle the capital of Canada\nWhat is the speed of a bicycle?"
  },
  {
    "path": "digirl/environment/android/assets/task_set/webshop_test.txt",
    "content": "Go to ebay.com, search for 'apple airpods', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'apple airpods'\nGo to costco.com, search for 'razer blade', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'razer blade'\nGo to costco.com, search for 'asus zenbook', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'asus zenbook'\nGo to walmart.com, search for 'logitech g933', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'logitech g933'\nGo to ebay.com, search for 'corsair k70', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'corsair k70'\nGo to newegg.com, search for 'alienware area 51', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'alienware area 51'\nGo to walmart.com, search for 'macbook pro 15 inch', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'macbook pro 15 inch'\nGo to newegg.com, search for 'duracell triple a', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'duracell triple a'\nGo to bestbuy.com, search for 'energizer triple a', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'energizer triple a'\nGo to ebay.com, search for 'duracell triple a', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'duracell triple a'\nGo to costco.com, search for 'usb-c to usb-a', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'usb-c to usb-a'\nGo to newegg.com, search for 'razer blade', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer blade'\nGo to costco.com, search for 'razer blade', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'razer blade'\nGo to bestbuy.com, search for 'logitech g933', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'logitech g933'\nGo to walmart.com, search 
for 'razer kraken', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'razer kraken'\nGo to walmart.com, search for 'macbook pro', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'macbook pro'\nGo to bestbuy.com, search for 'rayovac triple a', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'rayovac triple a'\nGo to ebay.com, search for 'sony triple a', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'sony triple a'\nGo to ebay.com, search for 'jbl flip 4', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'jbl flip 4'\nGo to ebay.com, search for 'logitech g pro', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'logitech g pro'\nGo to costco.com, search for 'acer predator', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'acer predator'\nGo to newegg.com, search for 'usb-a', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'usb-a'\nGo to newegg.com, search for 'razer thresher', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer thresher'\nGo to costco.com, search for 'acer nitro', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'acer nitro'\nGo to bestbuy.com, search for 'razer thresher', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'razer thresher'\nGo to walmart.com, search for 'dell xps', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'dell xps'\nGo to bestbuy.com, search for 'razer nari', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'razer nari'\nGo to ebay.com, search for 'macbook air', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'macbook air'\nGo to costco.com, search for 'macbook pro', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'macbook pro'\nGo to 
costco.com, search for 'razer blackwidow', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'razer blackwidow'\nGo to ebay.com, search for 'lenovo thinkpad', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'lenovo thinkpad'\nGo to ebay.com, search for 'razer blade', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'razer blade'\nGo to newegg.com, search for 'acer predator', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'acer predator'\nGo to walmart.com, search for 'logitech g502', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'logitech g502'\nGo to walmart.com, search for 'usb-a', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'usb-a'\nGo to walmart.com, search for 'panasonic triple a', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'panasonic triple a'\nGo to costco.com, search for 'razer thresher', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'razer thresher'\nGo to newegg.com, search for 'razer nari', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer nari'\nGo to costco.com, search for 'logitech g910', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'logitech g910'\nGo to walmart.com, search for 'dell xps', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'dell xps'\nGo to costco.com, search for 'bose soundlink', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'bose soundlink'\nGo to costco.com, search for 'duracell triple a', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'duracell triple a'\nGo to bestbuy.com, search for 'asus rog', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'asus rog'\nGo to walmart.com, search for 'dell xps', and select the first entry\nGo to 
walmart.com\nGo to walmart.com, search for 'dell xps'\nGo to costco.com, search for 'duracell triple a', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'duracell triple a'\nGo to ebay.com, search for 'alienware area 51', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'alienware area 51'\nGo to newegg.com, search for 'macbook air', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'macbook air'\nGo to costco.com, search for 'macbook pro 13 inch', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'macbook pro 13 inch'\nGo to bestbuy.com, search for 'logitech g910', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'logitech g910'\nGo to ebay.com, search for 'sony triple a', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'sony triple a'\nGo to walmart.com, search for 'razer thresher', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'razer thresher'\nGo to walmart.com, search for 'apple airpods', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'apple airpods'\nGo to costco.com, search for 'usb-b', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'usb-b'\nGo to ebay.com, search for 'logitech g502', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'logitech g502'\nGo to costco.com, search for 'acer nitro', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'acer nitro'\nGo to newegg.com, search for 'usb-a', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'usb-a'\nGo to bestbuy.com, search for 'beats solo 3', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'beats solo 3'\nGo to walmart.com, search for 'dell alienware', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'dell alienware'\nGo to walmart.com, search 
for 'alienware area 51', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'alienware area 51'\nGo to newegg.com, search for 'macbook pro 15 inch', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'macbook pro 15 inch'\nGo to costco.com, search for 'macbook air', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'macbook air'\nGo to newegg.com, search for 'macbook', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'macbook'\nGo to costco.com, search for 'panasonic triple a', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'panasonic triple a'\nGo to bestbuy.com, search for 'usb-a', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'usb-a'\nGo to bestbuy.com, search for 'bestbuy', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'bestbuy'\nGo to costco.com, search for 'logitech g pro', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'logitech g pro'\nGo to bestbuy.com, search for 'apple airpods pro', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'apple airpods pro'\nGo to bestbuy.com, search for 'usb-a', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'usb-a'\nGo to newegg.com, search for 'razer kraken', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer kraken'\nGo to newegg.com, search for 'macbook pro', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'macbook pro'\nGo to newegg.com, search for 'usb-c', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'usb-c'\nGo to ebay.com, search for 'bose soundlink mini', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'bose soundlink mini'\nGo to costco.com, search for 'logitech g933', and select the first entry\nGo to costco.com\nGo to costco.com, 
search for 'logitech g933'\nGo to costco.com, search for 'macbook air', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'macbook air'\nGo to bestbuy.com, search for 'lg ultragear', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'lg ultragear'\nGo to bestbuy.com, search for 'razer kraken', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'razer kraken'\nGo to walmart.com, search for 'bose soundlink', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'bose soundlink'\nGo to bestbuy.com, search for 'logitech g pro', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'logitech g pro'\nGo to bestbuy.com, search for 'macbook pro', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'macbook pro'\nGo to newegg.com, search for 'bose soundlink', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'bose soundlink'\nGo to bestbuy.com, search for 'usb-c to usb-a', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'usb-c to usb-a'\nGo to newegg.com, search for 'alienware aurora', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'alienware aurora'\nGo to costco.com, search for 'usb-c', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'usb-c'\nGo to walmart.com, search for 'razer thresher', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'razer thresher'\nGo to ebay.com, search for 'usb-c to usb-a', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'usb-c to usb-a'\nGo to bestbuy.com, search for 'macbook air', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'macbook air'\nGo to bestbuy.com, search for 'beats solo 3', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'beats solo 3'\nGo to bestbuy.com, search for 
'usb-c to usb-b', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'usb-c to usb-b'\nGo to newegg.com, search for 'rayovac triple a', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'rayovac triple a'\nGo to bestbuy.com, search for 'logitech g502', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'logitech g502'\nGo to walmart.com, search for 'duracell triple a', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'duracell triple a'\nGo to newegg.com, search for 'razer blade', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer blade'\nGo to walmart.com, search for 'logitech g pro', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'logitech g pro'\nGo to bestbuy.com, search for 'usb-c to usb-b', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'usb-c to usb-b'\nGo to costco.com, search for 'usb-c to usb-a', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'usb-c to usb-a'\nGo to bestbuy.com, search for 'logitech g910', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'logitech g910'\nGo to bestbuy.com, search for 'apple airpods', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'apple airpods'\nGo to costco.com, search for 'logitech g910', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'logitech g910'\nGo to ebay.com, search for 'jbl charge 4', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'jbl charge 4'\nGo to ebay.com, search for 'energizer triple a', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'energizer triple a'\nGo to newegg.com, search for 'asus rog', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'asus rog'\nGo to walmart.com, search for 'dell alienware', and select the first 
entry\nGo to walmart.com\nGo to walmart.com, search for 'dell alienware'\nGo to bestbuy.com, search for 'bose quietcomfort 35', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'bose quietcomfort 35'\nGo to costco.com, search for 'macbook pro', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'macbook pro'\nGo to costco.com, search for 'macbook pro 13 inch', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'macbook pro 13 inch'\nGo to newegg.com, search for 'razer thresher', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer thresher'\nGo to ebay.com, search for 'macbook air', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'macbook air'\nGo to costco.com, search for 'soundlink mini', and select the first entry\nGo to costco.com\nGo to costco.com, search for 'soundlink mini'\nGo to newegg.com, search for 'razer blackwidow', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer blackwidow'\nGo to walmart.com, search for 'macbook pro', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'macbook pro'\nGo to walmart.com, search for 'razer nari', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'razer nari'\nGo to bestbuy.com, search for 'logitech g910', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'logitech g910'\nGo to ebay.com, search for 'alienware area 51', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'alienware area 51'\nGo to bestbuy.com, search for 'usb-a to usb-b', and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for 'usb-a to usb-b'\nGo to walmart.com, search for 'bose soundsport free', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'bose soundsport free'\nGo to ebay.com, search for 'dell xps', and select the first entry\nGo to 
ebay.com\nGo to ebay.com, search for 'dell xps'\nGo to walmart.com, search for 'razer kraken', and select the first entry\nGo to walmart.com\nGo to walmart.com, search for 'razer kraken'\nGo to ebay.com, search for 'bose soundsport free', and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 'bose soundsport free'\nGo to newegg.com, search for 'usb-b', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'usb-b'\nGo to newegg.com, search for 'razer blade', and select the first entry\nGo to newegg.com\nGo to newegg.com, search for 'razer blade'\n"
  },
  {
    "path": "digirl/environment/android/assets/task_set/webshop_train.txt",
    "content": "Go to newegg.com\nGo to newegg.com, search for \"macbook air\"\nGo to newegg.com, search for \"macbook air\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"logitech g910\"\nGo to bestbuy.com, search for \"logitech g910\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"logitech g910\"\nGo to bestbuy.com, search for \"logitech g910\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"bose soundsport free\"\nGo to costco.com, search for \"bose soundsport free\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"logitech g910\"\nGo to walmart.com, search for \"logitech g910\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"asus zenbook\"\nGo to ebay.com, search for \"asus zenbook\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"rayovac triple a\"\nGo to newegg.com, search for \"rayovac triple a\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer blade\"\nGo to ebay.com, search for \"razer blade\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"razer blackwidow\"\nGo to costco.com, search for \"razer blackwidow\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"beats solo 3\"\nGo to costco.com, search for \"beats solo 3\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"usb-c to usb-b\"\nGo to costco.com, search for \"usb-c to usb-b\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"bose soundsport free\"\nGo to costco.com, search for \"bose soundsport free\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"bose soundlink mini\"\nGo to costco.com, search for \"bose soundlink mini\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"bose quietcomfort 35\"\nGo to bestbuy.com, 
search for \"bose quietcomfort 35\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"usb-a\"\nGo to walmart.com, search for \"usb-a\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"razer blade\"\nGo to walmart.com, search for \"razer blade\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"logitech g pro\"\nGo to costco.com, search for \"logitech g pro\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"lg ultragear\"\nGo to ebay.com, search for \"lg ultragear\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer deathadder\"\nGo to ebay.com, search for \"razer deathadder\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"razer kraken\"\nGo to walmart.com, search for \"razer kraken\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"asus zenbook\"\nGo to newegg.com, search for \"asus zenbook\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"bose soundlink mini\"\nGo to newegg.com, search for \"bose soundlink mini\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"dell xps\"\nGo to bestbuy.com, search for \"dell xps\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"alienware aurora\"\nGo to newegg.com, search for \"alienware aurora\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"acer predator\"\nGo to walmart.com, search for \"acer predator\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"macbook air\"\nGo to walmart.com, search for \"macbook air\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"macbook\"\nGo to bestbuy.com, search for \"macbook\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"bose soundlink\"\nGo to walmart.com, search for \"bose 
soundlink\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"macbook pro\"\nGo to newegg.com, search for \"macbook pro\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"dell alienware\"\nGo to walmart.com, search for \"dell alienware\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"logitech g pro\"\nGo to costco.com, search for \"logitech g pro\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"lenovo thinkpad\"\nGo to ebay.com, search for \"lenovo thinkpad\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"acer predator\"\nGo to ebay.com, search for \"acer predator\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"corsair k70\"\nGo to costco.com, search for \"corsair k70\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"sony triple a\"\nGo to costco.com, search for \"sony triple a\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"razer thresher\"\nGo to newegg.com, search for \"razer thresher\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"macbook pro 13 inch\"\nGo to costco.com, search for \"macbook pro 13 inch\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"razer kraken\"\nGo to bestbuy.com, search for \"razer kraken\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"lenovo thinkpad\"\nGo to newegg.com, search for \"lenovo thinkpad\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"macbook\"\nGo to costco.com, search for \"macbook\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"corsair k70\"\nGo to walmart.com, search for \"corsair k70\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"usb-c to usb-a\"\nGo to ebay.com, search for \"usb-c to usb-a\", and 
select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"logitech g pro\"\nGo to newegg.com, search for \"logitech g pro\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"bose soundlink\"\nGo to ebay.com, search for \"bose soundlink\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer blackwidow\"\nGo to ebay.com, search for \"razer blackwidow\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g903\"\nGo to ebay.com, search for \"logitech g903\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"jbl flip 4\"\nGo to walmart.com, search for \"jbl flip 4\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"razer deathadder\"\nGo to costco.com, search for \"razer deathadder\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"logitech g903\"\nGo to costco.com, search for \"logitech g903\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"asus rog\"\nGo to walmart.com, search for \"asus rog\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer blade\"\nGo to ebay.com, search for \"razer blade\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"usb-a to usb-b\"\nGo to ebay.com, search for \"usb-a to usb-b\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"rayovac triple a\"\nGo to costco.com, search for \"rayovac triple a\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer blade\"\nGo to ebay.com, search for \"razer blade\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"dell xps\"\nGo to newegg.com, search for \"dell xps\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"logitech g903\"\nGo to newegg.com, search for \"logitech g903\", and select the first entry\nGo to bestbuy.com\nGo to 
bestbuy.com, search for \"logitech g pro\"\nGo to bestbuy.com, search for \"logitech g pro\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"bose soundsport free\"\nGo to newegg.com, search for \"bose soundsport free\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"bose quietcomfort 35\"\nGo to newegg.com, search for \"bose quietcomfort 35\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"lenovo thinkpad\"\nGo to walmart.com, search for \"lenovo thinkpad\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g910\"\nGo to ebay.com, search for \"logitech g910\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"jbl charge 4\"\nGo to bestbuy.com, search for \"jbl charge 4\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"logitech g910\"\nGo to newegg.com, search for \"logitech g910\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"lg ultragear\"\nGo to costco.com, search for \"lg ultragear\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"alienware area 51\"\nGo to ebay.com, search for \"alienware area 51\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"razer blade\"\nGo to walmart.com, search for \"razer blade\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"usb-b\"\nGo to costco.com, search for \"usb-b\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"alienware aurora\"\nGo to bestbuy.com, search for \"alienware aurora\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"usb-b\"\nGo to bestbuy.com, search for \"usb-b\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"macbook\"\nGo to walmart.com, search for \"macbook\", and select the first entry\nGo to newegg.com\nGo to 
newegg.com, search for \"razer nari\"\nGo to newegg.com, search for \"razer nari\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"usb-a to usb-b\"\nGo to costco.com, search for \"usb-a to usb-b\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"usb-c to usb-b\"\nGo to walmart.com, search for \"usb-c to usb-b\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"corsair k70\"\nGo to bestbuy.com, search for \"corsair k70\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g933\"\nGo to ebay.com, search for \"logitech g933\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"beats solo 3\"\nGo to newegg.com, search for \"beats solo 3\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"macbook pro 15 inch\"\nGo to ebay.com, search for \"macbook pro 15 inch\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"lenovo thinkpad\"\nGo to costco.com, search for \"lenovo thinkpad\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"macbook pro\"\nGo to ebay.com, search for \"macbook pro\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"logitech g903\"\nGo to walmart.com, search for \"logitech g903\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"macbook\"\nGo to walmart.com, search for \"macbook\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"usb-a to usb-b\"\nGo to ebay.com, search for \"usb-a to usb-b\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"usb-a to usb-b\"\nGo to newegg.com, search for \"usb-a to usb-b\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"duracell triple a\"\nGo to ebay.com, search for \"duracell triple a\", and select the first entry\nGo to costco.com\nGo to costco.com, search for 
\"razer huntsman\"\nGo to costco.com, search for \"razer huntsman\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"razer blackwidow\"\nGo to walmart.com, search for \"razer blackwidow\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"macbook air\"\nGo to walmart.com, search for \"macbook air\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g pro\"\nGo to ebay.com, search for \"logitech g pro\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"beats solo 3\"\nGo to walmart.com, search for \"beats solo 3\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"razer huntsman\"\nGo to walmart.com, search for \"razer huntsman\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"razer blackwidow\"\nGo to bestbuy.com, search for \"razer blackwidow\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"razer huntsman\"\nGo to newegg.com, search for \"razer huntsman\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"corsair k70\"\nGo to ebay.com, search for \"corsair k70\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"corsair k70\"\nGo to bestbuy.com, search for \"corsair k70\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"usb-c to usb-a\"\nGo to bestbuy.com, search for \"usb-c to usb-a\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer thresher\"\nGo to ebay.com, search for \"razer thresher\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"macbook\"\nGo to costco.com, search for \"macbook\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"lenovo thinkpad\"\nGo to walmart.com, search for \"lenovo thinkpad\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for 
\"alienware aurora\"\nGo to ebay.com, search for \"alienware aurora\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"dell xps\"\nGo to bestbuy.com, search for \"dell xps\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"macbook air\"\nGo to costco.com, search for \"macbook air\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"lg ultragear\"\nGo to costco.com, search for \"lg ultragear\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"razer kraken\"\nGo to costco.com, search for \"razer kraken\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"apple airpods\"\nGo to ebay.com, search for \"apple airpods\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"dell alienware\"\nGo to walmart.com, search for \"dell alienware\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"duracell triple a\"\nGo to bestbuy.com, search for \"duracell triple a\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"razer kraken\"\nGo to costco.com, search for \"razer kraken\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"razer deathadder\"\nGo to costco.com, search for \"razer deathadder\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"sony triple a\"\nGo to newegg.com, search for \"sony triple a\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"razer blackwidow\"\nGo to costco.com, search for \"razer blackwidow\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"panasonic triple a\"\nGo to ebay.com, search for \"panasonic triple a\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"lg ultragear\"\nGo to walmart.com, search for \"lg ultragear\", and select the first entry\nGo to costco.com\nGo to costco.com, search 
for \"panasonic triple a\"\nGo to costco.com, search for \"panasonic triple a\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"alienware area 51\"\nGo to costco.com, search for \"alienware area 51\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"energizer triple a\"\nGo to bestbuy.com, search for \"energizer triple a\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"lenovo thinkpad\"\nGo to newegg.com, search for \"lenovo thinkpad\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"razer naga\"\nGo to walmart.com, search for \"razer naga\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"usb-c to usb-b\"\nGo to walmart.com, search for \"usb-c to usb-b\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"jbl charge 4\"\nGo to bestbuy.com, search for \"jbl charge 4\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"alienware area 51\"\nGo to newegg.com, search for \"alienware area 51\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"razer kraken\"\nGo to newegg.com, search for \"razer kraken\", and select the first entry\nGo to newegg.com\nGo to newegg.com, search for \"dell xps\"\nGo to newegg.com, search for \"dell xps\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"logitech g502\"\nGo to costco.com, search for \"logitech g502\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"acer nitro\"\nGo to walmart.com, search for \"acer nitro\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"beats solo 3\"\nGo to bestbuy.com, search for \"beats solo 3\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"bose quietcomfort 35\"\nGo to walmart.com, search for \"bose quietcomfort 35\", and select the first entry\nGo to 
bestbuy.com\nGo to bestbuy.com, search for \"acer nitro\"\nGo to bestbuy.com, search for \"acer nitro\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"asus rog\"\nGo to ebay.com, search for \"asus rog\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"duracell triple a\"\nGo to ebay.com, search for \"duracell triple a\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"usb-c\"\nGo to ebay.com, search for \"usb-c\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"razer huntsman\"\nGo to bestbuy.com, search for \"razer huntsman\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"beats solo 3\"\nGo to costco.com, search for \"beats solo 3\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"acer nitro\"\nGo to bestbuy.com, search for \"acer nitro\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"lenovo thinkpad\"\nGo to bestbuy.com, search for \"lenovo thinkpad\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g502\"\nGo to ebay.com, search for \"logitech g502\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g933\"\nGo to ebay.com, search for \"logitech g933\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g pro\"\nGo to ebay.com, search for \"logitech g pro\", and select the first entry\nGo to costco.com\nGo to costco.com, search for \"panasonic triple a\"\nGo to costco.com, search for \"panasonic triple a\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"dell xps\"\nGo to ebay.com, search for \"dell xps\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g910\"\nGo to ebay.com, search for \"logitech g910\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"razer thresher\"\nGo to 
ebay.com, search for \"razer thresher\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g933\"\nGo to ebay.com, search for \"logitech g933\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"logitech g910\"\nGo to ebay.com, search for \"logitech g910\", and select the first entry\nGo to walmart.com\nGo to walmart.com, search for \"dell xps\"\nGo to walmart.com, search for \"dell xps\", and select the first entry\nGo to bestbuy.com\nGo to bestbuy.com, search for \"usb-c to usb-a\"\nGo to bestbuy.com, search for \"usb-c to usb-a\", and select the first entry\nGo to ebay.com\nGo to ebay.com, search for \"bose soundlink\"\nGo to ebay.com, search for \"bose soundlink\", and select the first entry\n"
  },
  {
    "path": "digirl/environment/android/autoui_utils.py",
    "content": "from enum import Enum\nfrom dataclasses import dataclass\nfrom typing import List, Tuple, Union\nfrom transformers import Blip2VisionModel, AutoProcessor, Blip2Model\nimport torch\nfrom PIL import Image\n\nclass ImageFeatureExtractor:\n    def __init__(self, device):\n        # Set device based on CUDA availability\n        self.device = device\n        \n        # Initialize and load the BLIP2 model and processor\n        self.model = Blip2Model.from_pretrained(\"Salesforce/blip2-opt-2.7b\").cpu()\n        self.model.language_model = None\n        # self.model = self.model.to(self.device)\n        self.processor = AutoProcessor.from_pretrained(\"Salesforce/blip2-opt-2.7b\")\n\n    def to_feat(self, image: Image.Image):\n        \"\"\"Converts a PIL image to a feature representation using the BLIP2 model.\n        \n        Args:\n            image: A PIL.Image object representing the image to convert.\n            \n        Returns:\n            A tensor representing the image feature.\n        \"\"\"\n        with torch.no_grad():\n            # Preprocess the image and move to the correct device\n            inputs = self.processor(images=image, return_tensors=\"pt\").to(self.model.device)\n            \n            # Get the image features from the model\n            image_features = self.model.get_image_features(**inputs).pooler_output[0]\n            \n            # Detach the tensor from the graph and move it to CPU\n            image_features = image_features.detach().cpu()\n            \n        return image_features\n\n# class ImageFeatureExtractor:\n#     def __init__(self, device):\n#         # Set device based on CUDA availability\n#         self.device = device\n        \n#         # Initialize and load the BLIP2 model and processor\n#         self.model = Blip2VisionModel.from_pretrained(\"Salesforce/blip2-opt-2.7b\").to(self.device)\n#         self.processor = AutoProcessor.from_pretrained(\"Salesforce/blip2-opt-2.7b\")\n\n#     def 
to_feat(self, image: Image.Image):\n#         \"\"\"Converts a PIL image to a feature representation using the BLIP2 model.\n        \n#         Args:\n#             image: A PIL.Image object representing the image to convert.\n            \n#         Returns:\n#             A tensor representing the image feature.\n#         \"\"\"\n#         with torch.no_grad():\n#             # Preprocess the image and move to the correct device\n#             inputs = self.processor(images=image, return_tensors=\"pt\").to(self.device)\n            \n#             # Get the image features from the model\n#             image_features = self.model(**inputs,\n#                                         output_attentions=False,\n#                                         output_hidden_states=False,\n#                                         return_dict=True).pooler_output[0]\n#             #size is 1408\n            \n#             # Detach the tensor from the graph and move it to CPU\n#             image_features = image_features.detach().cpu()\n            \n#         return image_features\n\nclass ActionType(Enum):\n    Idle=0\n    DualPoint=1\n    Type=2\n    GoBack=3\n    GoHome=4\n    Enter=5\n    TaskComplete=6\n    TaskImpossible=7\n\n@dataclass\nclass AndroidAction():\n    action_type: ActionType\n    touch_point: Tuple[float, float] = None\n    lift_point: Tuple[float, float] = None\n    typed_text: str = None\n\n    def __str__(self):\n        # Construct the basic action type string.\n        components = [f\"Action Type: {self.action_type.name}\"]\n\n        # Format and add touch_point if it's not None.\n        if self.touch_point:\n            touch_point_str = f\"({self.touch_point[0]:.4f}, {self.touch_point[1]:.4f})\"\n            components.append(f\"Touch Point: {touch_point_str}\")\n\n        # Format and add lift_point if it's not None.\n        if self.lift_point:\n            lift_point_str = f\"({self.lift_point[0]:.4f}, {self.lift_point[1]:.4f})\"\n           
 components.append(f\"Lift Point: {lift_point_str}\")\n\n        # Add typed_text if it's not None.\n        if self.typed_text:\n            components.append(f\"Typed Text: '{self.typed_text}'\")\n\n        # Join all components into a single string.\n        return \", \".join(components)\n\n    def to_act(self):\n        pass\n\n\ndef cogagent_translate_action(out):\n    raw_action = out\n    try:\n        raw_action = raw_action.split('Grounded Operation:')[1]\n        action = raw_action.split(\" \")[0]\n        if action == 'tap':\n            numbers = raw_action.split('[[')[1].split(',')\n            x = int(numbers[0])\n            y = int(numbers[1].split(']]')[0])\n            touch_point = (x/1000, y/1000)\n            return AndroidAction(action_type=ActionType.DualPoint, touch_point=touch_point, lift_point=touch_point)\n        elif \"type\" in action:\n            text = raw_action.split('\"')[1]\n            return AndroidAction(action_type=ActionType.Type, typed_text=text)\n        elif \"press home\" in raw_action:\n            return AndroidAction(action_type=ActionType.GoHome)\n        elif \"press back\" in raw_action:\n            return AndroidAction(action_type=ActionType.GoBack)\n        elif \"press enter\" in raw_action:\n            return AndroidAction(action_type=ActionType.Enter)\n        elif \"task complete\" in raw_action:\n            return AndroidAction(action_type=ActionType.TaskComplete)\n        elif \"task impossible\" in raw_action:\n            return AndroidAction(action_type=ActionType.TaskImpossible)\n        elif \"swipe up\" in raw_action:\n            return AndroidAction(action_type=ActionType.DualPoint, touch_point=(0.5, 0.5), lift_point=(0.5, 0.2))\n        elif \"swipe down\" in raw_action:\n            return AndroidAction(action_type=ActionType.DualPoint, touch_point=(0.5, 0.2), lift_point=(0.5, 0.5))\n        elif \"swipe left\" in raw_action:\n            return 
AndroidAction(action_type=ActionType.DualPoint, touch_point=(0.8, 0.5), lift_point=(0.2, 0.5))\n        elif \"swipe right\" in raw_action:\n            return AndroidAction(action_type=ActionType.DualPoint, touch_point=(0.2, 0.5), lift_point=(0.8, 0.5))\n        else:\n            print(f\"Action {raw_action} not supported yet.\")\n            return AndroidAction(action_type=ActionType.Idle)\n    except Exception as e:\n        print(f\"Action {raw_action} Parsing Error: {e}\")\n        return AndroidAction(action_type=ActionType.Idle)\n\ndef autoui_translate_action(out):\n    action_str = out.split(\"Action Decision: \")[1]\n    action_type, touch_point_1, touch_point_2, lift_point_1, lift_point_2, typed_text = action_str.split(\", \")\n    touch_point = touch_point_1 + \", \" + touch_point_2\n    lift_point = lift_point_1 + \", \" + lift_point_2\n    try:\n        action_type = action_type.split(\": \")[1].strip('\"')\n        if action_type == 'DUAL_POINT':\n            touch_point_yx = touch_point.split(\": \")[1].strip('[]\"')\n            touch_point_yx = [float(num) for num in touch_point_yx.split(\", \")]\n            lift_point_yx = lift_point.split(\": \")[1].strip('[]\"')\n            lift_point_yx = [float(num) for num in lift_point_yx.split(\", \")]\n            return AndroidAction(action_type=ActionType.DualPoint, touch_point=touch_point_yx[::-1], lift_point=lift_point_yx[::-1])\n        elif action_type == 'TYPE':\n            text = typed_text.split(\": \")[1].strip('\"')\n            return AndroidAction(action_type=ActionType.Type, typed_text=text)\n        elif action_type == 'PRESS_HOME':\n            return AndroidAction(action_type=ActionType.GoHome)\n        elif action_type == 'PRESS_BACK':\n            return AndroidAction(action_type=ActionType.GoBack)\n        elif action_type == 'PRESS_ENTER':\n            return AndroidAction(action_type=ActionType.Enter)\n        elif action_type == 'STATUS_TASK_COMPLETE':\n            return 
AndroidAction(action_type=ActionType.TaskComplete)\n        elif action_type == 'TASK_IMPOSSIBLE':\n            return AndroidAction(action_type=ActionType.TaskImpossible)\n        else:\n            print(f\"Action {out} not supported yet.\")\n            return AndroidAction(action_type=ActionType.Idle)\n    except Exception as e:\n        print(f\"Action {out} Parsing Error: {e}\")\n        return AndroidAction(action_type=ActionType.Idle)\n\ndef to_autoui(act: AndroidAction):\n    if act.action_type == ActionType.DualPoint:\n        return f'\"action_type\": \"DUAL_POINT\", \"touch_point\": \"[{act.touch_point[1]:.4f}, {act.touch_point[0]:.4f}]\", \"lift_point\": \"[{act.lift_point[1]:.4f}, {act.lift_point[0]:.4f}]\", \"typed_text\": \"\"'\n    elif act.action_type == ActionType.Type:\n        return f'\"action_type\": \"TYPE\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"{act.typed_text}\"'\n    elif act.action_type == ActionType.GoBack:\n        return f'\"action_type\": \"PRESS_BACK\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n    elif act.action_type == ActionType.GoHome:\n        return f'\"action_type\": \"PRESS_HOME\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n    elif act.action_type == ActionType.Enter:\n        return f'\"action_type\": \"PRESS_ENTER\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n    elif act.action_type == ActionType.TaskComplete or act.action_type == ActionType.TaskImpossible:\n        return f'\"action_type\": \"STATUS_TASK_COMPLETE\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n    else:\n        print(f\"Action {act} not supported yet.\")\n        return \"\"\n\ndef autoui_prepare_prompt(task, history):\n        prompt = \"Previous Actions: \"\n        for act in history[-1:]:\n            
prompt += f\"{to_autoui(act)} \"\n        prompt += f\"Goal: {task}</s>\"\n        return prompt"
  },
  {
    "path": "digirl/environment/android/client.py",
    "content": "from gradio_client import Client\nfrom PIL import Image\nfrom .env import AndroidAction, ActionType\nfrom typing import Dict, Union\nfrom time import sleep\n\n\n\nfrom abc import ABC, abstractmethod\nclass AbstractAgent(ABC):\n    @abstractmethod\n    def act(self, task:str, image_path:str)->Union[AndroidAction, Dict]:\n        pass\n\n\nclass AutoUI:\n    def __init__(self, url):\n        self.client = Client(url)\n        self.reset_history()\n\n    def predict(self, text:str, image_path:str)->str:\n        for _ in range(3):\n            try:\n                out = self.client.predict(text, image_path)\n                break\n            except:\n                sleep(1)\n        return out\n\n    @classmethod\n    def to_autoui(self, act: AndroidAction):\n        if act.action_type == ActionType.DualPoint:\n            return f'\"action_type\": \"DUAL_POINT\", \"touch_point\": \"[{act.touch_point[1]:.4f}, {act.touch_point[0]:.4f}]\", \"lift_point\": \"[{act.lift_point[1]:.4f}, {act.lift_point[0]:.4f}]\", \"typed_text\": \"\"'\n        elif act.action_type == ActionType.Type:\n            return f'\"action_type\": \"TYPE\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"{act.typed_text}\"'\n        elif act.action_type == ActionType.GoBack:\n            return f'\"action_type\": \"PRESS_BACK\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n        elif act.action_type == ActionType.GoHome:\n            return f'\"action_type\": \"PRESS_HOME\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n        elif act.action_type == ActionType.Enter:\n            return f'\"action_type\": \"PRESS_ENTER\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n        elif act.action_type == ActionType.TaskComplete or act.action_type == ActionType.TaskImpossible:\n            return 
f'\"action_type\": \"STATUS_TASK_COMPLETE\", \"touch_point\": \"[-1.0, -1.0]\", \"lift_point\": \"[-1.0, -1.0]\", \"typed_text\": \"\"'\n        else:\n            print(f\"Action {act} not supported yet.\")\n            return \"\"\n\n    def act(self, task:str, image_path:str)->Union[AndroidAction, Dict]:\n        prompt = self.prepare_prompts(task)\n        out = self.predict(prompt, image_path)\n        translated_action = self._translate_action(out)\n        self.history_acts.append(translated_action)\n        return translated_action, {\"prompt\": prompt, \"output\": out}\n    \n    def reset_history(self):\n        self.history_acts = []\n\n    def prepare_prompts(self, task:str):\n        prompt = \"Previous Actions: \"\n        for act in self.history_acts[-8:]:\n            prompt += f\"{AutoUI.to_autoui(act)} \"\n        prompt += f\"Goal: {task}</s>\"\n        return prompt\n\n    def _translate_action(self, out):\n        action_str = out.split(\"Action Decision: \")[1]\n        action_type, touch_point_1, touch_point_2, lift_point_1, lift_point_2, typed_text = action_str.split(\", \")\n        touch_point = touch_point_1 + \", \" + touch_point_2\n        lift_point = lift_point_1 + \", \" + lift_point_2\n        try:\n            action_type = action_type.split(\": \")[1].strip('\"')\n            if action_type == 'DUAL_POINT':\n                touch_point_yx = touch_point.split(\": \")[1].strip('[]\"')\n                touch_point_yx = [float(num) for num in touch_point_yx.split(\", \")]\n                lift_point_yx = lift_point.split(\": \")[1].strip('[]\"')\n                lift_point_yx = [float(num) for num in lift_point_yx.split(\", \")]\n                return AndroidAction(action_type=ActionType.DualPoint, touch_point=touch_point_yx[::-1], lift_point=lift_point_yx[::-1])\n            elif action_type == 'TYPE':\n                text = typed_text.split(\": \")[1].strip('\"')\n                return AndroidAction(action_type=ActionType.Type, 
typed_text=text)\n            elif action_type == 'PRESS_HOME':\n                return AndroidAction(action_type=ActionType.GoHome)\n            elif action_type == 'PRESS_BACK':\n                return AndroidAction(action_type=ActionType.GoBack)\n            elif action_type == 'PRESS_ENTER':\n                return AndroidAction(action_type=ActionType.Enter)\n            elif action_type == 'STATUS_TASK_COMPLETE':\n                return AndroidAction(action_type=ActionType.TaskComplete)\n            elif action_type == 'TASK_IMPOSSIBLE':\n                return AndroidAction(action_type=ActionType.TaskImpossible)\n            else:\n                print(f\"Action {out} not supported yet.\")\n                return AndroidAction(action_type=ActionType.Idle)\n        except Exception as e:\n            print(f\"Action {out} Parsing Error: {e}\")\n            return AndroidAction(action_type=ActionType.Idle)"
  },
  {
    "path": "digirl/environment/android/env.py",
    "content": "import os\nimport shutil\nimport subprocess, signal\nimport re\nfrom time import sleep\nimport random\nfrom .autoui_utils import autoui_prepare_prompt, AndroidAction, ActionType, ImageFeatureExtractor\nimport time\nfrom digirl.misc import colorful_print\n\nfrom appium import webdriver\nfrom appium.options.android import UiAutomator2Options\n\nimport base64\nfrom PIL import Image\nfrom io import BytesIO\nfrom termcolor import colored, cprint\nimport concurrent.futures\nimport numpy as np\nimport traceback\n\ndef escape_shell_text(text):\n    # List of characters to escape\n    chars_to_escape = ['\\\\','\"', \"'\", '`', '$']\n    \n    # Escape the characters by adding a backslash before them\n    for char in chars_to_escape:\n        text = text.replace(char, '\\\\' + char)\n    text = text.replace(\" \", \"%s\")\n    return text\n\ndef kill_all_emulators(adb_path, emulators=None):\n    # Get the list of connected devices\n    result = subprocess.run([adb_path, 'devices'], stdout=subprocess.PIPE)\n    devices_output = result.stdout.decode('utf-8')\n    \n    # Find all emulator device names using a regular expression\n    running_emulators = re.findall(r'emulator-\\d+', devices_output)\n    \n    # Shut down each emulator found\n    for emulator in emulators:\n        if emulator not in running_emulators:\n            continue\n        subprocess.run([adb_path, '-s', emulator, 'emu', 'kill'])\n        print(f'{emulator} has been shut down.')\n\n    if not emulators:\n        print(\"No running emulators found.\")\n\ndef clone_avd(src_avd_name, tar_avd_name, android_avd_home):\n    \"\"\"\n    Clone the source AVD to the target AVD.\n\n    Parameters:\n    - src_avd_name: The name of the source AVD folder.\n    - tar_avd_name: The name of the target AVD folder.\n    - android_avd_home: The path to the .android/avd directory.\n\n    This function copies the source AVD folder and its .ini file to a new target AVD\n    and updates the paths inside the 
.ini files accordingly.\n    \"\"\"\n\n    # Paths for source and target AVD directories and .ini files\n    src_avd_dir = os.path.join(android_avd_home, src_avd_name + '.avd')\n    tar_avd_dir = os.path.join(android_avd_home, tar_avd_name + '.avd')\n    src_ini_file = os.path.join(android_avd_home, src_avd_name + '.ini')\n    tar_ini_file = os.path.join(android_avd_home, tar_avd_name + '.ini')\n\n    # Copy the AVD folder\n    colorful_print(f\"Copying the AVD folder from {src_avd_dir} to {tar_avd_dir}\", \"green\")\n    if not os.path.exists(tar_avd_dir):\n        shutil.copytree(src_avd_dir, tar_avd_dir)\n\n    # Copy the .ini file and modify it for the new AVD\n    with open(src_ini_file, 'r') as src_ini, open(tar_ini_file, 'w') as tar_ini:\n        for line in src_ini:\n            tar_ini.write(line.replace(src_avd_name, tar_avd_name))\n\n    # Update paths inside the target AVD's .ini files\n    for ini_name in ['config.ini', 'hardware-qemu.ini']:\n        ini_path = os.path.join(tar_avd_dir, ini_name)\n        if os.path.exists(ini_path):\n            with open(ini_path, 'r') as file:\n                lines = file.readlines()\n            with open(ini_path, 'w') as file:\n                for line in lines:\n                    # Update paths and AVD name/ID\n                    new_line = line.replace(src_avd_name, tar_avd_name)\n                    file.write(new_line)\n\n    # Update the snapshots' hardware.ini file if it exists\n    snapshots_hw_ini = os.path.join(tar_avd_dir, 'snapshots', 'default_boot', 'hardware.ini')\n    if os.path.exists(snapshots_hw_ini):\n        with open(snapshots_hw_ini, 'r') as file:\n            lines = file.readlines()\n        with open(snapshots_hw_ini, 'w') as file:\n            for line in lines:\n                # Update AVD name/ID\n                new_line = line.replace(src_avd_name, tar_avd_name)\n                file.write(new_line)\n\n\nclass AndroidEmulator():\n    def __init__(self, avd_name, max_steps, 
temp_path, evaluator, emulator_path=\"~/Android/Sdk/emulator/emulator\", appium_server_url='http://localhost:4723', no_window=False, udid = None,\n        feature_extractor = None, all_tasks = None, prepare_prompt = autoui_prepare_prompt, translate_action = None, save_images = False, task_id=None, task_split=\"train\", sample_mode=None, record=False):\n        \"\"\"\n        temp_path temporary path to store the images for evaluation\n        \"\"\"\n        self.temp_path = temp_path\n        if not os.path.exists(temp_path):\n            os.makedirs(temp_path)\n        self.emulator_path = os.path.expanduser(emulator_path)\n        self.avd_name = avd_name\n        self.save_images = save_images\n        self.image_id = str(time.time())\n        port_number = udid.split(\"-\")[-1]\n        self.udid = udid\n        cprint(colored(f\"Starting the Emulator\", \"green\"))\n        command = f\"\"\"{self.emulator_path} -avd {self.avd_name} \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-save\" -port {port_number}\"\"\"\n        if no_window:\n            command += \" -no-window\"\n        print(f\"executing command {command}\")\n        self.emulator_process = subprocess.Popen(command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n        sleep(30)\n        self.record = record\n        if self.record:\n            self.record_random_id = random.randint(0, 100000)\n            try_record_command = f\"\"\"adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_1.mp4\"\"\"\n            # redirect the output and error to the output of the main process\n            import sys\n            print(f\"Trying to record the screen of {self.udid}\")\n            self.try_record_process = subprocess.Popen(try_record_command, shell=True, stdout=sys.stdout, stderr=sys.stderr)\n            sleep(20)\n            self.try_record_process.terminate()\n            
try:\n                self.try_record_process.wait(timeout=20)\n            except subprocess.TimeoutExpired:\n                self.try_record_process.kill()\n                self.try_record_process.wait()\n            sleep(5)\n            print(f\"Recording the screen of {self.udid}\")\n            do_record_command = f\"\"\"adb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_1.mp4 &&\nadb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_2.mp4 &&\nadb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_3.mp4 &&\nadb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_4.mp4 &&\nadb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_5.mp4 &&\nadb -s {self.udid} shell screenrecord --size 540x1140 --bit-rate 4M --time-limit=180 /sdcard/video_{self.image_id}_6.mp4\"\"\"\n            self.record_process = subprocess.Popen(do_record_command, shell=True, stdout=subprocess.PIPE, preexec_fn=os.setsid) # should be good the second time\n            sleep(5)\n\n        capabilities = dict(\n            platformName='Android',\n            automationName='uiautomator2',\n            deviceName='Android',\n            newCommandTimeout=\"120000\",\n            adbExecTimeout=\"120000\",\n            uiautomator2ServerInstallTimeout=\"120000\",\n            uiautomator2ServerLaunchTimeout=\"120000\",\n            uiautomator2ServerReadTimeout=\"120000\",\n            noSign=True\n        )\n        if udid:\n            capabilities[\"udid\"] = udid\n        self.options = UiAutomator2Options().load_capabilities(capabilities)\n        self.appium_server_url = appium_server_url\n        for i in range(3):\n            try:\n                self.driver = 
webdriver.Remote(self.appium_server_url, options=self.options)\n                print(\"connected!\")\n                break\n            except Exception as e:\n                cprint(colored(f\"Failed to connect to the appium server: {e}\\n Retrying\", \"red\"))\n                if i == 3:\n                    raise Exception(\"Failed to connect to the appium server\")\n                sleep(20)\n        self.terminated = False\n        self.max_steps = max_steps\n        self.steps = 0\n        self.feature_extractor = feature_extractor\n        screen_size = self.driver.get_window_size()\n        self.screen_size = (screen_size[\"width\"], screen_size[\"height\"])\n        if sample_mode == \"random\":\n            # randomly sample a task from the task set\n            self.current_task = random.choice(all_tasks)\n        elif sample_mode == \"sequential\":\n            self.current_task = all_tasks[task_id]\n        else:\n            print(\"Invalid sample mode\")\n        self.prepare_prompt = prepare_prompt\n        self.translate_action = translate_action\n        self.history = []\n        self.evaluator = evaluator\n    \n    def terminate(self):\n        \n        if self.record:\n            # send sigterm to the record process\n            os.killpg(os.getpgid(self.record_process.pid), signal.SIGINT)\n            sleep(5)\n            os.system(f\"adb -s {self.udid} pull /sdcard/video_{self.image_id}_1.mp4 {self.temp_path}\")\n            os.system(f\"adb -s {self.udid} pull /sdcard/video_{self.image_id}_2.mp4 {self.temp_path}\")\n            os.system(f\"adb -s {self.udid} pull /sdcard/video_{self.image_id}_3.mp4 {self.temp_path}\")\n            os.system(f\"adb -s {self.udid} pull /sdcard/video_{self.image_id}_4.mp4 {self.temp_path}\")\n            os.system(f\"adb -s {self.udid} pull /sdcard/video_{self.image_id}_5.mp4 {self.temp_path}\")\n            os.system(f\"adb -s {self.udid} pull /sdcard/video_{self.image_id}_6.mp4 {self.temp_path}\")\n    
        print(\"it's okay if you see errors like failed to stat remote object '/sdcard/video_1718747809.256034_{i}.mp4' where i is larger than 1.\")\n\n        sleep(5)\n        self.emulator_process.terminate()\n        try:\n            self.emulator_process.wait(timeout=20)\n        except subprocess.TimeoutExpired:\n            self.emulator_process.kill()\n            self.emulator_process.wait()\n        self.terminated = True\n    \n    def refresh_driver(self):\n        self.driver.quit()\n        self.driver = webdriver.Remote(self.appium_server_url, options=self.options)\n    \n    def count_white_pixels(self, img):\n        # Convert the image to RGB format if it's not\n        img = img.convert('RGB')\n        # Convert image to numpy array\n        data = np.array(img)\n        # Count white pixels\n        # Assuming 'white' is (255, 255, 255)\n        white_count = np.sum(np.all(data > 240, axis=-1))\n        return white_count > 2_300_000\n    \n    def get_obs(self):\n        for _ in range(3):\n            try:\n                is_white = True\n                for _ in range(5):\n                    if not is_white:\n                        break\n                    sleep(5)\n                    screenshot_str = self.driver.get_screenshot_as_base64()\n                    imgdata = base64.b64decode(screenshot_str)\n                    image =  Image.open(BytesIO(imgdata))\n                    is_white = self.count_white_pixels(image)\n                # print(\"Saving observation!\")\n                image.save(os.path.join(self.temp_path, f\"{self.image_id}_{self.steps}.png\"))\n                # Assuming 'image' is your PIL Image object in RGBA mode\n                if image.mode == 'RGBA':\n                    image = image.convert('RGB')\n                if self.feature_extractor is not None:\n                    image = self.feature_extractor.to_feat(image)\n                # colorful_print(f\"history: {self.history}\", \"green\")\n            
    # colorful_print(f\"prompt: {self.prepare_prompt(self.current_task, self.history)}\", \"yellow\")\n                return {\"prompt\": self.prepare_prompt(self.current_task, self.history),\n                        \"image_feature\": image,\n                        \"task\": self.current_task,\n                        \"image_path\": os.path.join(self.temp_path, f\"{self.image_id}_{self.steps}.png\"),\n                        \"video_path\": os.path.join(self.temp_path, f\"video_{self.record_random_id}.mp4\") if self.record else None\n                }\n            except Exception as e:\n                print(f\"Exception happened during screenshotting\")\n                print(e)\n                print(traceback.format_exc())\n                sleep(6)\n                continue\n    def step(self, raw_action: str):\n        if self.terminated:\n            return None\n        try:\n            # colorful_print(f\"raw action: {raw_action}\", \"green\")\n            action = self.translate_action(raw_action)\n            # colorful_print(f\"translated action: {action}\", \"green\")\n        except Exception as e:\n            print(e)\n            print(f\"Failed to translate action: {raw_action}, terminating the environment\")\n            action = AndroidAction(action_type=ActionType.TaskImpossible)\n        self.history.append(action)\n        self.steps += 1\n        if self.steps > self.max_steps:\n            action = AndroidAction(action_type=ActionType.TaskImpossible)\n            cprint(colored(f\"Terminate the Emulator: Max Steps Exceeded {self.max_steps}.\", \"red\"))\n        screenshot = None\n        info = {}\n        for i in range(2):\n            try:\n                if action.action_type == ActionType.DualPoint:\n                    assert len(action.touch_point) == 2\n                    assert len(action.lift_point) == 2\n                    touch_x = action.touch_point[0] * self.screen_size[0]\n                    touch_y = 
action.touch_point[1] * self.screen_size[1]\n                    lift_x = action.lift_point[0] * self.screen_size[0]\n                    lift_y = action.lift_point[1] * self.screen_size[1]\n                    if (touch_x - lift_x)**2 + (touch_y - lift_y)**2 < 10:\n                        self.driver.tap([(touch_x, touch_y)])\n                    else:\n                        self.driver.swipe(touch_x, touch_y, lift_x, lift_y)\n                elif action.action_type == ActionType.Type:\n                    # This doesn't work well because of active element\n                    for i in range(2):\n                        try:\n                            sleep(4)\n                            element = self.driver.switch_to.active_element\n                            element.send_keys(action.typed_text)\n                            break\n                        except Exception as e:\n                            cprint(f\"The element is not loaded yet or agent did not click anything\", \"red\")\n                    \n                elif action.action_type == ActionType.GoBack:\n                    self.driver.back()\n                elif action.action_type == ActionType.GoHome:\n                    self.driver.press_keycode(3)\n                elif action.action_type == ActionType.Enter:\n                    self.driver.press_keycode(66)\n                elif action.action_type == ActionType.TaskComplete:\n                    self.terminated = True\n                elif action.action_type == ActionType.TaskImpossible:\n                    self.terminated = True\n                elif action.action_type == ActionType.Idle:\n                    pass\n                else:\n                    raise Exception(f\"Unknown action type: {action.action_type}\")\n                action_success = True\n                screenshot = self.get_obs()\n                break\n            except Exception as e:\n                cprint(colored(\"an Exception occurred during 
environment interaction\", \"red\"))\n                print(e)\n                cprint(colored(\"Retrying\", \"red\"))\n                sleep(10)\n                if i == 1:\n                    action_success = False\n                    info[\"error\"] = str(e)\n                    self.driver.quit()\n                    self.terminate()\n                    return None\n                continue\n        r = 0\n        if screenshot is not None and self.evaluator is not None:\n            r = self.evaluator([os.path.join(self.temp_path, f\"{self.image_id}_{self.steps-1}.png\"), \n                                os.path.join(self.temp_path, f\"{self.image_id}_{self.steps}.png\")], self.current_task)\n        info[\"action_success\"] = action_success\n        #terminate the environment if there is a success\n        if r >= 1 or self.terminated:\n            self.driver.quit()\n            self.terminate()\n        if self.terminated and not self.save_images:\n            os.system(f\"rm -rf {self.temp_path}/*\")\n        return screenshot, r, self.terminated\n\n\nclass BatchedAndroidEnv():\n    \"\"\"\n    This class wraps around the android emulator and provides more infrastructure for free-form GUI navigation\n    This is a batched version for Android Env\n    cache_avd is the avd to be used; the avd is the initial one\n    \"\"\"\n    def __init__(self, \n        avd_name, \n        cache_avd_names,\n        udids,\n        appium_base_port,\n        android_avd_home: str = '/nfs/kun2/users/yifei/openended/.android/android_avd/avd',\n        emulator_path: str = '~/Android/Sdk/emulator/emulator',\n        adb_path: str = \"~/Library/Android/sdk/platform-tools/adb\",\n        run_headless: bool = False,\n        max_steps: int = 10,\n        use_feature_extractor = False, \n        evaluators = None,\n        prepare_prompt = autoui_prepare_prompt, \n        translate_action = None,\n        device = \"cuda:2\",\n        temp_path = 
\"/nfs/kun2/users/yifei/openended/logs/images\",\n        save_images = False,\n        all_tasks = None,\n        task_split = \"train\",\n        sample_mode = None,\n        record = False):\n        \n        self.android_avd_home = os.path.expanduser(android_avd_home)\n        self.emulator_path = os.path.expanduser(emulator_path)\n        self.adb_path = os.path.expanduser(adb_path)\n        self.avd_name = avd_name\n        self.save_images = save_images\n        self.bsize = len(cache_avd_names)\n        self.cache_avd_names = cache_avd_names\n        self.run_headless = run_headless\n        self.max_steps = max_steps\n        self.emulator_group_offset = 0\n        if use_feature_extractor:\n            self.feature_extractor = ImageFeatureExtractor(\"cpu\")\n        else:\n            self.feature_extractor = None\n        self.device = device\n        self.record = record\n        self.all_tasks = all_tasks\n        self.task_split = task_split\n        self.prepare_prompt = prepare_prompt\n        self.translate_action = translate_action\n        self.temp_path = temp_path\n        if evaluators is None:\n            evaluators = [None for _ in range(self.bsize)]\n        self.evaluators = evaluators\n        if not os.path.exists(temp_path):\n            os.makedirs(temp_path)\n        self.udids = udids\n        self.base_port = appium_base_port\n        self.appium_processes = []\n        self.sample_mode = sample_mode\n\n        # Start the appium servers\n        for i in range(self.base_port, self.base_port+self.bsize):\n            self.appium_processes.append(subprocess.Popen(f\"appium --relaxed-security -p {i} > /dev/null\", stdout=subprocess.DEVNULL, shell=True))\n            print(\"starting appium server at port \", i)\n        self.appium_server_urls = [f\"http://localhost:{i}\" for i in range(self.base_port, self.base_port+self.bsize)]\n    \n    def reset_appium(self):\n        for p in self.appium_processes:\n            p.terminate()\n 
           try:\n                p.wait(timeout=20)\n            except subprocess.TimeoutExpired:\n                p.kill()\n                p.wait()\n        os.system(\"pkill -f appium\")\n        self.base_port = self.base_port + self.bsize * 2\n        self.appium_processes = []\n        for i in range(self.base_port, self.base_port+self.bsize):\n            self.appium_processes.append(subprocess.Popen(f\"appium --relaxed-security -p {i} > /dev/null\", stdout=subprocess.DEVNULL, shell=True))\n        # sleep(10)\n        self.appium_server_urls = [f\"http://localhost:{i}\" for i in range(self.base_port, self.base_port+self.bsize)]\n\n    def reset(self):\n        \"\"\"\n        Reset the emulator to a clean state\n        \"\"\"\n        # If the emulator is already running, kill it,\n        # Then delete the cache AVD\n        kill_all_emulators(self.adb_path, emulators=self.udids)\n        if hasattr(self, \"emulator_process\"):\n            self.emulator_process.send_signal(signal.SIGINT)\n            self.emulator_process.wait()\n        self.emulators = []\n        for cache_avd_name in self.cache_avd_names:\n            # print(cache_avd_name)\n            for _ in range(3):\n                try:\n                    cache_avd_path = os.path.join(self.android_avd_home, cache_avd_name + \".avd\")\n                    cache_avd_ini_path = os.path.join(self.android_avd_home, cache_avd_name + \".ini\")\n                    if os.path.exists(cache_avd_path):\n                        shutil.rmtree(cache_avd_path, ignore_errors=True)\n                    if os.path.exists(cache_avd_ini_path):\n                        os.remove(cache_avd_ini_path)\n                    sleep(2)\n                    # Clone the source AVD and start the emulator\n                    clone_avd(self.avd_name, cache_avd_name, self.android_avd_home)\n                    break\n                except OSError as e:\n                    print(f\"Failed to reset the emulator: {e}\")\n   
                 import traceback\n                    print(traceback.format_exc())\n                    sleep(20)\n\n        # # use parallel version only when you've got nice CPUs, or it will error out\n        # def reset_emulator(cache_avd_name, avd_name, android_avd_home):\n        #     for _ in range(3):\n        #         try:\n        #             cache_avd_path = os.path.join(android_avd_home, cache_avd_name + \".avd\")\n        #             cache_avd_ini_path = os.path.join(android_avd_home, cache_avd_name + \".ini\")\n        #             if os.path.exists(cache_avd_path):\n        #                 shutil.rmtree(cache_avd_path, ignore_errors=True)\n        #             if os.path.exists(cache_avd_ini_path):\n        #                 os.remove(cache_avd_ini_path)\n        #             sleep(2)\n        #             # Clone the source AVD and start the emulator\n        #             clone_avd(avd_name, cache_avd_name, android_avd_home)\n        #             break\n        #         except OSError as e:\n        #             print(f\"Failed to reset the emulator: {e}\")\n        #             import traceback\n        #             print(traceback.format_exc())\n        #             sleep(20)\n\n        # with concurrent.futures.ThreadPoolExecutor() as executor:\n        #     futures = [executor.submit(reset_emulator, cache_avd_name, self.avd_name, self.android_avd_home) for cache_avd_name in self.cache_avd_names]\n        #     for future in futures:\n        #         future.result()\n\n        def emulator_constructor(udid, appium_server_url, cache_avd_name, evaluator, task_id, task_split):\n            return AndroidEmulator(avd_name=cache_avd_name, max_steps=self.max_steps, emulator_path=self.emulator_path, \n                appium_server_url=appium_server_url, \n                no_window=self.run_headless, \n                udid = udid,\n                feature_extractor = self.feature_extractor,\n                prepare_prompt = 
self.prepare_prompt,\n                translate_action = self.translate_action,\n                all_tasks = self.all_tasks,\n                evaluator = evaluator,\n                temp_path = os.path.join(self.temp_path, cache_avd_name),\n                save_images = self.save_images,\n                task_id=task_id,\n                task_split=task_split,\n                sample_mode=self.sample_mode,\n                record=self.record)\n\n        with concurrent.futures.ThreadPoolExecutor() as executor:\n            jobs = [executor.submit(emulator_constructor, udid, appium_server_url, cache_avd_name, evaluator, task_id, self.task_split)\n                for udid, appium_server_url, cache_avd_name, evaluator, task_id in \n                zip(self.udids, self.appium_server_urls, self.cache_avd_names, self.evaluators, range(self.emulator_group_offset, self.emulator_group_offset+self.bsize))]\n            self.emulators = [job.result() for job in jobs]\n\n        with concurrent.futures.ThreadPoolExecutor() as executor:\n            jobs = [executor.submit(emulator.get_obs) for emulator in self.emulators]\n            # for i, job in enumerate(jobs):\n                # colorful_print(f\"Getting observation from emulator {i}: {job.result()}\", \"green\")\n            return [job.result() for job in jobs]\n\n    def step(self, actions):\n        if not self.emulators:\n            raise Exception(\"Please call reset() before calling step()\")\n        with concurrent.futures.ThreadPoolExecutor() as executor:\n            jobs = [executor.submit(emulator.step, action) \n                    for emulator, action in \n                    zip(self.emulators, actions)]\n            results = [job.result() for job in jobs]\n        return results\n"
  },
  {
    "path": "digirl/environment/android/evaluate.py",
    "content": "import json\nimport requests\nfrom PIL import Image\nfrom typing import List, Tuple\nfrom gradio_client import Client\nfrom transformers import AutoTokenizer\nimport numpy as np\nfrom gradio_client.utils import QueueError, file\nfrom time import sleep\nimport re\nimport os\nimport io   \nfrom tenacity import retry, wait_random_exponential, stop_after_attempt, wait_fixed, wait_chain\nimport base64\nimport traceback\nimport google.generativeai as genai\nimport subprocess\nimport time\nimport signal\n\ndef extract_status(text):\n    match = re.search(r'Status:\\s*(\\w+)', text)\n    if match:\n        return match.group(1)\n    else:\n        return None\n\ndef build_prompt_webshop(intent) -> Tuple[str, str]:\n    system_msg = \"\"\"You're an expert in evaluating whether the Screenshot successfully completes the Task.\"\"\"\n    prompt = [f\"\"\"Task: Go to bestbuy.com\nQ: What should I expect to see on the screenshot if I've gone to bestbuy.com?\nA: I should expect to see I'm in the Best Buy website, which usually shows the best buy logo with some featured products and categories. The screenshot shows I'm searching for \"bestbuy.com\" in the Google search (with some search suggestions) instead of being in the Best Buy website.\nStatus: failure\"\"\", #0\nf\"\"\"Task: Go to costco.com\nQ: What should I expect to see on the screenshot if I've gone to costco.com?\nA: I should expect to see I'm in the Costco website, which usually shows the homepage with some featured products and categories. The screenshot shows I'm in the Costco website with some featured products and categories.\nStatus: success\"\"\", #1\nf\"\"\"Task: Go to bestbuy.com, search for \"macbook\"\nQ: What should I expect to see on the screenshot if I've gone to bestbuy.com AND searched for \"macbook\"?\nA: I should expect to see I'm in the Best Buy website and search results for \"macbook\". 
The screenshot shows I'm in the Best Buy website and have several search suggestions for \"macbook\", but it does not show search results of the product, which usually includes price and the product details.\nStatus: failure\"\"\", #2\nf\"\"\"Task: Go to ebay.com, search for \"corsair k70\"\nQ: What should I expect to see on the screenshot if I've gone to ebay.com AND searched for \"corsair k70\"?\nA: I should expect to see I'm in the eBay website and search results for \"corsair k70\". The screenshot shows I'm in the eBay website with some search suggestions for \"corsair k70\", but it does not show search results of the product, which usually includes price and the product details.\nStatus: failure\"\"\", #3\nf\"\"\"Task: Go to walmart.com, search for \"macbook air\"\nQ: What should I expect to see on the screenshot if I've gone to walmart.com AND searched for \"macbook air\"?\nA: I should expect to see I'm in the Walmart website and search results for \"macbook air\". The screenshot shows I'm in Google search with some search suggestions for \"macbook air\", not Walmart.\nStatus: failure\"\"\", #4\nf\"\"\"Task: Go to walmart.com, search for \"razer huntsman\"\nQ: What should I expect to see on the screenshot if I've gone to walmart.com AND searched for \"razer huntsman\"?\nA: I should expect to see I'm in the Walmart website and search results for \"razer huntsman\". The screenshot shows I'm in the Walmart website, but there's no search results for \"razer huntsman\", which usually includes the product details and price.\nStatus: failure\"\"\", #5\nf\"\"\"Task: Go to ebay.com, search for \"lenovo thinkpad\"\nQ: What should I expect to see on the screenshot if I've gone to ebay.com AND searched for \"lenovo thinkpad\"?\nA: I should expect to see I'm in the eBay website and search results for \"lenovo thinkpad\". 
The screenshot shows I'm in the eBay website and have several search results for \"lenovo thinkpad\".\nStatus: success\"\"\", #6\nf\"\"\"Task: Go to ebay.com, search for \"razer thresher\", select the first entry\nQ: What should I expect to see on the screenshot if I've gone to ebay.com AND going to the first entry of the search results of \"razer thresher\"?\nA: I should expect to see I'm in the eBay website and detailed information of a razer thresher product, like a big image of the product, the price, and the product details. The screenshot shows I'm in the eBay website but with more than one search results for \"razer thresher\", which means the user has not selected the first entry of the search results.\nStatus: failure\"\"\", #7\nf\"\"\"Task: Go to target.com, search for \"razer kraken\", and select the first entry\nQ: What should I expect to see on the screenshot if I've gone to target.com AND gone to the first entry of the search results of \"razer kraken\"?\nA: I should expect to see I'm in the Target website and can see detailed information of a razer kraken product, like a big image of the product, the price, and the product details. The screenshot shows I'm in Google Search, not in the Target website.\nStatus: failure\"\"\", #8\nf\"\"\"Task: Go to ebay.com, search for \"acer predator\", and select the first entry\nQ: What should I expect to see on the screenshot if I've gone to ebay.com AND gone to the first entry of the search results of \"acer predator\"?\nA: I should expect to see I'm in the eBay website with detailed information of an acer predator product, like a big image of the product, the price, and the product details. 
The screenshot shows I'm in the eBay website and have more than one search results for \"acer predator\", which means the user has not selected the first entry of the search results.\nStatus: failure\"\"\", #9\nf\"\"\"Task: Go to bestbuy.com, search for \"macbook\", select the first entry\nQ: What should I expect to see on the screenshot if I've gone to bestbuy.com AND gone to the first entry of the search results of \"macbook\"?\nA: I should expect to see I'm in the Best Buy website and detailed information of a macbook product, like a big image of the product, the price, and the product details. The screenshot shows I'm in the Best Buy website and have detailed information of Macbook Air, including the price and the product details.\nStatus: success\"\"\", #10\nf\"\"\"Task: {intent}\nRespond in this format:\nQ: What should I expect to see on the screenshot if I've <repeat the task>?\nA: I should expect to see <first expectation, then what's in the given screenshot.>\nStatus: success or failure (don't return anything else)\nStart with \"Q:\".\"\"\"]\n\n    image_paths = os.path.join(os.path.dirname(__file__), \"assets\", \"images\")\n    cot_image_list = [os.path.join(image_paths, \"step1_bestbuy.png\"), # 0\n                    os.path.join(image_paths, \"step1_costco.png\"), # 1\n                    os.path.join(image_paths, \"step2_bestbuy.png\"), # 2\n                    os.path.join(image_paths, \"step2_ebay.png\"), # 3\n                    os.path.join(image_paths, \"step2_walmart.png\"), # 4\n                    os.path.join(image_paths, \"step2_walmart2.png\"), # 5\n                    os.path.join(image_paths, \"step2_ebay2.png\"), # 6\n                    os.path.join(image_paths, \"step3_ebay.png\"), # 7\n                    os.path.join(image_paths, \"step3_target.png\"), # 8\n                    os.path.join(image_paths, \"step3_ebay2.png\"), # 9\n                    os.path.join(image_paths, \"step3_bestbuy.png\"), # 10\n                    \"\" # -1\n       
             ]    \n    \n    return system_msg, prompt, cot_image_list\n\n\ndef build_prompt_general(intent) -> Tuple[str, str]:\n    system_msg = \"\"\"You're an expert in evaluating whether the Screenshot successfully completes the Task.\"\"\"\n    prompt = [f\"\"\"Task: Open the settings.\nQ: What should I expect to see on the screenshot if I've opened the settings?\nA: I should expect to see I'm in the settings app. The screenshot shows the home screen of a mobile device, with various app icons displayed, including the settings app icon, but the settings app is not opened.\nStatus: failure\"\"\", #0\nf\"\"\"Task: Find hotels in washington dc\nQ: What should I expect to see on the screenshot if I've searched for hotels in Washington, DC?\nA: I should expect to see I'm in a search results page for hotels in Washington, DC. The screenshot shows a Google search page with the search field populated with the query \"hotels in washington dc\" and a list of suggested searches related to hotels in Washington, DC, but it does not show any search results for hotels in Washington, DC.\nStatus: failure\"\"\", #1\nf\"\"\"Task: What's a good restaurant in Portland?\nQ: What should I expect to see on the screenshot if I've searched for a good restaurant in Portland?\nA: I should expect to see I'm in a search results page for a good restaurant in Portland. The screenshot shows a Google search page with a search input field for \"good restaurant in portland\" and a map results preview showing business locations near Portland, like \"Li Pigeon\", \"Portland City Grill\", and \"Higgins\",\nStatus: success\"\"\", #2\nf\"\"\"Task: What's on the menu at In-N-Out?\nQ: What should I expect to see on the screenshot if I've searched for the menu at In-N-Out?\nA: I should expect to see a menu page for In-N-Out, including product names, thumbnails and prices. 
The screenshot shows a Google search page with a search input field for \"In-N-Out menu\" and some page snippets of In-N-Out indicating potential menu items, but does not actually show the actual menu.\nStatus: failure\"\"\", #3\nf\"\"\"Task: What's the news in Suriname?\nQ: What should I expect to see on the screenshot if I've searched for the news in Suriname?\nA: I should expect to see some news in Suriname, such as someone did something or some accident happens in Suriname. The screenshot shows a Google search page with a search input field for \"Suriname news today\" and some page snippets indicating potential news items, but does not actually show the news.\nStatus: failure\"\"\", #4\nf\"\"\"Task: What's the weather like in Chicago?\nQ: What should I expect to see on the screenshot if I've searched for the weather in Chicago?\nA: I should expect to see some exact values like temperature, humidity, wind speed, and weather condition in Chicago. The screenshot shows a Google search page with a search input field for \"weather in Chicago\" and some page snippets indicating potential weather information. Although one page snippet contains some weather information, the information is not comprehensive enough to determine the weather in Chicago.\nStatus: failure\"\"\", #5\nf\"\"\"Task: Set an alarm for 6pm.\nQ: What should I expect to see on the screenshot if I've set an alarm for 6pm?\nA: I should expect to see some alarms including a 6pm alarm activated in the clock app. The screenshot shows an attempt to set an alarm for 6pm in the clock app, but the alarm is not set yet.\nStatus: failure\"\"\", #6\nf\"\"\"Task: What's the news in French today?\nQ: What should I expect to see on the screenshot if I've searched for the news in French today?\nA: I should expect to see some news in French today, such as someone did something or some accident happens in French today. 
The screenshot shows I'm in the website france24.com but blocked with a cookie consent banner.\nStatus: failure\"\"\", #7\nf\"\"\"Task: What's the news in French today?\nQ: What should I expect to see on the screenshot if I've searched for the news in French today?\nA: I should expect to see some news in French today, such as someone did something or some accident happens in French today. The screenshot shows I'm in the website france24.com and can see the news, like something about the Olympic flame.\nStatus: success\"\"\", #8\nf\"\"\"Task: {intent}\nRespond in this format:\nQ: What should I expect to see on the screenshot if I've <repeat the task>?\nA: I should expect to see <first expectation, then what's in the given screenshot.>\nStatus: success or failure (don't return anything else)\nStart with \"Q:\".\"\"\"]\n    \n    image_paths = os.path.join(os.path.dirname(__file__), \"assets\", \"images\")\n    cot_image_list = [os.path.join(image_paths, \"screenshot_menu.png\"), # 0\n                os.path.join(image_paths, \"screenshot_hotel.png\"), # 1\n                os.path.join(image_paths, \"screenshot_restaurant.png\"), # 2\n                os.path.join(image_paths, \"screenshot_foodmenu.png\"), # 3\n                os.path.join(image_paths, \"screenshot_news.png\"), # 4\n                os.path.join(image_paths, \"screenshot_weather.png\"), # 5\n                os.path.join(image_paths, \"screenshot_alarm.png\"), # 6\n                os.path.join(image_paths, \"screenshot_frenchnews_blocked.png\"), # 7\n                os.path.join(image_paths, \"screenshot_frenchnews_okay.png\"), # 8\n                \"\" # -1\n                ]\n    \n    return system_msg, prompt, cot_image_list\n\n@retry(wait=wait_chain(*[wait_fixed(1) for i in range(3)] + [wait_fixed(3) for i in range(2)] + [wait_fixed(5)]),\n         stop=stop_after_attempt(5))\ndef call_gemini(client, system_msg, prompt, image_list, image_path):\n    if type(prompt) == list:\n        input_msg = 
[system_msg + \"\\n\" + \"=====Examples=====\"]\n        for i in range(len(image_list)-1):\n            input_msg += [\n                \"\\nScreenshot:\",\n                process_image(image_list[i]),\n                prompt[i]\n            ]\n        input_msg += [\n            \"=====Your Turn=====\",\n            \"\\nScreenshot: \",\n            process_image(image_path),\n            prompt[-1]\n        ]\n        response = client.generate_content(\n           input_msg\n        )\n    else:\n        response = client.generate_content(\n            [\n                system_msg + \"\\n\" + prompt,\n                process_image(image_path)\n            ]\n        )\n    response.resolve()\n    response_text = response.text\n    return response_text\n\ndef process_image(image_path):\n    image = Image.open(image_path, 'r')\n    image = image.resize((image.width // 4, image.height // 4))\n    # Save to a BytesIO object (in-memory file) as PNG\n    buffer = io.BytesIO()\n    image.save(buffer, format=\"PNG\")\n\n    # Load it back from the BytesIO object\n    buffer.seek(0)\n    image_reloaded = Image.open(buffer)\n    return image_reloaded\n\nclass EndResultEvaluator:\n    def __init__(self, gemini_key=None, task_set=None):\n        genai.configure(api_key=gemini_key)\n        self.client = genai.GenerativeModel(\"models/gemini-1.5-pro-latest\")\n        self.img_matrix = None\n        self.cache_max = 5\n        self.threshold = 0.001 * 255**2\n        self.task_set = task_set\n\n    def __call__(self, last_two_images, intent: str) -> bool:\n        \"\"\"\n        last_two_images: a list of two image path. 
[last_image_path, this_image_path]\n        intent: a string representing the user's intent\n\n        Returns:\n        - True if the task is completed\n        - False otherwise\n\n        If there's an error, it will return False and print the error message\n        \"\"\"\n        with Image.open(last_two_images[0]) as img1_src, Image.open(last_two_images[1]) as img2_src:   \n            img1 = np.array(img1_src)\n            img2 = np.array(img2_src)\n        if np.mean((img1.astype(np.float64) - img2.astype(np.float64))**2) < self.threshold:\n            print(\"skipping evaluation due to same images\")\n            return 0\n        # this is an approximation, but it should be fine to add frequently viewed false negatives\n        if self.img_matrix is None:\n            self.img_matrix = np.expand_dims(img2, axis = 0)\n        # will always trigger after the first time\n        else:\n            distances = np.mean((self.img_matrix.astype(np.float64) - img2.astype(np.float64))**2, axis = (1,2,3))\n            if np.min(distances) < self.threshold:\n                print(\"skipping evaluation due to previously seen image, current img_matrix size: \", self.img_matrix.shape[0])\n                return 0\n            elif self.img_matrix.shape[0] < self.cache_max:\n                self.img_matrix = np.concatenate([self.img_matrix, np.expand_dims(img2, axis = 0)], axis = 0)\n        \n        print(f\"Task: {intent}, image: {last_two_images[1]}\")\n        eval_res = self._evaluate(intent, last_two_images[1])\n            \n        del img1, img2\n        return eval_res\n\n    def _evaluate(self, intent: str, image_path: str) -> bool:\n        if self.task_set == \"general\":\n            system_msg, prompt, cot_image_list = build_prompt_general(intent)\n        elif self.task_set == \"webshop\":\n            system_msg, prompt, cot_image_list = build_prompt_webshop(intent)\n        \n        response_text = call_gemini(self.client, system_msg, prompt, 
cot_image_list, image_path)\n\n        if extract_status(response_text) is not None and 'success' in extract_status(response_text).lower():\n            print(\"Success!\")\n            print(\"image path:\" + image_path)\n            print(\"prompt\")\n            print(prompt)\n            print(\"response\")\n            print(response_text)\n            return 1\n        return 0\n    \n"
  },
  {
    "path": "digirl/environment/env_utils.py",
    "content": "import torch\nfrom tqdm import tqdm\nimport numpy as np\nimport accelerate\nfrom digirl.models import timeout\n\ndef add_trajectory_reward(trajectory):\n    \"\"\"\n    add trajectory reward to the dict of each interaction\n    \"\"\"\n    trajectory_reward = np.sum([d[\"reward\"] for d in trajectory])\n    for d in trajectory:\n        d.update({\"trajectory_reward\": trajectory_reward})\n    return trajectory\n\ndef add_mc_return(trajectory, gamma = 0.95):\n    \"\"\"\n    add trajectory reward to the dict of each interaction\n    \"\"\"\n    trajectory_rewards = np.array([d[\"reward\"] for d in trajectory]).reshape(1, -1)\n    gamma_row = np.cumprod(np.ones((1, trajectory_rewards.shape[1]))*gamma)\n    gamma_matrix = np.triu(gamma_row.reshape(1, -1 )/ gamma_row.reshape(-1, 1))\n    mc_returns = np.sum(trajectory_rewards*gamma_matrix, axis = 1)\n    for d, mc in zip(trajectory, mc_returns):\n        d.update({\"mc_return\": mc})\n    return trajectory\n\ndef batch_interact_environment(agent, env, num_trajectories,\\\n        accelerator, post_f = lambda x: x, use_tqdm = True, decode_f = lambda x: x, gamma = 0.95, iter=0):\n    \"\"\"\n    in a bacthed way, interact with the environments  to get a list of trajectories\n    [[{\"observation\":, \"next_observation\":, \"reward\":, \"done\":},...],...]\n    post_f: function to add additional attributes to the trajectory\n    \"\"\"\n    # broadcast the batch size\n    bsize = torch.Tensor([0,]).to(accelerator.device)\n    if accelerator.is_main_process:\n        bsize[0] = env.bsize\n    accelerate.utils.broadcast(bsize)\n    bsize = int(bsize.item())\n    all_trajectories = []\n    if accelerator.is_main_process:\n        if hasattr(agent, \"critic\"):\n            env.feature_extractor.model = env.feature_extractor.model.to(env.device)\n            agent.critic.to(\"cpu\")\n    for num_t in tqdm(range(num_trajectories//bsize), disable = not use_tqdm):\n        if accelerator.is_main_process:\n       
     env.emulator_group_offset = iter * num_trajectories + num_t * bsize\n        for _ in range(3):\n            try:\n                done = False\n                trajectories = [[] for _ in range(bsize)]\n                #handle the case where the reset fails and timeouts\n                reset_success = torch.Tensor([False,]).to(accelerator.device)\n                while not all(reset_success):\n                    for _ in range(5):\n                        try:\n                            if accelerator.is_main_process:\n                                with timeout(seconds=240): # change this if frequently timeout\n                                    batch_obs = env.reset()\n                                #the observation space is now a tuple of (text, image)\n                                if type(batch_obs[0]['image_feature']) == torch.Tensor:\n                                    batch_img = [obs[\"image_feature\"] for obs in batch_obs]\n                                else:\n                                    batch_img = [\"Image feature is not a tensor\" for _ in range(bsize)]\n                                if env.feature_extractor is not None:\n                                    # colorful_print(\"autoui has critic, so batch_obs being refractored\", \"red\")\n                                    batch_obs = [obs[\"prompt\"] for obs in batch_obs]\n                                reset_success[0] = True\n                            accelerate.utils.broadcast(reset_success)\n                            break\n                        except Exception as e:\n                            print(f\"Error in environment reset\")\n                            print(e)\n                            if hasattr(env, \"reset_appium\"):\n                                print(\"Resetting appium\")\n                                env.reset_appium()\n                            accelerate.utils.broadcast(reset_success)\n                            continue\n          
      batch_done = torch.Tensor([False,]*bsize).to(accelerator.device)\n                accelerate.utils.broadcast(batch_done)\n                steps = 0\n                while not all(batch_done):\n                    steps += 1\n                    if accelerator.is_main_process:\n                        # print(f\"Environment stpes {str(steps)}\")\n                        # print(\"getting actions!\")\n                        if env.feature_extractor is not None:\n                            action = agent.get_action(batch_obs, torch.cat([i.unsqueeze(0) for i in batch_img], dim = 0))\n                        else:\n                            action = agent.get_action(batch_obs, None)\n                        # import IPython; IPython.embed(); exit(1)\n                        with timeout(seconds=5*60):\n                            batch_return = env.step(decode_f(action))\n                        # batch_return = env.step(decode_f(action))\n                        # import IPython; IPython.embed()\n                        for i,result in zip(range(bsize), batch_return):\n                            if result is None:\n                                batch_done[i] = True\n                                continue\n                            obs_dict, r, done = result\n                            next_img = obs_dict[\"image_feature\"]\n                            next_obs = obs_dict[\"prompt\"]\n                            if not hasattr(agent, \"critic\"):\n                                trajectories[i].append({\"observation\": batch_obs[i], \\\n                                    \"next_observation\": next_obs, \\\n                                    \"image_features\": None, \\\n                                    \"image_path\": obs_dict[\"image_path\"], \\\n                                    \"next_image_features\": None, \\\n                                    \"task\": obs_dict[\"task\"],\\\n                                    \"reward\": r, \\\n         
                           \"done\": done, \\\n                                    \"action\": action[i]})\n                                batch_obs[i] = obs_dict\n                            else:\n                                trajectories[i].append({\"observation\": batch_obs[i], \\\n                                    \"next_observation\": next_obs, \\\n                                    \"image_features\": batch_img[i].cpu().numpy(), \\\n                                    \"image_path\": obs_dict[\"image_path\"], \\\n                                    \"video_path\": obs_dict[\"video_path\"], \\\n                                    \"next_image_features\": next_img.cpu().numpy(), \\\n                                    \"task\": obs_dict[\"task\"],\\\n                                    \"reward\": r, \\\n                                    \"done\": done, \\\n                                    \"action\": action[i]})\n                                batch_obs[i] = next_obs\n                            \n                            batch_img[i] = next_img\n                            batch_done[i] = done\n                    accelerate.utils.broadcast(batch_done)\n                    # print(\"waiting for everyone\")\n                    # accelerator.wait_for_everyone()\n                    # obs = next_obs\n                if accelerator.is_main_process:\n                    print(trajectories[0][-1][\"next_observation\"])\n                    all_trajectories += [post_f(add_mc_return(add_trajectory_reward(trajectory), gamma=gamma))\\\n                                        for trajectory in trajectories]\n                break\n            except Exception as e:\n                print(f\"Error in environment interaction\")\n                import traceback\n                print(traceback.format_exc())\n                print(e)\n                if hasattr(env, \"reset_appium\"):\n                    print(\"Resetting appium\")\n                    
env.reset_appium()\n                continue\n    if accelerator.is_main_process:\n        if env.feature_extractor is not None:\n            env.feature_extractor.model = env.feature_extractor.model.to(\"cpu\")\n            if hasattr(agent, \"critic\"):\n                agent.critic.to(agent.device)\n        \n    return all_trajectories\n"
  },
  {
    "path": "digirl/misc.py",
    "content": "\"\"\"\nMiscellaneous Utility Functions\n\"\"\"\nimport click\nimport warnings\nfrom torch.utils.data import Dataset\ndef colorful_print(string: str, *args, **kwargs) -> None:\n    print(click.style(string, *args, **kwargs))\n\ndef colorful_warning(string: str, *args, **kwargs) -> None:\n    warnings.warn(click.style(string, *args, **kwargs))\n"
  },
  {
    "path": "digirl/models/__init__.py",
    "content": "from .autoui_agent import AutoUIAgent, timeout\nfrom .cog_agent import CogAgent"
  },
  {
    "path": "digirl/models/autoui_agent.py",
    "content": "import torch\nfrom transformers import AutoTokenizer\nfrom digirl.models.critic import VLMDoubleCritic, TrajectoryCritic\nfrom .model import T5ForMultimodalGeneration\nimport signal\n\nclass timeout:\n    def __init__(self, seconds=1, error_message='Timeout'):\n        self.seconds = seconds\n        self.error_message = error_message\n    def handle_timeout(self, signum, frame):\n        raise TimeoutError(self.error_message)\n    def __enter__(self):\n        signal.signal(signal.SIGALRM, self.handle_timeout)\n        signal.alarm(self.seconds)\n    def __exit__(self, type, value, traceback):\n        signal.alarm(0)\n\nclass AutoUIAgent(torch.nn.Module):\n    def __init__(self, device, accelerator, policy_lm = \"gpt2\", critic_lm = \"roberta-base\", \n                cache_dir = '~/.cache', dropout = 0.5, TEMPLATE = None, use_lora=False,\n                do_sample = True, temperature = 1.0, max_new_tokens = 32, use_bfloat16 = False, eos_str = None):\n        super(AutoUIAgent, self).__init__()\n        if use_bfloat16:\n            self.model = T5ForMultimodalGeneration.from_pretrained(policy_lm, cache_dir=cache_dir,\n                                                              torch_dtype = torch.bfloat16).to(device)\n        else:\n            self.model = T5ForMultimodalGeneration.from_pretrained(policy_lm, cache_dir=cache_dir).to(device)\n        if use_lora:\n            from peft import LoraConfig, TaskType, get_peft_model\n            lora_config = LoraConfig(\n                r=16,\n                target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj'],\n                task_type=TaskType.CAUSAL_LM,\n                lora_alpha=32,\n                lora_dropout=0.05\n            )\n            self.model = get_peft_model(self.model, lora_config)\n            print(\"Using LoRA\")\n            self.model.print_trainable_parameters()\n        self.template = TEMPLATE\n        self.policy_lm = policy_lm\n        self.critic = 
VLMDoubleCritic(device, accelerator, critic_lm = critic_lm, cache_dir = cache_dir, in_dim = 768, out_dim = 1)  \n        self.trajectory_critic = TrajectoryCritic(device, accelerator, critic_lm = critic_lm, cache_dir = cache_dir, in_dim = 768, out_dim = 1)\n        self.target_critic = None\n        self.tokenizer = AutoTokenizer.from_pretrained(policy_lm, trust_remote_code=True, cache_dir=cache_dir)\n        self.tokenizer.truncation_side = 'left'\n        self.tokenizer.pad_token = self.tokenizer.eos_token\n        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id\n        self.device = device\n        self.dropout = torch.nn.Dropout(p=dropout)\n        self.softmax = torch.nn.Softmax(dim= -1)\n        self.do_sample = do_sample\n        self.temperature = temperature\n        self.accelerator = accelerator\n        self.max_new_tokens = max_new_tokens\n        self.eos_str = eos_str\n    \n    def prepare(self):\n        self.model = self.accelerator.prepare(self.model)\n        self.critic = self.accelerator.prepare(self.critic)\n        self.trajectory_critic = self.accelerator.prepare(self.trajectory_critic)\n\n    def get_action(self, observation, image_features):\n        image_features = image_features[..., -1408:]\n        # if self.template is not None:\n        #     observation = [self.template.replace(\"{obs}\", obs) for obs in observation]\n        for _ in range(3):\n            try:\n                with timeout(seconds=60):\n                    with torch.no_grad():\n                        obs_ids = self.tokenizer(observation, return_tensors='pt', padding=True, max_length=512, truncation = True).to(self.device)\n                        image_features = image_features.to(self.device)\n                        outputs = self.accelerator.unwrap_model(self.model).generate(**obs_ids, image_ids = image_features,\n                                                    max_new_tokens=self.max_new_tokens, do_sample=self.do_sample, temperature = 
self.temperature,\n                                                    pad_token_id = self.tokenizer.eos_token_id).cpu()\n                    break\n            except TimeoutError:\n                print(\"Timeout while accessing actions\")\n                continue\n        raw_action = self.tokenizer.batch_decode(outputs, skip_special_tokens  = True)\n        for _ in range(3):\n            raw_action = [a[1:] if a.startswith('\\n') else a for a in raw_action]\n        # return raw_action\n        if self.eos_str is not None:\n            # print(f\"using eos str {eos_str}\")\n            # print([raw_a.split(self.eos_str)[0] + self.eos_str for raw_a in raw_action])\n            return [raw_a.split(self.eos_str)[0] for raw_a in raw_action]\n        else:\n            return raw_action\n\n    def get_log_prob(self, observation, image_features, action):\n        image_features = image_features[...,-1408:]\n        if self.template is not None:\n            observation = [self.template.replace(\"{obs}\", obs) for obs in observation]\n        obs_ids = self.tokenizer(observation, return_tensors='pt', padding=True, max_length=512, truncation = True).to(self.device)\n        action_ids = self.tokenizer(action, return_tensors='pt', padding=True, max_length=512, truncation = True).to(self.device)\n        outputs = self.model(input_ids = obs_ids[\"input_ids\"],\n                            image_ids = image_features,\n                            attention_mask = obs_ids[\"attention_mask\"],\n                            labels = action_ids[\"input_ids\"])\n        \n        # # action_embeds = self.model.get_input_embeddings()(action_ids[\"input_ids\"]).detach()\n        # # obs_embeds = self.model.get_input_embeddings()(obs_ids[\"input_ids\"]).detach()\n        # input_ids = torch.cat([obs_ids[\"input_ids\"], action_ids[\"input_ids\"]], dim = 1)\n        # # input_embeds = torch.cat([obs_embeds, action_embeds], dim = 1)\n        # attention_mask = 
torch.cat([obs_ids[\"attention_mask\"], action_ids[\"attention_mask\"]],\\\n        #                         dim = 1)\n        # outputs = self.model(input_ids=input_ids, attention_mask = attention_mask)\n        # values = None\n        # if isinstance(outputs, Tuple):\n        #     values, outputs = outputs\n        ## TODO: need to check if token shifting is done correctly\n        prediction_probs = self.softmax(outputs.logits)\n        selected_prediction_probs = torch.take_along_dim(prediction_probs,\\\n                                                 action_ids[\"input_ids\"].unsqueeze(2), dim=2).squeeze(2)\n        selected_prediction_probs = torch.clamp(selected_prediction_probs, min=0.001, max=0.99)\n        # import IPython; IPython.embed(); exit()\n        return torch.log(selected_prediction_probs)*action_ids[\"attention_mask\"]"
  },
  {
    "path": "digirl/models/cog_agent.py",
    "content": "import signal\nfrom gradio_client import Client, handle_file # remember to use gradio==4.43.0 for both client and host!\nimport gradio_client\nfrom time import sleep\nfrom concurrent.futures import ThreadPoolExecutor, as_completed\n\ndef _get_a_action(pair):\n    client, obs = pair\n    text = f'What steps do I need to take to \"{obs[\"task\"]}\"?(with grounding)'\n    for _ in range(3):\n        try:\n            out = client.predict(input_text=text, \n                                 image_prompt=handle_file(obs['image_path']),\n                                 api_name=\"/predict\")\n            return out\n        except:\n            sleep(1)\n    return None\n\nclass CogAgent:\n    def __init__(self, url):\n        urls = url\n        self.clients = [Client(u) for u in urls]\n    \n    def prepare(self):\n        pass\n    \n    def get_action(self, observation, image_features):\n        results = []\n        client_obs_pairs = zip(self.clients, observation)\n        with ThreadPoolExecutor(max_workers=len(self.clients)) as executor:\n            future_to_client_obs = {executor.submit(_get_a_action, pair): pair for pair in client_obs_pairs}\n            for future in as_completed(future_to_client_obs):\n                # try:\n                result = future.result()\n                results.append(result)\n                # except Exception as exc:\n                #     print(f'Generated an exception: {exc}')\n        return results\n\n"
  },
  {
    "path": "digirl/models/critic.py",
    "content": "import torch\nfrom transformers import AutoTokenizer, AutoModel\nimport torch.nn as nn\n\nclass VLMDoubleCritic(torch.nn.Module):\n    def __init__(self, device, accelerator, critic_lm, cache_dir, in_dim, out_dim):\n        \"\"\"\n        VLM critic using image features\n        \"\"\"\n        super(VLMDoubleCritic, self).__init__()\n        self.device = device\n        self.accelerator = accelerator\n        self.base_lm = AutoModel.from_pretrained(critic_lm, cache_dir=cache_dir).to(device)\n        self.base_tokenizer = AutoTokenizer.from_pretrained(critic_lm, cache_dir=cache_dir)\n        self.base_tokenizer.truncation_side = 'left'\n        image_feature_dim = 1408*2\n        out_dim = 2\n\n        # for v\n        self.critic1 = nn.Sequential(nn.Linear(in_dim+image_feature_dim, in_dim),\\\n                                    nn.ReLU(),\\\n                                    nn.Linear(in_dim, in_dim),\\\n                                    nn.ReLU(),\\\n                                    nn.Linear(in_dim, out_dim)).to(device)\n        self.critic2 = nn.Sequential(nn.Linear(in_dim+image_feature_dim, in_dim),\\\n                                    nn.ReLU(),\\\n                                    nn.Linear(in_dim, in_dim),\\\n                                    nn.ReLU(),\\\n                                    nn.Linear(in_dim, out_dim)).to(device)\n\n    def forward(self, observation, image_features, action, detach_model=False):\n        detach_model = True\n        obs_ids = self.base_tokenizer(observation, padding = True, return_tensors='pt', max_length=512, truncation = True).to(self.device)\n        if detach_model:\n            with torch.no_grad():\n                lm_states = self.base_lm(**obs_ids).pooler_output\n        else:\n            lm_states = self.base_lm(**obs_ids).pooler_output\n        v_states = torch.cat([lm_states, image_features], dim = 1)\n        return self.critic1(v_states), self.critic2(v_states)\n\n\nclass 
TrajectoryCritic(torch.nn.Module):\n    def __init__(self, device, accelerator, critic_lm, cache_dir, in_dim, out_dim):\n        \"\"\"\n        VLM critic using image features\n        \"\"\"\n        super(TrajectoryCritic, self).__init__()\n        self.device = device\n        self.accelerator = accelerator\n        self.base_lm = AutoModel.from_pretrained(critic_lm, cache_dir=cache_dir).to(device)\n        self.base_tokenizer = AutoTokenizer.from_pretrained(critic_lm, cache_dir=cache_dir)\n        self.base_tokenizer.truncation_side = 'left'\n        out_dim = 2\n        self.critic = nn.Linear(in_dim, out_dim).to(device)\n\n    def forward(self, observation, detach_model=False):\n        detach_model = False\n        obs_ids = self.base_tokenizer(observation, padding = True, return_tensors='pt', max_length=512, truncation = True).to(self.device)\n        if detach_model:\n            with torch.no_grad():\n                lm_states = self.base_lm(**obs_ids).pooler_output\n        else:\n            lm_states = self.base_lm(**obs_ids).pooler_output\n        return self.critic(lm_states)\n"
  },
  {
    "path": "digirl/models/infer_utils.py",
    "content": "import torch\nfrom PIL import Image\nfrom transformers import AutoProcessor, Blip2VisionModel\n\nclass ImageFeatureExtractor:\n    def __init__(self):\n        # Set device based on CUDA availability\n        self.device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n        \n        # Initialize and load the BLIP2 model and processor\n        self.model = Blip2VisionModel.from_pretrained(\"/nfs/kun2/users/yifei/.cache/models--Salesforce--blip2-opt-2.7b/snapshots/235c75ea3861136b9dd202c6edc6a7ba285c35e3\").to(self.device)\n        self.processor = AutoProcessor.from_pretrained(\"Salesforce/blip2-opt-2.7b\")\n\n    def to_feat(self, image: Image.Image):\n        \"\"\"Converts a PIL image to a feature representation using the BLIP2 model.\n        \n        Args:\n            image: A PIL.Image object representing the image to convert.\n            \n        Returns:\n            A tensor representing the image feature.\n        \"\"\"\n        with torch.no_grad():\n            # Preprocess the image and move to the correct device\n            inputs = self.processor(images=image, return_tensors=\"pt\").to(self.device)\n            \n            # Get the image features from the model\n            image_features = self.model(**inputs,\n                                        output_attentions=False,\n                                        output_hidden_states=False,\n                                        return_dict=False).pooler_output[0]\n            #size is 1408\n            \n            # Detach the tensor from the graph and move it to CPU\n            image_features = image_features.detach().cpu()\n            \n        return image_features\n"
  },
  {
    "path": "digirl/models/model.py",
    "content": "'''\nAdapted from https://github.com/huggingface/transformers\n'''\n\nfrom transformers import T5Config, T5ForConditionalGeneration\nfrom transformers.models.t5.modeling_t5 import T5Stack, __HEAD_MASK_WARNING_MSG\nimport copy\nfrom transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput\nimport warnings\nfrom typing import Optional, Tuple, Union\nimport torch\nfrom torch import nn\nfrom torch.nn import CrossEntropyLoss\n\nclass T5ForMultimodalGeneration(T5ForConditionalGeneration):\n    _keys_to_ignore_on_load_missing = [\n        r\"encoder.embed_tokens.weight\",\n        r\"decoder.embed_tokens.weight\",\n        r\"lm_head.weight\",\n    ]\n    _keys_to_ignore_on_load_unexpected = [\n        r\"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight\",\n    ]\n\n    def __init__(self, config: T5Config, img_dim=1408, num_actions=12, use_lm_head = True):\n        super().__init__(config)\n        self.model_dim = config.d_model\n\n        self.shared = nn.Embedding(config.vocab_size, config.d_model)\n\n        self.image_dense = nn.Linear(img_dim, config.d_model)\n        \n        self.mha_layer = torch.nn.MultiheadAttention(embed_dim=config.hidden_size, kdim=config.hidden_size, vdim=config.hidden_size, num_heads=1, batch_first=True)\n        self.gate_dense = nn.Linear(2*config.hidden_size, config.hidden_size)\n        self.sigmoid = nn.Sigmoid()\n\n        encoder_config = copy.deepcopy(config)\n        encoder_config.is_decoder = False\n        encoder_config.use_cache = False\n        encoder_config.is_encoder_decoder = False\n        self.encoder = T5Stack(encoder_config, self.shared)\n\n        decoder_config = copy.deepcopy(config)\n        decoder_config.is_decoder = True\n        decoder_config.is_encoder_decoder = False\n        decoder_config.num_layers = config.num_decoder_layers\n        self.decoder = T5Stack(decoder_config, self.shared)\n        self.use_lm_head = use_lm_head\n        if 
self.use_lm_head:\n            self.lm_head = nn.Linear(config.d_model, config.vocab_size, bias=False)\n\n        # Initialize weights and apply final processing\n        self.post_init()\n\n        # Model parallel\n        self.model_parallel = False\n        self.device_map = None\n    \n    def remove_lm_head(self):\n        self.use_lm_head = False\n        self.lm_head = None\n\n    def forward(\n        self,\n        input_ids: Optional[torch.LongTensor] = None,\n        image_ids=None,\n        attention_mask: Optional[torch.FloatTensor] = None,\n        decoder_input_ids: Optional[torch.LongTensor] = None,\n        decoder_attention_mask: Optional[torch.BoolTensor] = None,\n        head_mask: Optional[torch.FloatTensor] = None,\n        decoder_head_mask: Optional[torch.FloatTensor] = None,\n        cross_attn_head_mask: Optional[torch.Tensor] = None,\n        encoder_outputs: Optional[Tuple[Tuple[torch.Tensor]]] = None,\n        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,\n        inputs_embeds: Optional[torch.FloatTensor] = None,\n        decoder_inputs_embeds: Optional[torch.FloatTensor] = None,\n        labels: Optional[torch.LongTensor] = None,\n        use_cache: Optional[bool] = None,\n        output_attentions: Optional[bool] = None,\n        output_hidden_states: Optional[bool] = None,\n        return_dict: Optional[bool] = None,\n    ) -> Union[Tuple[torch.FloatTensor], Seq2SeqLMOutput]:\n        use_cache = use_cache if use_cache is not None else self.config.use_cache\n        return_dict = return_dict if return_dict is not None else self.config.use_return_dict\n\n        # FutureWarning: head_mask was separated into two input args - head_mask, decoder_head_mask\n        if head_mask is not None and decoder_head_mask is None:\n            if self.config.num_layers == self.config.num_decoder_layers:\n                warnings.warn(__HEAD_MASK_WARNING_MSG, FutureWarning)\n                decoder_head_mask = head_mask\n\n        # 
Encode if needed (training, first prediction pass)\n        if encoder_outputs is None:\n            # Convert encoder inputs in embeddings if needed\n            encoder_outputs = self.encoder(\n                input_ids=input_ids,\n                attention_mask=attention_mask,\n                inputs_embeds=inputs_embeds,\n                head_mask=head_mask,\n                output_attentions=output_attentions,\n                output_hidden_states=output_hidden_states,\n                return_dict=return_dict,\n            )\n\n        elif return_dict and not isinstance(encoder_outputs, BaseModelOutput):\n            encoder_outputs = BaseModelOutput(\n                last_hidden_state=encoder_outputs[0],\n                hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None,\n                attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None,\n            )\n\n        hidden_states = encoder_outputs[0]\n        \n        image_embedding = self.image_dense(image_ids)\n        # use pooled image features\n        if len(image_embedding.size()) == 2:\n            image_embedding = image_embedding.unsqueeze(1)\n        image_att, _ = self.mha_layer(hidden_states, image_embedding, image_embedding)\n        merge = torch.cat([hidden_states, image_att], dim=-1)\n        gate = self.sigmoid(self.gate_dense(merge))\n        hidden_states = (1 - gate) * hidden_states + gate * image_att\n        \n        if self.model_parallel:\n            torch.cuda.set_device(self.decoder.first_device)\n\n        if labels is not None and decoder_input_ids is None and decoder_inputs_embeds is None:\n            # get decoder inputs from shifting lm labels to the right\n            decoder_input_ids = self._shift_right(labels)\n\n        # Set device for model parallelism\n        if self.model_parallel:\n            torch.cuda.set_device(self.decoder.first_device)\n            hidden_states = hidden_states.to(self.decoder.first_device)\n            
if decoder_input_ids is not None:\n                decoder_input_ids = decoder_input_ids.to(self.decoder.first_device)\n            if attention_mask is not None:\n                attention_mask = attention_mask.to(self.decoder.first_device)\n            if decoder_attention_mask is not None:\n                decoder_attention_mask = decoder_attention_mask.to(self.decoder.first_device)\n\n        # Decode\n        decoder_outputs = self.decoder(\n            input_ids=decoder_input_ids,\n            attention_mask=decoder_attention_mask,\n            inputs_embeds=decoder_inputs_embeds,\n            past_key_values=past_key_values,\n            encoder_hidden_states=hidden_states,\n            encoder_attention_mask=attention_mask,\n            head_mask=decoder_head_mask,\n            cross_attn_head_mask=cross_attn_head_mask,\n            use_cache=use_cache,\n            output_attentions=output_attentions,\n            output_hidden_states=output_hidden_states,\n            return_dict=return_dict,\n        )\n\n        sequence_output = decoder_outputs[0]\n\n        # Set device for model parallelism\n        if self.model_parallel:\n            torch.cuda.set_device(self.encoder.first_device)\n            if self.use_lm_head:\n                self.lm_head = self.lm_head.to(self.encoder.first_device)\n                sequence_output = sequence_output.to(self.lm_head.weight.device)\n\n        if self.config.tie_word_embeddings:\n            # Rescale output before projecting on vocab\n            # See https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/transformer/transformer.py#L586\n            sequence_output = sequence_output * (self.model_dim**-0.5)\n        if self.use_lm_head:    \n            lm_logits = self.lm_head(sequence_output)\n        else:\n            lm_logits = None\n        loss = None\n        if labels is not None and self.use_lm_head:\n            loss_fct = 
CrossEntropyLoss(ignore_index=-100)\n            loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), labels.view(-1))\n\n            # TODO(thom): Add z_loss https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/layers.py#L666\n\n        if not return_dict:\n            output = (lm_logits,) + decoder_outputs[1:] + encoder_outputs\n            return ((loss,) + output) if loss is not None else output\n\n        return Seq2SeqLMOutput(\n            loss=loss,\n            logits=lm_logits,\n            past_key_values=decoder_outputs.past_key_values,\n            decoder_hidden_states=decoder_outputs.hidden_states,\n            decoder_attentions=decoder_outputs.attentions,\n            cross_attentions=decoder_outputs.cross_attentions,\n            encoder_last_hidden_state=encoder_outputs.last_hidden_state,\n            encoder_hidden_states=encoder_outputs.hidden_states,\n            encoder_attentions=encoder_outputs.attentions,\n        )\n\n    def prepare_inputs_for_generation(\n        self, decoder_input_ids, past=None, attention_mask=None, use_cache=None, encoder_outputs=None, **kwargs\n    ):\n    # cut decoder_input_ids if past is used\n        if past is not None:\n            decoder_input_ids = decoder_input_ids[:, -1:]\n\n        output = {\n            \"input_ids\": None,  # encoder_outputs is defined. 
input_ids not needed\n            \"encoder_outputs\": encoder_outputs,\n            \"past_key_values\": past,\n            \"decoder_input_ids\": decoder_input_ids,\n            \"attention_mask\": attention_mask,\n            \"use_cache\": use_cache,  # change this to avoid caching (presumably for debugging)\n        }\n\n        output[\"image_ids\"] = kwargs['image_ids']\n\n        return output\n    \n    def test_step(self, tokenizer, batch, **kwargs):\n        device = next(self.parameters()).device\n        input_ids = batch['input_ids'].to(device)\n        image_ids = batch['image_ids'].to(device)\n\n        output = self.generate(\n            input_ids=input_ids,\n            image_ids=image_ids,\n            **kwargs\n        )\n\n        generated_sents = tokenizer.batch_decode(output, skip_special_tokens=True)\n        targets = tokenizer.batch_decode(batch['labels'], skip_special_tokens=True)\n\n        result = {}\n        result['preds'] = generated_sents\n        result['targets'] = targets\n\n        return result"
  },
  {
    "path": "env_setup/README.md",
    "content": "# Environment Installation Guide \n\nWe recommend using the Linux environment. Support for Windows and macOS is not provided, but we welcome contributions.\n\n## Android Software Development Kit (SDK)\n\nPart of this tutorial is based on this [GitHub Gist](https://gist.github.com/nhtua/2d294f276dc1e110a7ac14d69c37904f).\n\n### Install Java (JDK 8)\n\nDownload a Java Development Kit 8 (v1.8.0) release version from the open-source Java releaser [OpenLogic](https://www.oracle.com/java/technologies/downloads/). Install using your Linux package installer, like `apt` or `rpm`. For example, on a Debian server:\n\n```bash\nsudo apt-get update\ncd ~ && mkdir install-android/ && cd install-android\nwget https://builds.openlogic.com/downloadJDK/openlogic-openjdk/8u412-b08/openlogic-openjdk-8u412-b08-linux-x64-deb.deb\nsudo apt install ./openlogic-openjdk-8u412-b08-linux-x64-deb.deb\n```\n\nIf you already have a java binary previously, you should also do this:\n\n```bash\nsudo update-alternatives --config java # select /usr/lib/jvm/openlogic-openjdk-8-hotspot-amd64/bin/java\n```\n\nCheck whether the installation is successful by `java -version`. You should expect the output to show version 1.8.0. Higher versions make `sdkmanager` crash.\n\n```bash\njava -version\n# openjdk version \"1.8.0_412-412\"\n# OpenJDK Runtime Environment (build 1.8.0_412-412-b08)\n# OpenJDK 64-Bit Server VM (build 25.412-b08, mixed mode)\n```\n\n### Install SDK Manager\n\nDownload the Android SDK for Linux from the [official website](https://developer.android.com/studio/index.html#downloads). For your convenience, you can also directly download the [installation package](https://dl.google.com/android/repository/sdk-tools-linux-4333796.zip).\n\n```bash\nwget https://dl.google.com/android/repository/sdk-tools-linux-4333796.zip\n```\n\nNow specify the Android installation path and unzip the installation package to that path. 
It's recommended to use `/home/<username>/.android` as the default installation path.\n\n```bash\nexport ANDROID_HOME=<intended_path_here> # recommended: /home/<username>/.android\nmkdir -p $ANDROID_HOME\nunzip sdk-tools-linux-4333796.zip -d $ANDROID_HOME\n```\n\nMake sure you have `unzip` installed. For example, use `sudo apt install unzip -y` to install on Debian servers. To check whether the unzip was successful:\n\n```bash\nls $ANDROID_HOME\n# tools\n```\n\n### SDK Emulator\n\nPrior to installing the SDK emulators, set the environment variables:\n\n```bash\necho \"export ANDROID_HOME=$ANDROID_HOME\" >> ~/.bashrc\necho 'export SDK=$ANDROID_HOME' >> ~/.bashrc\necho 'export ANDROID_SDK_ROOT=$ANDROID_HOME' >> ~/.bashrc\necho 'export PATH=$SDK/emulator:$SDK/tools:$SDK/tools/bin:$SDK/platform-tools:$PATH' >> ~/.bashrc\nsource ~/.bashrc\n```\n\nNow you should be able to locate the `sdkmanager` binary:\n\n```bash\nwhich sdkmanager\n# .../tools/bin/sdkmanager\n```\n\nThen install the Android emulator 28 (other versions should also work, but the offline data we provided is in version 28):\n\n```bash\nyes | sdkmanager \"platform-tools\" \"platforms;android-28\" \"emulator\"\nyes | sdkmanager \"system-images;android-28;google_apis;x86_64\"\nyes | sdkmanager \"build-tools;28.0.0\"\n```\n\nNow you should be able to view the version of the emulator:\n\n```bash\nemulator -version\n# INFO    | Storing crashdata in: /tmp/android-<username>/emu-crash-34.2.14.db, detection is enabled for process: 16670\n# INFO    | Android emulator version 34.2.14.0 (build_id 11834374) (CL:N/A)\n# INFO    | Storing crashdata in: /tmp/android-<username>/emu-crash-34.2.14.db, detection is enabled for process: 16670\n# INFO    | Duplicate loglines will be removed, if you wish to see each individual line launch with the -log-nofilter flag.\n# ...\n```\n\n## Android Virtual Device (AVD) Initialization\n\nIn the next step, we create an AVD snapshot as the environment. 
\n\n### Device Creation\n\nDownload the device image [here](https://drive.google.com/drive/folders/1ZGKrWiSoGqg8_NoIGT7rWmiZ8CXToaBF?usp=sharing).\n\nUnzip the device image to `$ANDROID_HOME/avd`.\n\n```bash\ncd $ANDROID_HOME\nmkdir avd\ncd avd\nunzip test_Android.zip\n```\n\nYou have now successfully copied the Pixel 28 device that we use for our research.\n\n### KVM Acceleration\n\nIn order to launch the emulator, check whether `kvm` is reachable on your machine. Simply run this command to check:\n\n```bash\nls /dev/kvm\n# /dev/kvm -> you have KVM support\n# ls: cannot access '/dev/kvm': No such file or directory -> you don't have KVM support\n```\n\nIf you don't have KVM support, try to enable it. During our experiments, we find that KVM virtualization makes the emulator at least 5x faster (in all aspects, including bootstrapping and interactions). **Again, failure to set up KVM is likely to backfire your research by significantly increasing the interaction time during reinforcement learning.** You can check whether you can virtualize your machine via\n\n```bash\nsudo apt-get install cpu-checker\nsudo kvm-ok # yes means your machine supports virtualization\n```\n\nIf your machine doesn't support virtualization, first enable this feature (this can be enabled on most virtual server providers). On GCP, for example, refer to [this guide](https://cloud.google.com/compute/docs/instances/nested-virtualization/enabling). To best of our knowledge, AWS only allows virtualization on bare metal machines, so try to set up bare metals for this research.\n\nAfter checking that your machine supports virtualization, enable KVM by referring to [this guide](https://developer.android.com/studio/run/emulator-acceleration#vm-linux). 
If you have done all steps in this guide and you still can't set up KVM, try rebooting your machine.\n\n### Device Bootstrapping\n\nNow check whether you can successfully run an AVD instance with KVM acceleration by starting an emulator:\n\n```bash\nemulator -avd test_Android \"-no-window\" \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-load\"\n# ...\n# Cold boot: requested by the user\n# INFO    | Boot completed in 12579 ms\n```\n\nA successful launch should show `Cold boot: requested by the user` in the end. Now open a new terminal tab, you should be able to see an online devices through `adb`:\n\n```bash\nadb devices\n# List of devices attached\n# emulator-5554   device\n```\n\n## Remote Driver: Appium\n\nNow **don't close the emulator** and open a new terminal tab. We use `appium` as the bind between Python (software) and the Android device (hardware). \n\n### Install Node.js\n\nAppium is based on Node.js. On a Linux system, simply do\n\n```bash\ncurl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash -\nsudo apt-get install -y nodejs\n# the order matters, first install nodesource then install nodejs\n```\n\nNow check the installation through `node -v`:\n\n```bash\nnode -v\n# v18.19.0\n```\n\n### Install Appium\n\nNow install `appium` using Node.js **globally**. Avoid local installations to avoid messing up the `digirl` repo. Also install the `uiautomator2` driver for `appium`.\n\n```bash\nsudo npm i --location=global appium\nappium driver install uiautomator2\n```\n\nNow in the `digirl` conda environment, install the Python interface for Appium (you should have created the `digirl` environment in the main README):\n\n```bash\nconda activate digirl\npip install Appium-Python-Client # this should already be installed using requirements.txt, but better double-check\n```\n\n## Final Step: AVD Snapshot for Quickboot\n\nNow we create an AVD snapshot for quickboot. 
This avoids bootstrapping the device every time we launch it by saving a bootstrapped snapshot.\n\n### Install Device Interface for Appium\n\nFirst, launch `appium`:\n\n```bash\nappium --relaxed-security\n```\n\nThen open a new terminal tab (now you should have 3 tabs, one running Android emulator, one running appium, and this new one) and execute the screenshot script:\n\n```bash\n# wait for half a minute... (if you do screenshot right away, you will get errors cmd: Can't find service: settings. allow some time for emulator to install the packages.)\npython <path_to_digirl_repo>/env_setup/screenshot.py # keep trying this command till it no longer raises errors\n# wait for half a minute...\n# screenshot saved to <current_path>/screenshot.png\n```\n\nYou should now see a screenshot like this: \n\n<img src=\"../assets/screenshot1.png\" alt=\"screenshot1\" width=\"100\"/>\n\nNow go back to the emulator terminal tab. Use `ctrl+c` to exit the emulator, and you should see \n\n```bash\nctrl+c\n# INFO    | Saving with gfxstream=1\n# ERROR   | stop: Not implemented (ignore this error)\n```\n\nNow execute this command to check whether the snapshot is successfully saved:\n\n```bash\nemulator -avd test_Android \"-no-window\" \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-save\"\n# Successfully loaded snapshot 'default_boot'\n```\n\nCongratulations! You're good to go now. Close all tabs and move on to the main README for the experiments.\n\n## Optional (Not Recommended): Create a Device from Scratch\n\nAlternatively, you can create a device from scratch, not using our device. This may result in different behavior from our experiments, so it's not recommended for reproducing our results.\n\n### Device Creation\n\nCreate an Android 28 device from `google_apis`:\n\n```bash\necho no | avdmanager create avd -n test_Android -k \"system-images;android-28;google_apis;x86_64\"\n```\n\nYou should now see a directory at `$ANDROID_HOME/avd`. 
For simplicity, it's advised to set the environment variable `ANDROID_AVD_HOME` to this path:\n\n```bash\necho 'export ANDROID_AVD_HOME=$ANDROID_HOME/avd' >> ~/.bashrc\nsource ~/.bashrc\n```\n\nYou should now be able to see the empty AVD instance at `$ANDROID_AVD_HOME`:\n\n```bash\nls $ANDROID_AVD_HOME\n# test_Android.avd  test_Android.ini\n```\n\nTo align with our experiments, replace the `$ANDROID_AVD_HOME/test_Android.avd/config.ini` file with [our released config file](./config.ini).\n\n```bash\ncp <path_to_digirl_repo>/env_setup/config.ini $ANDROID_AVD_HOME/test_Android.avd/config.ini\n```\n\nYou have now successfully created an empty Pixel device image of the same configuration as the data collected in our research.\n\n### Device Bootstrapping\n\nNow check whether you can successfully run an AVD instance with KVM acceleration by starting an emulator:\n\n```bash\nemulator -avd test_Android \"-no-window\" \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-load\"\n# ...\n# Cold boot: requested by the user\n```\n\nA successful launch should show `Cold boot: requested by the user` in the end. Now open a new terminal tab, you should be able to see an online devices through `adb`:\n\n```bash\nadb devices\n# List of devices attached\n# emulator-5554   device\n```\n\n\n### Install Device Interface for Appium\n\nFirst, launch `appium`:\n\n```bash\nappium --relaxed-security\n```\n\nThen open a new terminal tab (now you should have 3 tabs, one running Android emulator, one running appium, and this new one) and execute the screenshot script:\n\n```bash\n# wait for half a minute... (if you do screenshot right away, you will get errors cmd: Can't find service: settings. 
allow some time for emulator to install the packages.)\npython <path_to_digirl_repo>/env_setup/screenshot.py # keep trying this command till it no longer raises errors\n# wait for half a minute...\n# screenshot saved to <current_path>/screenshot.png\n```\n\nYou should now see a screenshot like this: \n\n<img src=\"../assets/screenshot1.png\" alt=\"screenshot1\" width=\"100\"/>\n\n### Click Google Login in Chrome and Update Chrome\n\nAs we frequently use Chrome for our experiments, we need to ensure Chrome isn't blocked by the login interface. To complete that, we need to manually remove that in the device with ADB commands:\n\n```bash\nadb shell input tap 739 1828 # click the Chrome icon\nsleep 2\nadb shell input tap 654 2043 # click \"accept & continue\"\nsleep 2\nadb shell input tap 197 2066 # click \"no thanks\"\n```\n\nNow do the screenshot again. You should see the produced screenshot like this:\n\n<img src=\"../assets/screenshot2.png\" alt=\"screenshot2\" width=\"100\"/>\n\n```bash\npython <path_to_digirl_repo>/env_setup/screenshot.py # should work on the first try\n```\n\n```\nadb shell input tap 322 719 # click the search bar\nsleep 2\nadb shell input tap 559 2063 # click \"no\"\n```\n\nNow do the screenshot again. You should see the produced screenshot like this:\n\n<img src=\"../assets/screenshot4.png\" alt=\"screenshot2\" width=\"100\"/>\n\n```bash\npython <path_to_digirl_repo>/env_setup/screenshot.py # should work on the first try\n```\n\nThen reboot the device and click the Chrome icon again. Chrome will automatically update its main page. \n\n```bash\nadb reboot\nadb devices\n# ...wait until `adb device` gives `device` instead of `offline`\nadb shell input tap 739 1828 # click the Chrome icon\n```\n\nDo a screenshot again. 
You should expect the screenshot to change to this:\n\n<img src=\"../assets/screenshot3.png\" alt=\"screenshot3\" width=\"100\"/>\n\n```bash\npython <path_to_digirl_repo>/env_setup/screenshot.py # should work on the first try\n```\n\nNow go back to home screen and save a snapshot of the device:\n\n```bash\nadb shell input tap 551 2228 # click the home button\n# you can also use `adb shell input keyevent KEYCODE_HOME`\nsleep 2\nadb emu avd snapshot save default_boot\n# OK\n```\n\nNow go back to the emulator tab. Use `ctrl+c` to exit the emulator, and you should see \n\n```bash\nctrl+c\n# INFO    | Saving with gfxstream=1\n# ERROR   | stop: Not implemented (ignore this error)\n```\n\nNow execute this command to check whether the snapshot is successfully saved:\n\n```bash\nemulator -avd test_Android \"-no-window\" \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-save\"\n# Successfully loaded snapshot 'default_boot'\n```\n\nCongratulations! You're good to go now. 
Close all tabs and move on the main README for the experiments.\n\n\n## Optional: Useful ADB Commands\n\nFeel free to play around ADB with these commands:\n\n|Function|Command|\n|--------|-------|\nManually save a snapshot | `adb emu avd snapshot save default_boot`\nClick | `adb shell input tap 160 240`\nScroll Down | `adb shell input swipe 500 1000 300 300`\nScreenshot | `adb exec-out screencap -p > screenshot.png`\nKeyboard input|`adb shell input text \"insert%syour%stext%shere\" && adb shell input keyevent KEYCODE_ENTER`\nOpen Chrome | `adb shell am start -n com.android.chrome/com.google.android.apps.chrome.Main`\nOpen a website in Chrome |`adb shell am start -a android.intent.action.VIEW -d http://www.stackoverflow.com`\nUninstall a package | `adb shell pm uninstall --user 0 com.example.yourapp`\nInstall a package | `adb install -r /path/to/package`\nCheck application version | `adb shell dumpsys package com.example.yourapp | grep versionName`\nCheck Android version | `adb shell getprop ro.build.version.release`\nScreen recording (high frame rate) | `adb shell screenrecord --size 540x1140 --bit-rate 4M /sdcard/video.mp4`\nPull screenshot to computer | `adb pull /sdcard/video.mp4`\n\n### Update Google Version\n\nCheck Chrome internal version:\n\n```bash\nadb shell input tap 739 1828 # click the Chrome icon\nadb shell input tap 1004 144 # click dots\nadb shell input tap 510 1382 # click settings\nadb shell input swipe 500 1000 300 300 # swipe down\nadb shell input tap 191 2093 # click about Chrome\npython env_setup/screenshot.py\n```\n\nInstall a new version: you must use architecture-specific binaries. In our setup, we use the `x86/x86_64` architecture. If you use a different architecture, you must download the corresponding binaries. 
For example, you can download the `x86 or x86_64` version from [this page](https://www.apkmirror.com/apk/google-inc/chrome/chrome-124-0-6367-172-release/google-chrome-124-0-6367-172-6-android-apk-download/download/?key=6b3dc806b877aa88cb664103bd5e596284b12b4d&forcebaseapk=true), which will be Chrome version 124 (our paper uses Chrome 69 though).\n\n## Troubleshoot\n\n### Java\n\n```bash\nsdkmanager\n# Exception in thread \"main\" java.lang.NoClassDefFoundError: javax/xml/bind/annotation/XmlSchema\n#         at com.android.repository.api.SchemaModule$SchemaModuleVersion.<init>(SchemaModule.java:156)\n#         at com.android.repository.api.SchemaModule.<init>(SchemaModule.java:75)\n#         at com.android.sdklib.repository.AndroidSdkHandler.<clinit>(AndroidSdkHandler.java:81)\n#         at com.android.sdklib.tool.sdkmanager.SdkManagerCli.main(SdkManagerCli.java:73)\n#         at com.android.sdklib.tool.sdkmanager.SdkManagerCli.main(SdkManagerCli.java:48)\n# Caused by: java.lang.ClassNotFoundException: javax.xml.bind.annotation.XmlSchema\n#         at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:641)\n#         at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:188)\n#         at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:525)\n#         ... 5 more\n```\n\nThis error appears because your `java` version is too new. 
Downgrade to Java 8.\n\n### Emulator\n\n```bash\nemulator -avd test_Android \"-no-window\" \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-save\"\n# INFO    | Storing crashdata in: /tmp/android-<username>/emu-crash-34.2.14.db, detection is enabled for process: 29307\n# INFO    | Android emulator version 34.2.14.0 (build_id 11834374) (CL:N/A)\n# INFO    | Found systemPath /home/<username>/.android/system-images/android-28/google_apis/x86_64/\n# INFO    | Storing crashdata in: /tmp/android-<username>/emu-crash-34.2.14.db, detection is enabled for process: 29307\n# INFO    | Duplicate loglines will be removed, if you wish to see each individual line launch with the -log-nofilter flag.\n# INFO    | Changing default hw.initialOrientation to portrait\n# INFO    | Increasing RAM size to 1536MB\n# ERROR   | x86_64 emulation currently requires hardware acceleration!\n# CPU acceleration status: KVM requires a CPU that supports vmx or svm\n# More info on configuring VM acceleration on Linux:\n# https://developer.android.com/studio/run/emulator-acceleration#vm-linux\n# General information on acceleration: https://developer.android.com/studio/run/emulator-acceleration.\n```\n\nThis error shows you've not installed KVM or your machine does not support KVM. Enable KVM in this case.\n\n```bash\nemulator -avd test_Android \"-no-window\" \"-no-audio\" \"-skip-adb-auth\" \"-no-boot-anim\" \"-gpu\" \"auto\" \"-no-snapshot-load\"\n# ...\n# ProbeKVM: This user doesn't have permissions to use KVM (/dev/kvm).\n```\n\nTry two things:\n\n```bash\nsudo chown <your_username> -R /dev/kvm\nsudo gpasswd -a $USER kvm\n```\n\n### Appium\n\n```bash\npython screenshot.py \n# selenium.common.exceptions.WebDriverException: Message: An unknown server-side error occurred while processing the command. Original error: Error executing adbExec. 
Original error: 'Command '/home/<username>/.android/platform-tools/adb -P 5037 -s emulator-5554 shell 'settings delete global hidden_api_policy_pre_p_apps;settings delete global hidden_api_policy_p_apps;settings delete global hidden_api_policy'' exited with code 20'; Command output: cmd: Can't find service: settings\n```\n\nDevice is installing the Appium interface. Wait for a minute then try again.\n"
  },
  {
    "path": "env_setup/config.ini",
    "content": "PlayStore.enabled = false\nabi.type = x86_64\navd.ini.encoding = UTF-8\nfastboot.chosenSnapshotFile=\nfastboot.forceChosenSnapshotBoot=no\nfastboot.forceColdBoot=no\nfastboot.forceFastBoot=yes\nhw.accelerometer=yes\nhw.arc=false\nhw.audioInput=yes\nhw.battery=yes\nhw.camera.back=virtualscene\nhw.dPad=no\nhw.device.hash2=MD5:d37b758e1a2b070e348378c2efde849a\nhw.device.manufacturer=Generic\nhw.gps=yes\nhw.gpu.enabled=yes\nhw.gpu.mode=auto\nhw.initialOrientation=Portrait\nhw.keyboard=yes\nhw.lcd.width = 1080\nhw.lcd.height = 2280\nhw.lcd.depth = 16\nhw.lcd.circular = false\nhw.lcd.density = 440\nhw.lcd.backlight = true\nhw.lcd.vsync = 60\nhw.mainKeys=no\nhw.trackBall=no\nhw.cpu.arch = x86_64\nimage.sysdir.1 = system-images/android-28/google_apis/x86_64/\ntag.display = Google APIs\ntag.id = google_apis\ndisk.dataPartition.size = 6442450944"
  },
  {
    "path": "env_setup/screenshot.py",
    "content": "''' \nTaking Screenshots with Appium\nI'll be using Python and a sample iOS application from Apple's Developer Library\nThis tutorial assumes you understand how to run, launch, and interact with your application.\n'''\n\nfrom appium import webdriver\nimport os\n\ndesired_capabilities = {}\ndesired_capabilities['deviceName'] = 'Simulator'\n\ncapabilities = dict(\n    platformName='Android',\n    automationName='uiautomator2',\n    deviceName='Android',\n    newCommandTimeout=\"120000\",\n    adbExecTimeout=\"120000\",\n    noReset=True,\n    uiautomator2ServerInstallTimeout=\"120000\",\n    uiautomator2ServerLaunchTimeout=\"120000\",\n    uiautomator2ServerReadTimeout=\"120000\",\n)\ncapabilities[\"udid\"] = \"emulator-5554\"\nfrom appium.options.android import UiAutomator2Options\noptions = UiAutomator2Options().load_capabilities(capabilities)\ndirectory = '%s/' % os.getcwd()\n\nappium_server_url = \"http://0.0.0.0:4723\"\ndriver = webdriver.Remote(appium_server_url, options=options)\nfile_name = 'screenshot.png'\ndriver.save_screenshot(directory + file_name)\nprint(\"screenshot saved to\", directory + file_name)\n"
  },
  {
    "path": "multimachine/README.md",
    "content": "# Guidelines on Multi-Machine Emulation Parallel\n\nWe support the feature to distributedly run environment on many machines and synchronize when all collections are done. The design works as below:\n\n<p align=\"center\">\n    <img src=\"../assets/parallel-design.png\" alt=\"digirl-diagram\" width=\"70%\">\n</p>\n\nYou need a host machine (with GPU) to synchronize the emulation process and train the agent, which subsequently sends the checkpoints to each worker machine (no GPU required) to simply interact with the environment to get trajectories. \n\n## Hands-on Guideline\n\n**Make sure you're able to run the single-machine pipeline before you move on to multi-machine setup.**\n\n### Configuring your cluster\n\nFirst, start 1 host machine with GPU support. Then start as many worker machines as you want, where no GPU is required on these worker machines. \n\nThen you need to make sure all **worker machines** support to interact with the emulator (the host machine does not interact with the environment, so no need to support this). We suggest using virtual machines, so you can copy the environment directly. \n\nMake sure that the host machine can access all worker machines by simply `ssh` them - this can be done by adding the public keys of the worker machines to the `~/.ssh/authorized_keys` file of the host machine. This step is **mandatory** because the host machine will send the checkpoint to the worker machines and get the collected trajectories from them.\n\n### Code Setup\n\nAgain, make sure you're able to run the single-machine pipeline on each machine before you move on.\n\nNow copy the source code you've modified (including all the individualized changes) to each and every machine (including both host and worker machine). Note that you must set `save_freq` to `1` in multi-machine training mode (which is already set for you). 
Then modify the paths containing `<username>` on each machine according to their username, respectively.\n\nThen simply run\n\n```\ncd ./scripts\npython run.py --config-path config/multimachine --config-name host\n```\n\nThe script will handle everything for you; you don't need to access any worker machine when running multi-machine emulation. Note that `bsize` in the configuration files means the number of parallel emulators on **each machine**. When there are multiple worker machines collecting trajectories in parallel, the total number of parallel emulators will be the sum of `bsize` on each machine.\n\n"
  },
  {
    "path": "push.sh",
    "content": "git add .\ngit commit -m \"autopush\"\ngit push\n"
  },
  {
    "path": "requirements.txt",
    "content": "annotated-types==0.6.0\nblis==0.7.11\nbrotlipy==0.7.0\ncatalogue==2.0.10\ncertifi \ncffi \ncharset-normalizer \nclick==8.1.7\ncloudpathlib==0.16.0\ncloudpickle==3.0.0\nconfection==0.1.3\ncontourpy==1.1.1\ncryptography \ncycler==0.12.1\ncymem==2.0.8\nFarama-Notifications==0.0.4\nfonttools==4.43.1\ngym\ngym-notices\ngymnasium\nhashids==1.3.1\njericho==3.1.2\nJinja2==3.1.2\nkiwisolver==1.4.5\nlangcodes==3.3.0\nMarkupSafe==2.1.3\nmatplotlib==3.8.1\nmementos==1.3.1\nmore-itertools==10.1.0\nmurmurhash==1.0.10\nnetworkx==3.2.1\nnumpy\npackaging==23.2\nPillow\npluggy\npreshed==3.0.9\nprompt-toolkit==3.0.39\npycosat \npycparser \npydantic==2.4.2\npydantic_core==2.10.1\npyOpenSSL \npyparsing==3.1.1\nPySocks \npython-dateutil==2.8.2\nrequests \nruamel.yaml \nruamel.yaml.clib \nsix\nsmart-open==6.4.0\nspacy==3.7.2\nspacy-legacy==3.0.12\nspacy-loggers==1.0.5\nsrsly==2.4.8\nTatSu\nthinc==8.2.1\ntoolz \ntqdm \ntyper==0.9.0\ntyping_extensions==4.8.0\nurllib3 \nwasabi==1.1.2\nwcwidth==0.2.9\nweasel==0.3.3\nzstandard \nhydra-core\ngradio\ntorch\ntransformers==4.37.2\naccelerate\npeft\nopenai\nwandb\nbeautifulsoup4\nsentencepiece\npyinstrument\nmemory_profiler\nappium-python-client\ntenacity\ntermcolor\ngoogle-generativeai\njupyter\n"
  },
  {
    "path": "scripts/config/accelerate_config/default_config.yaml",
    "content": "compute_environment: LOCAL_MACHINE\ndebug: true\ndistributed_type: MULTI_GPU\ndowncast_bf16: 'no'\ngpu_ids: 0,1,2,3\nmachine_rank: 0\nmain_training_function: main\nmixed_precision: 'no'\nnum_machines: 1\nnum_processes: 4\nrdzv_backend: static\nsame_network: true\ntpu_env: []\ntpu_use_cluster: false\ntpu_use_sudo: false\nuse_cpu: false\n"
  },
  {
    "path": "scripts/config/cogagent/default.yaml",
    "content": "# ===================\n# ====== task ======\n# ===================\ntask_set: \"general\" # \"general\" or \"webshop\"\ntask_split: \"train\" # \"train\" or \"test\"\neval_sample_mode: \"random\" # \"random\" or \"sequential\"\n# max_steps of AitW General should be 10; of AitW Web Shopping should\nmax_steps: 10\n\n# ===================\n# ====== token ======\n# ===================\nhuggingface_token: ''\nwandb_key: ''\ngemini_key: ''\n\n# ===================\n# ====== placeholder (will not be used for cogagent) ======\n# ===================\npolicy_lm: '/home/<username>/Auto-UI-Base'\ncritic_lm: 'roberta-base'\ncapacity: 2000 # replay buffer size\nepochs: 5 # number of epochs for the critic each witeration\nbatch_size: 4\ngrad_accum_steps: 32\nwarmup_iter: 0 # how many iterations to only collect data and evaluate before training\nactor_epochs: 20 # number of epochs for training the actor each iteration\ntrajectory_critic_epochs: 5\nlm_lr: 1e-4\ncritic_lr: 1e-4\nmax_grad_norm: 0.01\ngamma: 0.5\n\n# ===================\n# ====== algo ======\n# ===================\nbsize: 1 # should be the same as the number of gradio urls provided for cogagent\nrollout_size: 16 # how many trajectories to collect between training iterations (should be multiple of bsize)\n\n# ===================\n# ====== agent ======\n# ===================\nuse_lora: False\nagent_name: \"cogagent\" # \"autoui\" or \"cogagent\" >>> the eval method must be \"autoui\" if train_algorithm is not none <<<\ndo_sample: True\ntemperature: 1.0\ntau: 0.01 # soft update parameter\nmax_new_tokens: 128\n\n# ===================\n# ====== log ======\n# ===================\nrecord: False # whether you want to record mp4 videos\nuse_wandb: True\nentity_name: ''\nproject_name: ''\n\n# ===================\n# ====== path ======\n# ===================\n# query by: echo $ANDROID_AVD_HOME\nandroid_avd_home: '/home/<username>/.android/avd'\n# query by: which emulator\nemulator_path: 
'/home/<username>/.android/emulator/emulator'\n# query by: which adb\nadb_path: '/home/<username>/.android/platform-tools/adb'\ncache_dir: '/home/<username>/.cache'\nassets_path: '/home/<username>/digirl/digirl/environment/android/assets/task_set'\n"
  },
  {
    "path": "scripts/config/cogagent/eval_only.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\n# no train algorithm here\nsave_path: '/home/<username>/logs/ckpts/general-off2on-cogagent/' # the interacted trajectories will be saved to this path\nrun_name: 'cogagent-general-off2on'\n\ncogagent_url:\n- <cogagent-gradio_url>\n\n# training\ntask_mode: \"evaluate\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\nparallel: \"single\" # \"single\" or \"host\" or \"worker\"\n\n# evaluation-only mode doesn't require any training\n# train_mode: \"off2on\" # \"offline\" or \"online\" or \"off2on\"\n# offline_data_path: \"/home/<username>/logs/general-off2on-sft-trajectories.pt\"\n# offline_actor_iterations: 30\n# offline_critic_iterations: 20\n# offline_trajectory_critic_iterations: 20\n\n# train_iterations: 400\neval_iterations: 1000\nsave_freq: 3\n"
  },
  {
    "path": "scripts/config/main/default.yaml",
    "content": "# ===================\n# ====== task ======\n# ===================\ntask_set: \"general\" # \"general\" or \"webshop\"\ntask_split: \"train\" # \"train\" or \"test\"\neval_sample_mode: \"random\" # \"random\" or \"sequential\"\n# max_steps of AitW General should be 10; of AitW Web Shopping should\nmax_steps: 10\n\n# ===================\n# ====== token ======\n# ===================\nhuggingface_token: ''\nwandb_key: ''\ngemini_key: ''\n\n# ===================\n# ====== algo ======\n# ===================\npolicy_lm: '/home/<username>/Auto-UI-Base' # do NOT modify this. To load existing checkpoints, modify `save_path` in children config files.\ncritic_lm: 'roberta-base'\ncapacity: 2000 # replay buffer size\nepochs: 5 # number of epochs for the critic each witeration\nbatch_size: 4\nbsize: 8 # number of emulators parallelly on the machine\nrollout_size: 16 # how many trajectories to collect between training iterations (should be multiple of bsize)\ngrad_accum_steps: 32\nwarmup_iter: 0 # how many iterations to only collect data and evaluate before training\nactor_epochs: 20 # number of epochs for training the actor each iteration\ntrajectory_critic_epochs: 5\nlm_lr: 1e-4\ncritic_lr: 1e-4\nmax_grad_norm: 0.01\ngamma: 0.5\n\n# ===================\n# ====== agent ======\n# ===================\nuse_lora: False\nagent_name: \"autoui\" # \"autoui\" or \"cogagent\" >>> the eval method must be \"autoui\" if train_algorithm is not none <<<\ndo_sample: True\ntemperature: 1.0\ntau: 0.01 #soft update parameter\nmax_new_tokens: 128\n\n# ===================\n# ====== log ======\n# ===================\nrecord: False # whether you want to record mp4 videos\nuse_wandb: True\nentity_name: ''\nproject_name: ''\n\n# ===================\n# ====== path ======\n# ===================\n# query by: echo $ANDROID_AVD_HOME\nandroid_avd_home: '/home/<username>/.android/avd'\n# query by: which emulator\nemulator_path: '/home/<username>/.android/emulator/emulator'\n# query by: which 
adb\nadb_path: '/home/<username>/.android/platform-tools/adb'\ncache_dir: '/home/<username>/.cache'\nassets_path: '/home/<username>/digirl/digirl/environment/android/assets/task_set'\n"
  },
  {
    "path": "scripts/config/main/digirl_off2on.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\nsave_path: '/home/<username>/logs/digirl-general-off2on/'\nrun_name: 'digirl-general-off2on'\n\n# training\ntrain_algorithm: \"digirl\" # \"digirl\" of \"filteredbc\"\ntrain_mode: \"off2on\" # \"offline\" or \"online\" or \"off2on\"\ntask_mode: \"train\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\nparallel: \"single\" # \"single\" or \"host\" or \"worker\"\n\n# offline config, uncomment offline_data_path as long as train_model is offline or off2on\noffline_data_path: \"/home/<username>/logs/general-off2on-sft-trajectories.pt\"\noffline_actor_iterations: 30\noffline_critic_iterations: 20\noffline_trajectory_critic_iterations: 20\n\ntrain_iterations: 400\neval_iterations: 1000\nsave_freq: 3\n"
  },
  {
    "path": "scripts/config/main/digirl_offline.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\nsave_path: '/home/<username>/logs/digirl-general-offline/'\nrun_name: 'digirl-general-offline'\n\n# training\ntrain_algorithm: \"digirl\" # \"digirl\" of \"filteredbc\"\ntrain_mode: \"offline\" # \"offline\" or \"online\" or \"off2on\"\ntask_mode: \"train\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\nparallel: \"single\" # \"single\" or \"host\" or \"worker\"\n\n# offline config, uncomment offline_data_path as long as train_model is offline or off2on\noffline_data_path: \"/home/<username>/logs/general-off2on-sft-trajectories.pt\"\noffline_actor_iterations: 30\noffline_critic_iterations: 20\noffline_trajectory_critic_iterations: 20\n\ntrain_iterations: 0\neval_iterations: 1000\nsave_freq: 3\n"
  },
  {
    "path": "scripts/config/main/digirl_online.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\nsave_path: '/home/<username>/logs/digirl-general-online/'\nrun_name: 'digirl-general-online'\n\n# training\ntrain_algorithm: \"digirl\" # \"digirl\" of \"filteredbc\"\ntrain_mode: \"online\" # \"offline\" or \"online\" or \"off2on\"\ntask_mode: \"train\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\nparallel: \"single\" # \"single\" or \"host\" or \"worker\"\n\n# offline config, uncomment offline_data_path as long as train_model is offline or off2on\n# offline_data_path: \"/home/<username>/logs/general-off2on-sft-trajectories.pt\"\noffline_actor_iterations: 30\noffline_critic_iterations: 20\noffline_trajectory_critic_iterations: 20\n\ntrain_iterations: 600\neval_iterations: 1000\nsave_freq: 3\n"
  },
  {
    "path": "scripts/config/main/eval_only.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\nsave_path: '/home/<username>/logs/ckpts/general-off2on-digirl/'\nrun_name: 'autoui-general-eval-only'\n\n# training\ntrain_algorithm: \"digirl\" # \"digirl\" of \"filteredbc\", should be same as the evaluation checkpoint\ntask_mode: \"evaluate\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\nparallel: \"single\" # \"single\" or \"host\" or \"worker\"\n\n# evaluation-only mode doesn't require any training\n# train_mode: \"off2on\" # \"offline\" or \"online\" or \"off2on\"\n# offline_data_path: \"/home/<username>/logs/general-off2on-sft-trajectories.pt\"\n# offline_actor_iterations: 30\n# offline_critic_iterations: 20\n# offline_trajectory_critic_iterations: 20\n\n# train_iterations: 400\neval_iterations: 1000\nsave_freq: 3\n"
  },
  {
    "path": "scripts/config/multimachine/default.yaml",
    "content": "# ===================\n# ====== task ======\n# ===================\ntask_set: \"general\" # \"general\" or \"webshop\"\ntask_split: \"train\" # \"train\" or \"test\"\neval_sample_mode: \"random\" # \"random\" or \"sequential\"\n# max_steps of AitW General should be 10; of AitW Web Shopping should\nmax_steps: 10\n\n# ===================\n# ====== token ======\n# ===================\nhuggingface_token: ''\nwandb_key: ''\ngemini_key: ''\n\n# ===================\n# ====== algo ======\n# ===================\npolicy_lm: '/home/<username>/Auto-UI-Base'\ncritic_lm: 'roberta-base'\ncapacity: 2000 # replay buffer size\nepochs: 5 # number of epochs for the critic each witeration\nbatch_size: 4\nbsize: 8 # number of emulators parallelly on the machine\nrollout_size: 16 # how many trajectories to collect between training iterations (should be multiple of bsize)\ngrad_accum_steps: 32\nwarmup_iter: 0 # how many iterations to only collect data and evaluate before training\nactor_epochs: 20 # number of epochs for training the actor each iteration\nlm_lr: 1e-4\ncritic_lr: 1e-4\nmax_grad_norm: 0.01\ngamma: 0.5\n\n# ===================\n# ====== agent ======\n# ===================\nuse_lora: False\nagent_name: \"autoui\" # \"autoui\" or \"cogagent\" >>> the eval method must be \"autoui\" if train_algorithm is not none <<<\ndo_sample: True\ntemperature: 1.0\ntau: 0.01 #soft update parameter\nmax_new_tokens: 128\n\n# ===================\n# ====== log ======\n# ===================\nrecord: False # whether you want to record mp4 videos\nuse_wandb: True\nentity_name: ''\nproject_name: ''\n\n# ===================\n# ====== path ======\n# ===================\n# query by: echo $ANDROID_AVD_HOME\nandroid_avd_home: '/home/<username>/.android/avd'\n# query by: which emulator\nemulator_path: '/home/<username>/.android/emulator/emulator'\n# query by: which adb\nadb_path: '/home/<username>/.android/platform-tools/adb'\nassets_path: 
'/home/<username>/digirl/digirl/environment/android/assets/task_set'\n\ncache_dir: '/home/<username>/.cache'\n\n# offline config, uncomment offline_data_path as long as train_mode is offline or off2on\noffline_data_path: \"/home/<username>/logs/general-off2on-sft-trajectories.pt\"\noffline_actor_iterations: 30\noffline_critic_iterations: 20\noffline_trajectory_critic_iterations: 20\n"
  },
  {
    "path": "scripts/config/multimachine/host.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\nparallel: \"host\"\nrun_name: 'general-digirl-off2on-host'\nsave_path: \"/home/<username>/logs/multimachine\" # path that saves checkpoint, trajectories, and images\nworker_temp_path: \"/home/<username>/logs/worker\" # MUST BE identical to save_path on the worker machine\nworker_run_path: \"/home/<username>/digirl/scripts\" # where the script dir is on the worker machine\nworker_ips: [\"34.45.185.211\"]\nworker_username: <username>\nsave_freq: 1\n\n# training\ntrain_algorithm: \"digirl\" # \"digirl\" of \"filteredbc\"\ntrain_mode: \"online\" # \"offline\" or \"online\" or \"off2on\"\ntask_mode: \"train\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\n\ntrain_iterations: 400\neval_iterations: 1000\n"
  },
  {
    "path": "scripts/config/multimachine/worker.yaml",
    "content": "defaults:\n  - default\n  - _self_\n\n# parallel\nparallel: \"worker\" # \"single\" or \"host\" or \"worker\"\nrun_name: 'general-digirl-off2on-worker'\nsave_path: '/home/<username>/logs/worker'\n\n# training\ntrain_algorithm: \"digirl\" # \"digirl\" of \"filteredbc\"\ntrain_mode: \"online\" # \"offline\" or \"online\" or \"off2on\"\ntask_mode: \"train\" # \"train\" or \"evaluate\" - \"train\" will do training when evaluating, \"evaluate\" will NOT do training\n\ntrain_iterations: 1\neval_iterations: 0\n"
  },
  {
    "path": "scripts/run.py",
    "content": "import transformers\nfrom tqdm import tqdm\nfrom digirl.environment import BatchedAndroidEnv\nfrom digirl.models import AutoUIAgent, CogAgent\nfrom digirl.algorithms import offpolicy_train_loop, eval_loop, worker_collect_loop\nfrom digirl.misc import colorful_print\nfrom digirl.environment.android import EndResultEvaluator\nfrom digirl.environment.android import autoui_translate_action, cogagent_translate_action\nimport torch.nn as nn\nimport numpy as np \nimport wandb\nfrom omegaconf import DictConfig, OmegaConf\nimport os\nimport hydra\nfrom accelerate import Accelerator\nfrom datetime import timedelta\nfrom accelerate import DistributedDataParallelKwargs, InitProcessGroupKwargs\ntransformers.logging.set_verbosity_error()\n\nimport torch.distributed as dist\nimport datetime\n\ndef load_task_file(assets_path, task_set, task_split):\n    all_tasks = []\n    with open(os.path.join(assets_path, task_set + \"_\" + task_split + \".txt\")) as fb: \n        for line in fb:\n            all_tasks.append(line.replace(\"\\n\", \"\"))\n    return all_tasks\n\n@hydra.main(version_base=None, config_path=None, config_name=None)\ndef main(config: \"DictConfig\"):\n    colorful_print(OmegaConf.to_yaml(config), fg='red')\n    try:\n        from huggingface_hub import login\n        login(token=config.huggingface_token)\n    except:\n        print(\">>> Huggingface token not found.\")\n\n    ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)\n    accelerator = Accelerator(InitProcessGroupKwargs(timeout=timedelta(minutes=40)), kwargs_handlers=[ddp_kwargs], project_dir = config.save_path)\n    device = accelerator.device\n    env = None\n    if accelerator.is_main_process:\n        # load environment\n        all_tasks = load_task_file(config.assets_path, config.task_set, config.task_split)\n        bsize = config.bsize\n        base_port = 5554\n        evaluators = [EndResultEvaluator(config.gemini_key, config.task_set)] * bsize\n        assert 
len(evaluators) == bsize\n        if config.agent_name == \"autoui\":\n            translate_action = autoui_translate_action\n            use_feature_extractor = True\n        elif config.agent_name == \"cogagent\":\n            translate_action = cogagent_translate_action\n            use_feature_extractor = False\n    decode_f = lambda x:x\n    if config.task_mode != \"evaluate\":\n        assert config.agent_name == \"autoui\", \"Only AutoUI agent is supported for training\"\n        colorful_print(\">>> Agent: AutoUI\", fg='blue')\n        colorful_print(\">>> Training algorithm: \"+config.train_algorithm, fg='blue')\n        colorful_print(\">>> Training mode: \"+config.train_mode, fg='blue')\n    else:\n        colorful_print(\">>> Agent: \"+config.agent_name, fg='blue')\n        colorful_print(\">>> Evauation mode\", fg='blue')\n    \n    if config.agent_name == \"autoui\":\n        agent = AutoUIAgent(device=device, accelerator=accelerator, \n                            temperature=config.temperature, do_sample=config.do_sample, \n                            policy_lm=config.policy_lm, critic_lm=config.critic_lm,\n                            cache_dir=config.cache_dir, max_new_tokens=config.max_new_tokens)\n        tokenizer = agent.tokenizer\n    elif config.agent_name == \"cogagent\":\n        agent = CogAgent(url=config.cogagent_url)\n        tokenizer = None\n    else:\n        raise NotImplementedError(\"Only AutoUI agent is supported for now\")\n\n    if config.use_wandb and accelerator.is_main_process:\n        wandb.login(key=config.wandb_key)\n        wandb.init(project=config.project_name, entity=config.entity_name, name=config.run_name, config=dict(config))\n\n    # this bunch of code should handle these functions:\n    # |-- autoui\n    #   |-- online train (eval in the end)\n    #   |-- offline train (eval in the end)\n    #   |-- off2on train (eval in the end)\n    #   |-- eval-only\n    # |-- cogagent (eval only)\n    # |-- set-of-marks 
(eval only)\n    # |-- appagent (eval only)\n\n    def construct_env(sample_mode):\n        env = BatchedAndroidEnv(avd_name=\"test_Android\", \n            cache_avd_names=[f\"test{i}\" for i in range(1,1+bsize)], \n            android_avd_home=config.android_avd_home,\n            emulator_path=config.emulator_path, \n            adb_path=config.adb_path, \n            udids = [f\"emulator-{base_port+2*i}\" for i in range(bsize)],\n            max_steps=config.max_steps-1, # will have 1 dangling step after stop signal is triggered\n            appium_base_port = base_port+1098,\n            run_headless=True, \n            use_feature_extractor=use_feature_extractor, \n            device=accelerator.device,\n            translate_action=translate_action,\n            evaluators=evaluators,\n            temp_path = os.path.join(config.save_path, \"images\"),\n            save_images=True,\n            all_tasks=all_tasks,\n            task_split=config.task_split,\n            sample_mode=sample_mode,\n            record=config.record,\n        )\n        return env\n\n    # autoui will be trained first then evaluated\n    if config.parallel in [\"single\", \"host\"]:\n        if config.agent_name == \"cogagent\" or config.task_mode == \"evaluate\":\n            if accelerator.is_main_process:\n                env = construct_env(sample_mode=config.eval_sample_mode)\n            eval_loop(env = env,\n                        tokenizer=tokenizer,\n                        agent = agent,\n                        accelerator = accelerator,\n                        decode_f=decode_f,\n                        **config)\n        elif config.agent_name == \"autoui\":\n            if accelerator.is_main_process:\n                env = construct_env(sample_mode=\"random\")\n            offpolicy_train_loop(env = env,\n                    tokenizer=tokenizer,\n                    agent = agent,\n                    accelerator = accelerator,\n                    
decode_f=decode_f,\n                    **config)\n                \n            # always do eval after training (unless this is only a worker machine to collect trajectories)\n            if accelerator.is_main_process:\n                env = construct_env(sample_mode=config.eval_sample_mode)\n            eval_loop(env = env,\n                        tokenizer=tokenizer,\n                        agent = agent,\n                        accelerator = accelerator,\n                        decode_f=decode_f,\n                        **config)\n\n    elif config.parallel == \"worker\":\n        if accelerator.is_main_process:\n            env = construct_env(sample_mode=\"random\")\n        worker_collect_loop(env = env,\n                            agent = agent,\n                            tokenizer=tokenizer,\n                            accelerator = accelerator,\n                            decode_f=decode_f,\n                            **config)\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "setup.py",
    "content": "import setuptools\n\n# if sys.version_info < (3, 7):\n#     sys.exit('Python>=3.7 is required by digirl.')\n\nsetuptools.setup(\n    name=\"digirl\",\n    version='0.1.0',\n    url=\"https://github.com/DigiRL-agent/digirl\",\n    author=(\"Hao Bai\"),\n    description=\"Research code for digirl\",\n    long_description=open(\"README.md\", \"r\", encoding='utf-8').read(),\n    long_description_content_type=\"text/markdown\",\n    keywords='digirl',\n    license='MIT',\n    packages=setuptools.find_packages(),\n    install_requires=open(\"requirements.txt\", \"r\").read().split(),\n    include_package_data=True,\n    python_requires='>=3.9',\n    classifiers=[\n        'Intended Audience :: Science/Research',\n        'License :: OSI Approved :: MIT License',\n        'Programming Language :: Python :: 3',\n        'Topic :: Scientific/Engineering :: Artificial Intelligence',\n    ],\n)"
  }
]