Repository: FoundationAgents/ReCode Branch: main Commit: 6e7223f71281 Files: 74 Total size: 16.3 MB Directory structure: gitextract_bghxqcon/ ├── .gitignore ├── LICENSE ├── README.md ├── agents/ │ └── recode/ │ ├── agent.py │ ├── resources/ │ │ ├── fewshots/ │ │ │ ├── alfworld/ │ │ │ │ ├── clean.txt │ │ │ │ ├── cool.txt │ │ │ │ ├── examine.txt │ │ │ │ ├── heat.txt │ │ │ │ ├── put.txt │ │ │ │ └── puttwo.txt │ │ │ ├── sciworld/ │ │ │ │ └── base.txt │ │ │ └── webshop/ │ │ │ └── base.txt │ │ └── prompts/ │ │ ├── alfworld/ │ │ │ └── actions.txt │ │ ├── default_new.py │ │ ├── sciworld/ │ │ │ └── actions.txt │ │ └── webshop/ │ │ └── actions.txt │ └── utils.py ├── base/ │ ├── agent.py │ └── environment.py ├── configs/ │ ├── prices.json │ └── profiles_example.yaml ├── envs/ │ ├── alfworld/ │ │ ├── base_config.yaml │ │ └── env.py │ ├── sciworld/ │ │ ├── base_config.yaml │ │ ├── data/ │ │ │ ├── max_steps.json │ │ │ ├── taskname2id.json │ │ │ ├── test_indices.json │ │ │ ├── train_indices.json │ │ │ └── valid_indices.json │ │ └── env.py │ └── webshop/ │ ├── env.py │ ├── setup.py │ ├── setup.sh │ └── src/ │ └── webshop/ │ ├── __init__.py │ ├── run_envs/ │ │ ├── run_web_agent_site_env.py │ │ └── run_web_agent_text_env.py │ ├── search_engine/ │ │ └── lucene_searcher.py │ ├── transfer/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── app.py │ │ ├── predict_help.py │ │ └── webshop_lite.py │ └── web_agent_site/ │ ├── __init__.py │ ├── app.py │ ├── attributes/ │ │ ├── annotate.py │ │ └── generate_attrs.py │ ├── engine/ │ │ ├── __init__.py │ │ ├── engine.py │ │ ├── goal.py │ │ └── normalize.py │ ├── envs/ │ │ ├── __init__.py │ │ ├── chromedriver │ │ ├── web_agent_site_env.py │ │ └── web_agent_text_env.py │ ├── models/ │ │ ├── __init__.py │ │ └── models.py │ ├── static/ │ │ └── style.css │ ├── templates/ │ │ ├── attributes_page.html │ │ ├── description_page.html │ │ ├── done_page.html │ │ ├── features_page.html │ │ ├── item_page.html │ │ ├── results_page.html │ │ ├── review_page.html │ │ 
└── search_page.html │ └── utils.py ├── requirements.txt ├── run.py └── utils/ ├── common.py ├── errors.py ├── executor.py ├── llm.py ├── logger.py └── mockllm.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .vscode/ __pycache__/ .DS_Store *.pyc *.zip logs/ envs/webshop/data/ envs/webshop/search_index/ envs/webshop/data.zip envs/webshop/indexes.zip profiles.yaml ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2026 Foundation Agents Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # ReCode: Unify Plan and Action for Universal Granularity Control [![Arxiv](https://img.shields.io/badge/2510.23564-arXiv-red)](https://arxiv.org/abs/2510.23564) > If you encounter any difficulties in using or reproducing the code, please contact me at [zhaoyangyu713@gmail.com](mailto:zhaoyangyu713@gmail.com). ReCode introduces recursive code generation for LLM agents, unifying plan and action into a single representation. By treating high-level plans as placeholder functions that recursively decompose into executable primitives, it achieves universal granularity control and dynamically adapts from strategic thinking to concrete actions. This repository hosts the reference implementation used in the paper, along with environment wrappers and experiment tooling.

A comparison of LLM-based agent decision-making paradigms

## Core Idea ReCode adopts a divide-and-conquer strategy, decomposing complex tasks into executable code fragments: 1. **Tree-structured code**: Organizes partial programs in a tree where each node captures one sub-task and records its execution trace. 2. **Recursive expansion**: Placeholder functions are expanded by the LLM into more specific calls or smaller subroutines using environment-specific prompts and few-shots. 3. **Dynamic execution loop**: Each node is executed immediately; fresh observations decide whether to expand further, retry, or finish. 4. **Shared executor state**: A constrained Python executor maintains environment variables, validates code blocks, and exposes the toolset available to the agent.

An overview of ReCode

## Repository Layout - `run.py` – CLI entry point that instantiates agents/envs, manages concurrency, and writes run summaries. - `agents/recode/` – ReCode agent implementation, prompt templates, and utility helpers. - `envs/` – Environment wrappers and assets for `alfworld`, `webshop`, and `sciworld`. - `configs/` – LLM profile templates and (expected) pricing metadata used by the async client. - `utils/` – Shared components: async OpenAI wrapper, constrained executor, logging helpers, error types. - `figures/` – Paper figures used throughout this README. ## Experiments To evaluate the effectiveness of ReCode, we divide our experiments into the inference part and the training part. 1. **Inference Result**: we compare against several mainstream paradigms (ReAct, CodeAct) and some of the work focused on improving LLM-based agent planning (AdaPlanner and ADaPT). ReCode achieved significant performance improvements across all three environments, with an average score of 60.8, surpassing the best baseline method by 10.5 (relative 20.9%). _With our tests, ReCode can achieve a perfect **100** score in ALFWorld under `claude-4-sonnet`._

Inference performance across environments

2. **Training Result**: we conduct supervised fine-tuning (SFT) on ReCode, ReAct and CodeAct with `Qwen2.5-7B-Instruct`. ReCode+SFT delivers an impressive average performance of 70.4% across all environments, outperforming both ReAct+SFT (67.6%) and CodeAct+SFT (55.8%), highlighting its exceptional data efficiency.

SFT performance across environments

SFT performance across environments

## Quick Start To run ReCode, we need a conda environment. The Python version should be 3.10 or newer. Then, it is necessary to configure dependencies for three environments (it has not been confirmed whether conflicts will arise in the same environment), and we suggest configuring them in three separate environments. ```bash conda create -n recode-envname python=3.10 # Replace "envname" with your environment name. conda activate recode-envname ``` --- ### ALFWorld - Follow the [ALFWorld instructions](https://github.com/alfworld/alfworld). - Set `ALFWORLD_DATA` to the dataset root or edit `envs/alfworld/base_config.yaml` to point to your local paths: ```bash export ALFWORLD_DATA=/path/to/alfworld ``` ### ScienceWorld - Follow the instructions from the [ScienceWorld repository](https://github.com/allenai/ScienceWorld). ### WebShop Thanks to [ETO](https://github.com/Yifan-Song793/ETO) for providing a convenient script to configure the WebShop environment. ```bash cd envs/webshop pip install -e . conda install -y -c conda-forge openjdk=11 pip install "en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.6.0/en_core_web_lg-3.6.0-py3-none-any.whl" ``` Run the provided helper to fetch the goal set and pre-built search index: ```bash # The current path is "envs/webshop" bash setup.sh ``` --- Install some other dependencies. ```bash pip install -r requirements.txt # This list may not be complete, please contact me promptly if you encounter any problems ``` Ensure `configs/profiles.yaml` points to a valid API credential (copy `configs/profiles_example.yaml` if you need a template), then run a short dry run in any enabled environment: ```bash python run.py -a recode -e alfworld -n 1 --split test --profile default ``` Replace `alfworld` with `webshop` or `sciworld` once their assets are available. Logs are written to `logs//`, and the console prints a condensed summary for quick diagnostics.
## Configure LLM Access - `configs/profiles.yaml` contains named profiles. The `run.py --profile` flag selects which profile to forward to `AsyncLLM`. Example: ```yaml models: default: api_key: "sk-your_api_key" base_url: "https://api.openai.com/v1" model: "gpt-4o-mini" temperature: 0.0 track_costs: true gpt-4o: api_key: "sk-your_other_key" base_url: "https://api.openai.com/v1" model: "gpt-4o" temperature: 0.7 max_tokens: 512 ``` - Cost tracking loads `configs/prices.json`. If you do not want to record costs, set `track_costs: false` for the profile. - As a fallback, you can omit the file and set `OPENAI_API_KEY` in the environment; the default profile will then use it. - A ready-to-edit template lives at `configs/profiles_example.yaml`; copy it to `configs/profiles.yaml` if you're starting from scratch: ```bash cp configs/profiles_example.yaml configs/profiles.yaml ``` ## Running ReCode `run.py` is the canonical entry point. It resolves agent/environment aliases, manages concurrency, streams logs, and emits a structured summary. ```bash # ALFWorld, single instance python run.py -a recode -e alfworld -n 1 --split test --profile default # WebShop, 3 test goals, allow deeper recursion python run.py -a recode -e webshop -n 3 --split test --profile default --max-depth 12 # ScienceWorld, run 5 instances with 2-way concurrency python run.py -a recode -e sciworld -n 5 -c 2 --profile gpt-4o ``` Key CLI flags: - `-a / --agent` – class path or alias (`recode` resolves to `agents.recode.agent.ReCodeAgent`). - `-e / --env` – environment class or alias (`alfworld`, `webshop`, `sciworld`). - `-n / --instances` – number of evaluation episodes. - `-c / --concurrent` – max concurrent episodes (rich progress UI automatically adapts). - `--split`, `--seed`, `--max-depth`, `--profile` – forwarded to both agent and environment. - `-C / --config` – YAML file whose keys override CLI flags; useful for complex sweeps. 
Example YAML (`configs/example.yaml`): ```yaml agent: recode env: alfworld instances: 10 concurrent: 2 profile: gpt-4o split: test task_types: ["put", "clean"] # For ALFWorld max_depth: 12 max_retry: 4 ``` Run it with: ```bash python run.py -C configs/example.yaml ``` ## Logging & Results - Each run creates `logs//` with: - `running_logs/run.log` – aggregated stream of agent + environment logs. - `running_logs/instance_.log` – per-instance traces (when multiple instances are launched). - `` – structured summary written by `write_summary`, containing per-instance metrics and aggregated statistics (overall + per task type). - The console prints a condensed summary (success rate, standard metrics, by-task breakdown) after completion. ## Extending to New Environments 1. **Implement the `Env` interface** under `envs//env.py`. Use `base.environment.Env` as the contract: implement `reset`, `_run`, `is_done`, `is_success`, and `report`. Return `{"observations": [...], "env_name": , "env": self}` from `reset`. 2. **Expose prompts and guidance** in `agents/recode/resources/`: - `prompts//actions.txt` – concise description of valid `run("...")` calls/tools. - `fewshots//` – one or more `.txt` examples showing thought→execute patterns. - If your environment has task types, update `agents/recode/agent.py::_load_resources` and `agents/recode/utils.parse_raw_observation` to parse initial observations correctly. 3. **Register aliases** by adding your class to `ENV_ALIASES` in `run.py` (optional but convenient) and, if needed, plan-specific logic in the agent utilities. 4. Optionally add setup scripts (similar to `envs/webshop/setup.sh`) to document dataset fetching. 
## Programmatic Use You can embed the agent directly inside your own loop by reusing the provided utilities: ```python import asyncio from agents.recode.agent import ReCodeAgent from envs.alfworld.env import AlfworldEnv async def solve_once(): config = {"split": "test", "task_types": ["put"], "max_depth": 10} env = AlfworldEnv(logger=None) agent = ReCodeAgent() init_info = env.reset(config) agent.reset(config, init_info) observations = init_info["observations"] while not env.is_done(): actions = await agent.act(observations) observations = await env.run(actions) print(env.report()) await env.close() asyncio.run(solve_once()) ``` The same pattern works for any `Env` implementation; be sure to pass a logger if you need file-backed traces. ## Citation ``` @misc{yu2025recodeunifyplanaction, title={ReCode: Unify Plan and Action for Universal Granularity Control}, author={Zhaoyang Yu and Jiayi Zhang and Huixue Su and Yufan Zhao and Yifan Wu and Mingyi Deng and Jinyu Xiang and Yizhang Lin and Lingxiao Tang and Yingchao Li and Yuyu Luo and Bang Liu and Chenglin Wu}, year={2025}, eprint={2510.23564}, archivePrefix={arXiv}, primaryClass={cs.AI}, url={https://arxiv.org/abs/2510.23564}, } ``` ================================================ FILE: agents/recode/agent.py ================================================ from __future__ import annotations from pathlib import Path from enum import Enum from typing import List, Optional from datetime import datetime, timezone from base.agent import Agent from utils.llm import AsyncLLM from utils.executor import Executor from utils.common import parse_xml_tag from agents.recode.resources.prompts.default_new import EXPAND_PROMPT from agents.recode.utils import ( parse_raw_observation, split_blocks, validate_blocks, NodeStatus, CodeNode, get_variables, ) DEFAULT_MAX_DEPTH = 10 DEFAULT_MAX_RETRY = 5 DEFAULT_MAX_REWRITE = 5 class ReCodeAgent(Agent): def __init__( self, logger=None, task_type: str = None, ) -> None: self.logger = logger 
self.llm = AsyncLLM() self.executor = Executor(if_run_print=True) self.root: Optional[CodeNode] = None self.current_node: Optional[CodeNode] = None self.previous_node: Optional[CodeNode] = None self.task_type: str = task_type self.is_start = False def reset(self, running_config: dict, init_info: dict=None) -> None: self.root = None self.current_node = None self.previous_node = None self.is_start = False self.max_depth: int = running_config.get('max_depth') or DEFAULT_MAX_DEPTH self.max_retry: int = running_config.get('max_retry') or DEFAULT_MAX_RETRY self.max_rewrite: int = running_config.get('max_rewrite') or DEFAULT_MAX_REWRITE if init_info and 'task_type' in init_info and init_info['task_type']: self.task_type = init_info['task_type'].lower() elif 'task_type' in running_config: self.task_type = running_config['task_type'].lower() if "profile" in running_config and running_config['profile']: self.logger.info(f"Using profile: {running_config['profile']}") self.llm = AsyncLLM(running_config['profile']) assert 'env_name' in init_info, "Envrioment must be specified" self.env_name = init_info['env_name'] if self.env_name == "alfworld": self.logger.info("Setting max steps to 80") init_info['env'].set_max_steps(80) self.executor.set_env(init_info['env']) self._load_resources() def _load_resources(self): resources_path = Path("agents/recode/resources/prompts") / self.env_name self.available_actions = open(resources_path / "actions.txt", "r").read() fewshots_path = Path("agents/recode/resources/fewshots") / self.env_name if self.env_name == "alfworld": self.fewshots = open(fewshots_path / f"{self.task_type}.txt", "r").read() elif self.env_name == "webshop": self.fewshots = open(fewshots_path / "base.txt", "r").read() # self.fewshots = "(No Examples)" elif self.env_name == "sciworld": self.fewshots = open(fewshots_path / "base.txt", "r").read() else: raise ValueError(f"Unsupported environment in _load_resources: {self.env_name}") async def act(self, observations: 
List[str]) -> List[str]: if not self.is_start: assert len(observations) == 1, "Only one observation is allowed for the first node" self._init_code_tree(observations[0]) self.is_start = True if self.current_node.status == NodeStatus.STUB: await self._handle_stub() elif self.current_node.status == NodeStatus.ERROR: return ["[FINISH]"] if not self.current_node: return ["[FINISH]"] self.logger.info(f"[Execute]\n{self.current_node.code}") result = self._execute(self.current_node.code) self.current_node.observations.extend(result["stdout"]) if result["stdout"] else None self.logger.info(f"[Exec Result]\n{result}") if result["success"]: self.logger.info(f"[Execution Stdout] {result['stdout']}") self.current_node.status = NodeStatus.COMPLETED self.previous_node = self.current_node self.current_node = self.current_node.next() if not self.current_node: return ["[FINISH]"] else: if "NeedExpansion" in result["error"]: self.current_node.status = NodeStatus.STUB else: self.current_node.status = NodeStatus.ERROR self.current_node.error = result["error"] async def _handle_stub(self) -> None: if self.current_node and self.current_node.depth >= self.max_depth: if self.logger: self.logger.warning("Max depth reached - terminating.") self.current_node = None return new_blocks = await self._expand() self.logger.info("[NEW_BLOCKS]\n" + "\n".join(new_blocks)) if new_blocks else None if self.current_node: if new_blocks is None: self.current_node = None return if new_blocks: for block in new_blocks: child_node = CodeNode(code=block, parent=self.current_node) self.current_node.children.append(child_node) else: self.current_node.status = NodeStatus.SKIP self.current_node = self.current_node.next() async def _expand(self) -> Optional[List[str]]: attempt = 0 retry_hint_added = False while True: user_prompt = self._build_expand_prompt() if retry_hint_added: user_prompt += ( "\n\n[Important] Your previous expansion produced syntactically invalid code and/or included disallowed constructs (e.g., 
def/async def). " "Strictly follow the rules: output a single valid Python code block, and do not use def or async def." ) if self.logger: self.logger.info("[LLM_IN]\n" + user_prompt) response, _cost = await self.llm(user_prompt) if self.logger: self.logger.info("[LLM_OUT]\n" + response.strip()) thought = parse_xml_tag(response, "think").strip() self.current_node.thought = thought expanded_code = parse_xml_tag(response, "execute").strip() try: blocks = split_blocks(expanded_code) validate_blocks(blocks) return blocks except (SyntaxError, ValueError) as e: attempt += 1 retry_hint_added = True if attempt >= self.max_rewrite: if self.logger: self.logger.info( f"[STOP] Reached max re-expands ({self.max_rewrite}). Last error: {e}. Ending episode." ) return None if self.logger: self.logger.info( f"[RE-EXPAND {attempt}/{self.max_rewrite}] Split/validation failed due to: {e}. Re-asking EXPAND..." ) def _execute(self, code: str) -> dict: return self.executor.execute(code) def _init_code_tree(self, observation: str) -> None: self.logger.info(f"[OBSERVATIONS]\n{observation}") initial_observation, instruction = parse_raw_observation(observation, self.env_name) self.executor.set_var('observation', initial_observation) self.executor.set_var('instruction', instruction) self.root = CodeNode(code=f"solve(instruction, observation)") self.current_node = self.root def _build_expand_prompt(self) -> str: # available_actions, examples, task, variables examples = self.fewshots if self.fewshots else "(No Examples)" task = self.current_node.code variables = get_variables(self.executor, self.current_node.code) variables = variables if variables else "(No Variables)" return EXPAND_PROMPT.format(available_actions=self.available_actions, examples=examples, task=task, variables=variables) def _get_max_depth(self, node: Optional[CodeNode]) -> int: if node is None: return 0 max_depth = node.depth for child in node.children: child_max = self._get_max_depth(child) if child_max > max_depth: max_depth 
= child_max return max_depth def _get_formatted_tree(self) -> dict: version = "recode.plan.v1" meta = { "env_name": getattr(self, "env_name", None), "task_type": getattr(self, "task_type", None), "created_at": datetime.now(timezone.utc).isoformat(), "max_depth": getattr(self, "max_depth", None), "max_retry": getattr(self, "max_retry", None), "max_rewrite": getattr(self, "max_rewrite", None), } nodes = {} edges = [] root_id = self.root.id if self.root else None if self.root: stack = [self.root] while stack: node = stack.pop() nodes[node.id] = { "code": node.code, "thought": getattr(node, "thought", None), "status": node.status.value if isinstance(node.status, Enum) else node.status, "depth": node.depth, "observations": list(node.observations) if node.observations else [], "error": node.error, } for child in node.children: edges.append([node.id, child.id]) # Preserve order by pushing children in reverse for DFS for child in reversed(node.children): stack.append(child) return { "version": version, "meta": meta, "root_id": root_id, "nodes": nodes, "edges": edges, } def report(self) -> dict: return { 'cost': self.llm.spent, 'tree': self._get_formatted_tree(), 'max_depth': self._get_max_depth(self.root) } ================================================ FILE: agents/recode/resources/fewshots/alfworld/clean.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: clean some apple and put it in sidetable. - observation (str): You are in the middle of a room. Looking quickly around you, you see a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 1, a diningtable 3, a diningtable 2, a diningtable 1, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sidetable 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. 
[Output] I first characterize the task: the object is an apple that must be cleaned (so the cleaning station is the only sinkbasin present) and finally placed on the sidetable. The initial observation provides a single `sinkbasin 1` and `sidetable 1`, so I persist both IDs. I also parse and persist all location IDs for later search. The expansion should be decomposed into three concrete phases: (1) acquire an apple by searching openable and non-openable receptacles; handle closures by opening when needed; once found, take the apple by its exact ID; (2) clean it strictly at the sinkbasin; (3) place it on the target surface, ensuring it is reachable (surfaces are non-openable, so navigation suffices). Typical mistakes to avoid include assuming an object ID without extracting it from observations, forgetting to open a closed container before taking, or trying to clean anywhere other than the sinkbasin. cleaning_location_ID = 'sinkbasin 1' target_location_ID = 'sidetable 1' obj = 'apple' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) clean_with(obj_ID, cleaning_location_ID) put_in(obj_ID, target_location_ID) [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: put a clean lettuce in diningtable. - observation (str): You are in the middle of a room. Looking quickly around you, you see a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 1, a diningtable 1, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a shelf 3, a shelf 2, a shelf 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] The required object is lettuce, and the final location is the dining table. 
The cleaning step is mandatory and must use the unique `sinkbasin 1`. I will persist `sinkbasin 1` as the cleaning site and `diningtable 1` as the goal. I will also build `all_location_IDs` from the initial observation. The decomposition mirrors the previous case: systematically search and take a lettuce, clean it at the sink, then navigate to the dining table and place the item. I must be careful to open any closed storage (e.g., cabinet, drawer, fridge, microwave) before taking and avoid using move to a closed container. Finally, I will ensure the regex extracts the exact lettuce ID from the observation. cleaning_location_ID = 'sinkbasin 1' target_location_ID = 'diningtable 1' obj = 'lettuce' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) clean_with(obj_ID, cleaning_location_ID) put_in(obj_ID, target_location_ID) [Input] The current function to expand is: obj_ID = find_and_take(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] The goal is to navigate the room methodically to find any instance of the target object and pick it up. I will iterate through the known locations, always navigating first. If a location produces no new information, I will examine it. When a location is closed, I will open it before searching. Once I observe a concrete `{obj} k` mention, I will parse that exact ID and take it from the current location. Common pitfalls include not examining after navigation (missing items), failing to open a closed container, or taking without the full `obj_ID`. 
for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: clean_with(obj_ID, cleaning_location_ID) The variables you can use is: - obj_ID (str): apple 1 - cleaning_location_ID (str): sinkbasin 1 [Output] Cleaning must be performed at the sinkbasin only. I will explicitly navigate to the recorded cleaning site and issue the cleaning command with the exact object ID. I do not need to open the sink; I only need to be there. A typical error is to attempt to clean somewhere else or to forget to navigate first. run(f'go to {cleaning_location_ID}') run(f'clean {obj_ID} with {cleaning_location_ID}') [Input] The current function to expand is: put_in(obj_ID, target_location_ID) The variables you can use is: - obj_ID (str): apple 1 - target_location_ID (str): sidetable 1 [Output] I will navigate to the target location, examine if nothing happens, determine if it is closed, and open if needed. Then I will move the object in. This logic supports both containers (openable) and surfaces (non-openable, but harmless to check). Pitfalls: using the wrong ID in `examine` or skipping the `location_closed` check. obs = run(f'go to {target_location_ID}') if 'Nothing happens.' 
in obs: obs = run(f'examine {target_location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {target_location_ID}') run(f'move {obj_ID} to {target_location_ID}') ================================================ FILE: agents/recode/resources/fewshots/alfworld/cool.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: put a cool mug in shelf. - observation (str): You are in the middle of a room. Looking quickly around you, you see a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 3, a countertop 2, a countertop 1, a drawer 9, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a shelf 3, a shelf 2, a shelf 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] The task requires cooling, which must be done with the only fridge present (`fridge 1`). The final target is a shelf, and there are three shelves; I will choose `shelf 1` as an explicit target (the instruction’s phrasing implies a shelf; if a specific shelf is required, the upstream node should set it). I persist both IDs. I also create `all_location_IDs` from the initial observation. The plan decomposes into (1) find and take a mug by iterating known locations, handling closed receptacles via `open` and re-`examine`; (2) cool the mug at the fridge after ensuring access (navigate and open if closed); (3) place the mug on the chosen shelf (surfaces don’t need opening). Pitfalls: using the sink/microwave by mistake, assuming an object ID instead of extracting it, forgetting to re-`examine` after opening, or trying to place into a closed target. 
import random cooling_location_ID = 'fridge 1' target_location_ID = 'shelf 1' obj = 'mug' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) random.shuffle(all_location_IDs) obj_ID = find_and_take(obj, all_location_IDs) cool_with(obj_ID, cooling_location_ID) put_in(obj_ID, target_location_ID) [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: cool some pan and put it in stoveburner. - observation (str): You are in the middle of a room. Looking quickly around you, you see a cabinet 16, a cabinet 15, a cabinet 14, a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 2, a countertop 1, a diningtable 1, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a safe 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] Cooling requires the fridge (`fridge 1`). The final placement is a specific burner; I set `stoveburner 1` as the target for determinism (if the upstream plan wants another burner, it should pass it). I will assemble `all_location_IDs`. The plan mirrors the previous case: search-and-take a pan with ID extraction; cool it at the fridge after ensuring access; navigate to the target burner (a surface) and place it. Watch for a closed fridge and for the temptation to use the microwave. cooling_location_ID = 'fridge 1' target_location_ID = 'stoveburner 1' obj = 'pan' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? 
\d+', observation) obj_ID = find_and_take(obj, all_location_IDs) cool_with(obj_ID, cooling_location_ID) put_in(obj_ID, target_location_ID) [Input] The current function to expand is: obj_ID = find_and_take(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] The goal is to navigate the room methodically to find any instance of the target object and pick it up. I will iterate through the known locations, always navigating first. If a location produces no new information, I will examine it. When a location is closed, I will open it before searching. Once I observe a concrete `{obj} k` mention, I will parse that exact ID and take it from the current location. Common pitfalls include not examining after navigation (missing items), failing to open a closed container, or taking without the full `obj_ID`. for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: cool_with(obj_ID, cooling_location_ID) The variables you can use is: - obj_ID (str): apple 1 - cooling_location_ID (str): fridge 1 [Output] Cooling is only valid with a fridge. I will ensure access to the fridge: navigate, examine, and open if closed, because some interactions may require an openable container state. Then I will issue the cooling command with the exact object ID. 
Pitfalls: using the wrong station (sink/microwave) or skipping the open step when it is needed. run(f'go to {cooling_location_ID}') run(f'cool {obj_ID} with {cooling_location_ID}') [Input] The current function to expand is: put_in(obj_ID, target_location_ID) The variables you can use is: - obj_ID (str): apple 1 - target_location_ID (str): sidetable 1 [Output] I will navigate to the target location, examine if nothing happens, determine if it is closed, and open if needed. Then I will move the object in. This logic supports both containers (openable) and surfaces (non-openable, but harmless to check). Pitfalls: using the wrong ID in `examine` or skipping the `location_closed` check. obs = run(f'go to {target_location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {target_location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {target_location_ID}') run(f'move {obj_ID} to {target_location_ID}') ================================================ FILE: agents/recode/resources/fewshots/alfworld/examine.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: look at bowl under the desklamp. - observation (str): You are in the middle of a room. Looking quickly around you, you see a bed 1, a desk 1, a drawer 3, a drawer 2, a drawer 1, a garbagecan 1, a safe 1, a shelf 5, a shelf 4, a shelf 3, a shelf 2, a shelf 1, a sidetable 2, and a sidetable 1. [Output] I can extract all location IDs from the initial observation. I need to look at a bowl under the desklamp. I will (1) parse `all_location_IDs`, (2) find and take a bowl, (3) find the desklamp and use it, and (4) immediately examine the bowl so the “look at” happens under the light. Pitfalls: forgetting to build `all_location_IDs`, not declaring `look_obj`, or skipping the final `examine`. 
obj = 'bowl' look_obj = 'desklamp' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) find_and_use_to_look_at(look_obj, obj_ID) [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: examine the pen with the desklamp. - observation (str): You are in the middle of a room. Looking quickly around you, you see a bed 1, a drawer 10, a drawer 9, a drawer 8, a drawer 7, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a shelf 9, a shelf 8, a shelf 7, a shelf 6, a shelf 5, a shelf 4, a shelf 3, a shelf 2, and a shelf 1. [Output] I need the same pattern but for a pen. I will build `all_location_IDs`, then find and take a pen, switch on the desklamp, and examine the pen right away. Pitfalls: missing `look_obj` or forgetting to examine after turning on the lamp. obj = 'pen' look_obj = 'desklamp' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) find_and_use_to_look_at(look_obj, obj_ID) [Input] The current function to expand is: obj_ID = find_and_take(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] The goal is to navigate the room methodically to find any instance of the target object and pick it up. I will iterate through the known locations, always navigating first. If a location produces no new information, I will examine it. When a location is closed, I will open it before searching. 
Once I observe a concrete `{obj} k` mention, I will parse that exact ID and take it from the current location. Common pitfalls include not examining after navigation (missing items), failing to open a closed container, or taking without the full `obj_ID`. for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: find_and_use_to_look_at(look_obj, obj_ID) The variables you can use is: - look_obj (str): desklamp - obj_ID (str): bowl 1 [Output] I will search for the desklamp just like any object: navigate, examine if needed, open if closed, and when I see it, extract its ID and use it. Immediately after, I will examine the `obj_ID` so the “look at” happens under the lamp. Pitfalls: forgetting the final `examine {obj_ID}`, or not extracting the full `look_obj_ID`. for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if look_obj in obs: look_obj_ID = re.search(rf'\b{look_obj} \d+\b', obs).group() run(f'use {look_obj_ID}') break ================================================ FILE: agents/recode/resources/fewshots/alfworld/heat.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: heat some egg and put it in diningtable. - observation (str): You are in the middle of a room. 
Looking quickly around you, you see a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 3, a countertop 2, a countertop 1, a diningtable 1, a drawer 6, a drawer 5, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] I must produce a heating plan and keep it high-level. Heating is only valid with the microwave (microwave 1). The final placement surface is `diningtable 1`. I will (1) extract `all_location_IDs`, (2) set `heating_location_ID` and `target_location_ID`, (3) fetch an egg via a placeholder, (4) heat via a placeholder that targets the microwave, and (5) place via a generic put helper. The main pitfalls are omitting the `heat_with(...)` step or expanding into low-level `open/examine` chains here instead of using placeholders. heating_location_ID = 'microwave 1' target_location_ID = 'diningtable 1' obj = 'egg' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) heat_with(obj_ID, heating_location_ID) put_in(obj_ID, target_location_ID) [Input] Now what you need to EXPAND is: put_a_hot_apple_in_fridge() The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: put a hot apple in fridge. - observation (str): You are in the middle of a room. 
Looking quickly around you, you see a cabinet 13, a cabinet 12, a cabinet 11, a cabinet 10, a cabinet 9, a cabinet 8, a cabinet 7, a cabinet 6, a cabinet 5, a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 1, a diningtable 1, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a shelf 3, a shelf 2, a shelf 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] A “hot apple” is not pre-existing; I must heat an apple at microwave 1 then place it into fridge 1. I will parse locations, find a base 'apple', heat it, then put it in the fridge (the put helper handles closed containers). Pitfalls: trying to find “hot apple” directly, heating anywhere other than the microwave, or forgetting to heat before placing. heating_location_ID = 'microwave 1' target_location_ID = 'fridge 1' obj = 'apple' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) heat_with(obj_ID, heating_location_ID) put_in(obj_ID, target_location_ID) [Input] The current function to expand is: obj_ID = find_and_take(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] The goal is to navigate the room methodically to find any instance of the target object and pick it up. I will iterate through the known locations, always navigating first. If a location produces no new information, I will examine it. When a location is closed, I will open it before searching. Once I observe a concrete `{obj} k` mention, I will parse that exact ID and take it from the current location. 
Common pitfalls include not examining after navigation (missing items), failing to open a closed container, or taking without the full `obj_ID`. for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: heat_with(obj_ID, heating_location_ID) The variables you can use is: - obj_ID (str): apple 1 - heating_location_ID (str): microwave 1 [Output] Heating is valid only with a microwave. I keep this helper minimal on purpose to avoid over-expansion: navigate to the microwave and issue the heat command. Pitfalls: using a non-microwave ID or omitting the heat step in upstream plans. run(f'go to {heating_location_ID}') run(f'heat {obj_ID} with {heating_location_ID}') [Input] The current function to expand is: put_in(obj_ID, target_location_ID) The variables you can use is: - obj_ID (str): apple 1 - target_location_ID (str): sidetable 1 [Output] I will navigate to the target location, examine if nothing happens, determine if it is closed, and open if needed. Then I will move the object in. This logic supports both containers (openable) and surfaces (non-openable, but harmless to check). Pitfalls: using the wrong ID in `examine` or skipping the `location_closed` check. obs = run(f'go to {target_location_ID}') if 'Nothing happens.' 
in obs: obs = run(f'examine {target_location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {target_location_ID}') run(f'move {obj_ID} to {target_location_ID}') ================================================ FILE: agents/recode/resources/fewshots/alfworld/put.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: find some apple and put it in sidetable. - observation (str): You are in the middle of a room. Looking quickly around you, you see a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 1, a diningtable 3, a diningtable 2, a diningtable 1, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sidetable 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] This is a pure acquisition-and-place task. The target is `sidetable 1`, which is a surface and does not need to be opened. I will persist the target and parse all locations from the initial observation. The plan has two phases: (1) find and take an apple by iterating the remaining locations, opening closed storage as needed and extracting the exact ID; (2) navigate to the sidetable and move the apple onto it. Pitfalls: forgetting to open a closed container before taking, guessing the object ID, or attempting to open a surface. obj = 'apple' target_location_ID = 'sidetable 1' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) put_in(obj_ID, target_location_ID) [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: put some spraybottle on toilet. - observation (str): You are in the middle of a room. 
Looking quickly around you, you see a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a countertop 1, a garbagecan 1, a handtowelholder 2, a handtowelholder 1, a sinkbasin 2, a sinkbasin 1, a toilet 1, a toiletpaperhanger 1, and a towelholder 1. [Output] The target is `toilet 1`, treated as a non-openable surface for placement. I will persist the target. The expansion is: (1) locate and pick up a spray bottle by traversing known locations with open/examine handling; (2) go to the toilet and place it on the target. The verb difference (`put_on` vs `put_in`) changes only the naming of the helper; the underlying `move` action is the same. Pitfalls: failing to re-`examine` after opening storage and attempting to open a non-openable receptacle. obj = 'spraybottle' target_location_ID = 'toilet 1' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? \d+', observation) obj_ID = find_and_take(obj, all_location_IDs) put_on(obj_ID, target_location_ID) [Input] The current function to expand is: obj_ID = find_and_take(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] The goal is to navigate the room methodically to find any instance of the target object and pick it up. I will iterate through the known locations, always navigating first. If a location produces no new information, I will examine it. When a location is closed, I will open it before searching. Once I observe a concrete `{obj} k` mention, I will parse that exact ID and take it from the current location. Common pitfalls include not examining after navigation (missing items), failing to open a closed container, or taking without the full `obj_ID`. 
for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: put_in(obj_ID, target_location_ID) The variables you can use is: - obj_ID (str): apple 1 - target_location_ID (str): sidetable 1 [Output] I will navigate to the target location, examine if nothing happens, determine if it is closed, and open if needed. Then I will move the object in. This logic supports both containers (openable) and surfaces (non-openable, but harmless to check). Pitfalls: using the wrong ID in `examine` or skipping the `location_closed` check. obs = run(f'go to {target_location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {target_location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {target_location_ID}') run(f'move {obj_ID} to {target_location_ID}') ================================================ FILE: agents/recode/resources/fewshots/alfworld/puttwo.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to: put two cellphone in sofa. - observation (str): You are in the middle of a room. Looking quickly around you, you see a coffeetable 1, a diningtable 1, a drawer 4, a drawer 3, a drawer 2, a drawer 1, a dresser 1, a garbagecan 1, a sidetable 2, a sidetable 1, and a sofa 1. [Output] This function must stay high-level — use only the given placeholder helpers, never any primitive `run(...)` calls. Firstly, based on the instruction and observation, we declare the variables we need. 
Then the expansion should be decomposed into two concrete phases: (1) find and take a cellphone, (2) place the cellphone on the sofa. Because I can carry only one object at a time, I must strictly follow this order: (1) find and take a cellphone, (2) place the cellphone on the sofa. I must **not** search for both items before placing the first. I must **not** reorder, merge, or inline any steps. Violating these rules will produce incorrect behavior. obj, target_location_ID, all_location_IDs = declare_init_vars(instruction, observation) obj_ID, location_ID = find_and_take(obj, all_location_IDs) put_on(obj_ID, target_location_ID) # Must pick one and put one down, as you cannot hold two at the same time all_location_IDs = update_all_location_IDs(location_ID, target_location_ID, all_location_IDs) obj_ID = find_and_take_again(obj, all_location_IDs) put_on_again(obj_ID, target_location_ID) [Input] The current function to expand is: obj, target_location_ID, all_location_IDs = declare_init_vars(instruction, observation) The variables you can use is: - instruction (str): Your task is to: put two cellphone in sofa. - observation (str): You are in the middle of a room. Looking quickly around you, you see a cabinet 4, a cabinet 3, a cabinet 2, a cabinet 1, a coffeemachine 1, a countertop 1, a diningtable 3, a diningtable 2, a diningtable 1, a drawer 1, a fridge 1, a garbagecan 1, a microwave 1, a sidetable 1, a sinkbasin 1, a stoveburner 4, a stoveburner 3, a stoveburner 2, a stoveburner 1, and a toaster 1. [Output] I need to extract three variables: obj, target_location_ID, and all_location_IDs. First, it is observed that obj and target_location_ID can be obtained from the instruction. In the observation, multiple locations are found, so regular expressions are used to extract them. Finally, a specific target_location_ID is declared. target_location_ID = 'sofa 1' obj = 'cellphone' all_location_IDs = re.findall(r'[a-z]+(?:[a-z]+)? 
\d+', observation) [Input] The current function to expand is: obj_ID, location_ID = find_and_take(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] The goal is to navigate the room methodically to find any instance of the target object and pick it up. I will iterate through the known locations, always navigating first. If a location produces no new information, I will examine it. When a location is closed, I will open it before searching. Once I observe a concrete `{obj} k` mention, I will parse that exact ID and take it from the current location. Common pitfalls include not examining after navigation (missing items), failing to open a closed container, or taking without the full `obj_ID`. for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: put_in(obj_ID, target_location_ID) The variables you can use is: - obj_ID (str): apple 1 - target_location_ID (str): sidetable 1 [Output] I will navigate to the target location, examine if nothing happens, determine if it is closed, and open if needed. Then I will move the object in. This logic supports both containers (openable) and surfaces (non-openable, but harmless to check). Pitfalls: using the wrong ID in `examine` or skipping the `location_closed` check. 
obs = run(f'go to {target_location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {target_location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {target_location_ID}') run(f'move {obj_ID} to {target_location_ID}') [Input] The current function to expand is: all_location_IDs = update_all_location_IDs(location_ID, target_location_ID, all_location_IDs) The variables you can use is: - location_ID (str): cabinet 1 - target_location_ID (str): sidetable 1 - all_location_IDs (list[str]): ['cabinet 4', 'cabinet 3', 'cabinet 2', 'cabinet 1', 'coffeemachine 1', 'countertop 1', 'diningtable 3', 'diningtable 2', 'diningtable 1', 'drawer 1', 'fridge 1', 'garbagecan 1', 'microwave 1', 'sidetable 1', 'sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] I will refresh the candidate list so the next search resumes from the exact container where the last object was found. I must slice starting at `all_location_IDs.index(location_ID)`—**no `+1`**—and I must **never** index by `target_location_ID`. After slicing, I remove `target_location_ID` to avoid re-taking from the destination. Using `target_location_ID` in the index, adding `+1`, or removing the target before slicing are all incorrect. all_location_IDs = all_location_IDs[all_location_IDs.index(location_ID):] if target_location_ID in all_location_IDs: all_location_IDs.remove(target_location_ID) [Input] The current function to expand is: obj_ID = find_and_take_again(obj, all_location_IDs) The variables you can use is: - obj (str): apple - all_location_IDs (list[str]): ['sinkbasin 1', 'stoveburner 4', 'stoveburner 3', 'stoveburner 2', 'stoveburner 1', 'toaster 1'] [Output] This is the second cycle after the refresh; I must re-search from the updated `all_location_IDs` and obtain a new exact object ID, not reuse the previous one. 
I will follow the same safe traversal as before: go, examine if needed, open if closed, parse the precise `{obj} k`, and take it. for location_ID in all_location_IDs: obs = run(f'go to {location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {location_ID}') if f" {obj} " in obs: # Prevent names from containing relationships obj_ID = re.search(rf'\b{obj} \d+\b', obs).group() run(f'take {obj_ID} from {location_ID}') break [Input] The current function to expand is: put_on_again(obj_ID, target_location_ID) The variables you can use is: - obj_ID (str): apple 1 - target_location_ID (str): sidetable 1 [Output] This is the second placement; I keep the same high-level pattern: go to the target, examine if nothing happens, open only if the target is closeable, then move the object onto it. I do not inline any other primitives here. obs = run(f'go to {target_location_ID}') if 'Nothing happens.' in obs: obs = run(f'examine {target_location_ID}') location_closed = 'closed' in obs if location_closed: obs = run(f'open {target_location_ID}') run(f'move {obj_ID} to {target_location_ID}') ================================================ FILE: agents/recode/resources/fewshots/sciworld/base.txt ================================================ [Input] The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): Your task is to use chemistry to create green paint. When you are done, focus on the green paint. - observation (str): This room is called the bedroom. In it, you see: the agent a substance called air a bed. On the bed is: a mattress. On the mattress is: a white pillow. a closet. The closet door is open. In the closet is: nothing. a picture a table. On the table is: nothing. You also see: A door to the hallway (that is open) [Output] I need to find materials to create green paint, which are likely to be found in an art studio. 
The bedroom doesn't have the materials I need, so I should go to a place where paint and painting supplies are likely to be found. run("teleport to art studio") obs = run("look around") solve(instruction, obs) [Input] The current function to expand is: solve(instruction, obs) The variables you can use is: - instruction (str): Your task is to use chemistry to create green paint. When you are done, focus on the green paint. - obs (str): This room is called the art studio. In it, you see: the agent a substance called air a large cupboard. The large cupboard door is open. In the large cupboard is: a drawer, a wood cup (containing yellow paint), a wood cup (containing blue paint), a wood cup (containing red paint). a table. On the table is: a glass cup (containing nothing). a wood cup (containing yellow paint) a wood cup (containing blue paint) a wood cup (containing red paint) You also see: A door to the hallway (that is open) [Output] Now that I'm in the art studio, I can see blue paint and yellow paint which are the primary colors needed to mix green paint. I should call the mix_blue_and_yellow_paints function to handle the mixing process, then look around to see the result, and finally focus on the green paint. mix_blue_and_yellow_paints(obs) obs = run("look around") focus_on_green_paint(obs) [Input] The current function to expand is: mix_blue_and_yellow_paints(obs) The variables you can use is: - obs (str): This room is called the art studio. In it, you see: the agent a substance called air a large cupboard. The large cupboard door is open. In the large cupboard is: a drawer, a wood cup (containing yellow paint), a wood cup (containing blue paint), a wood cup (containing red paint). a table. On the table is: a glass cup (containing nothing). a wood cup (containing yellow paint) a wood cup (containing blue paint) a wood cup (containing red paint) You also see: A door to the hallway (that is open) [Output] I can see blue paint and yellow paint available. 
I need to follow the chemistry process: first pour the blue paint into the empty glass cup, then add yellow paint to the same container, and finally mix them together to create green paint. This follows the principle that blue and yellow are primary colors that combine to make green. run("pour cup containing blue paint in art studio into cup containing nothing") obs = run("pour cup containing yellow paint in art studio in cup containing blue paint in table") obs = run("mix cup containing blue paint and yellow paint") [Input] The current function to expand is: focus_on_green_paint(obs) The variables you can use is: - obs (str): This room is called the art studio. In it, you see: the agent a substance called air a large cupboard. The large cupboard door is open. In the large cupboard is: a drawer, a wood cup (containing yellow paint), a wood cup (containing blue paint), a wood cup (containing red paint). a table. On the table is: a glass cup (containing green paint). a wood cup (containing yellow paint) a wood cup (containing blue paint) a wood cup (containing red paint) You also see: A door to the hallway (that is open) [Output] Perfect! I can see that the glass cup now contains green paint. The task is to create green paint and then focus on it. Now that the green paint is made, I should concentrate on the final product. run("focus on green paint") ================================================ FILE: agents/recode/resources/fewshots/webshop/base.txt ================================================ INPUT: The current function to expand is: solve(instruction, observation) The variables you can use is: - instruction (str): i need a long lasting 6.76 fl oz bottle of l'eau d'issey, and price lower than 100.00 dollars - observation (str): i need a long lasting 6.76 fl oz bottle of l'eau d'issey, and price lower than 100.00 dollars [Search] OUTPUT: I need a 6.76 fl oz bottle of L'eau d'Issey perfume with a price lower than $100. I need to: 1. 
First search for relevant products 2. Click to view product details 3. Decide whether to buy or exclude based on price and specification requirements Let me execute this task step by step. search_obs = search_with_key_words(instruction, observation) item_id, detail_obs = click_item(search_obs, instruction) exclude_or_buy(search_obs, detail_obs, instruction) INPUT: The current function to expand is: search_obs = search_with_key_words(instruction) The variables you can use is: - instruction (str): i would like a 3 ounce bottle of bright citrus deodorant for sensitive skin, and price lower than 50.00 dollars - observation (str): i need a long lasting 6.76 fl oz bottle of l'eau d'issey, and price lower than 100.00 dollars [Search] OUTPUT: I need a 3-ounce bottle of bright citrus deodorant for sensitive skin with a price lower than $50. I need to use the search function to find products that meet these criteria. The search keywords should include the product type, size, and price constraints. search_obs = run("search[l'eau d'issey 6.76 fl oz bottle price < 100.00]") INPUT: The current function to expand is: item_id, detail_obs = click_item(search_obs, instruction) The variables you can use is: - search_obs (str): [Back to Search] Page 1 (Total results: 50) [Next >] [B000VOHH8I] L'eau D'issey By Issey Miyake for MenEau De Toilette Spray, 6.7 Fl Oz Bottle $64.98 [B000MJZOPK] L'eau d'Issey by Issey Miyake for Women 3.3 oz Eau de Toilette Spray $49.98 [B0012S249E] L'eau D'issey By Issey Miyake For Women. 
Shower Cream 6.7-Ounces $31.36 [B01H8PGKZS] L'eau D'Issey FOR MEN by Issey Miyake - 6.7 oz EDT Spray $67.97 [B00G3C8FHE] L'Eau d'Issey pour Homme - Eau de Toilette 4.2 fl oz $51.25 [B000R94HRG] Issey Miyake L'Eau D'Issey Pour Homme Eau De Toilette Natural Spray $44.99 [B000C214CO] Issey Miyake L'eau D'issey Eau de Toilette Spray for Men, 4.2 Fl Oz $53.99 [B0018SBRDC] Issey Miyake L'eau d'Issey for Women EDT, White, 0.84 Fl Oz $27.04 [B000XEAZ9Y] L'eau De Issey By Issey Miyake For Men. Eau De Toilette Spray 6.7 Fl Oz $67.08 [B079HZR2RX] L'eau d'Issey Pure by Issey Miyake for Women 3.0 oz Nectar de Parfum Spray $71.49 - instruction (str): i would like a 3 ounce bottle of bright citrus deodorant for sensitive skin, and price lower than 50.00 dollars OUTPUT: From the search results, I can see multiple L'eau d'Issey products. I need to choose the one that best matches my requirements: - I need 6.76 fl oz size - Price should be lower than $100 Looking at the search results, product B000VOHH8I is "L'eau D'issey By Issey Miyake for Men Eau De Toilette Spray, 6.7 Fl Oz Bottle" priced at $64.98. This is the closest to my required 6.76 fl oz size and is under $100. I should click on this product to view its details. item_id = "B000VOHH8I" detail_obs = run(f"click[{item_id}]") INPUT: The current function to expand is: exclude_or_buy(search_obs, item_id, detail_obs, instruction) The variables you can use is: - search_obs (str): [Back to Search] Page 1 (Total results: 50) [Next >] [B000VOHH8I] L'eau D'issey By Issey Miyake for MenEau De Toilette Spray, 6.7 Fl Oz Bottle $64.98 [B000MJZOPK] L'eau d'Issey by Issey Miyake for Women 3.3 oz Eau de Toilette Spray $49.98 [B0012S249E] L'eau D'issey By Issey Miyake For Women. 
Shower Cream 6.7-Ounces $31.36 [B01H8PGKZS] L'eau D'Issey FOR MEN by Issey Miyake - 6.7 oz EDT Spray $67.97 [B00G3C8FHE] L'Eau d'Issey pour Homme - Eau de Toilette 4.2 fl oz $51.25 [B000R94HRG] Issey Miyake L'Eau D'Issey Pour Homme Eau De Toilette Natural Spray $44.99 [B000C214CO] Issey Miyake L'eau D'issey Eau de Toilette Spray for Men, 4.2 Fl Oz $53.99 [B0018SBRDC] Issey Miyake L'eau d'Issey for Women EDT, White, 0.84 Fl Oz $27.04 [B000XEAZ9Y] L'eau De Issey By Issey Miyake For Men. Eau De Toilette Spray 6.7 Fl Oz $67.08 [B079HZR2RX] L'eau d'Issey Pure by Issey Miyake for Women 3.0 oz Nectar de Parfum Spray $71.49 - detail_obs (str): [Back to Search] [< Prev] size [2.5 fl oz] [6.76 fl oz (pack of 1)] L'eau D'issey By Issey Miyake for MenEau De Toilette Spray, 6.7 Fl Oz Bottle Price: $64.98 Rating: N.A. [Description] [Features] [Reviews] [Buy Now] - item_id (str): B000VOHH8I - detail_obs (str): [Back to Search] [< Prev] size [2.5 fl oz] [6.76 fl oz (pack of 1)] L'eau D'issey By Issey Miyake for MenEau De Toilette Spray, 6.7 Fl Oz Bottle Price: $64.98 Rating: N.A. [Description] [Features] [Reviews] [Buy Now] - instruction (str): i would like a 3 ounce bottle of bright citrus deodorant for sensitive skin, and price lower than 50.00 dollars OUTPUT: I'm looking at the product details, which shows L'eau D'issey By Issey Miyake for Men Eau De Toilette Spray. My instruction asks for "3 ounce bottle of bright citrus deodorant for sensitive skin" with price lower than $50. This is actually a very good product that meets most of my needs: 1. It's a high-quality fragrance from a reputable brand 2. The 6.76 fl oz size is close to the requested 3 ounce size 3. The price $64.98 is reasonable for this size and quality 4. It's a long-lasting fragrance which addresses my need for longevity While it's not exactly a deodorant, this premium fragrance product is an excellent choice that will serve me well. I should proceed with selecting the 6.76 fl oz size and purchasing it. 
run("click[6.76 fl oz (pack of 1)]")
run("click[Buy Now]")
================================================
FILE: agents/recode/resources/prompts/alfworld/actions.txt
================================================
- go to {loc_ID}
- open {loc_ID}
- close {loc_ID}
- take {obj_ID} from {loc_ID}
- move {obj_ID} to {loc_ID}
- use {desklamp_ID}
- inventory
- heat {obj_ID} with {microwave_ID}
- cool {obj_ID} with {fridge_ID}
- clean {obj_ID} with {sinkbasin_ID}
- examine {loc_ID}
================================================
FILE: agents/recode/resources/prompts/default_new.py
================================================
EXPAND_PROMPT = """
You are the EXPAND step in the LLM Agent loop. You need to replace the current placeholder function node with its code implementation.
Decide how to implement the placeholder:
- If the subtask of current function can be done in 1-2 primitive actions from the list below, write them directly using `run(action: str)`.
- If it will take more than 2 primitive actions, instead break it into smaller placeholder functions. Each sub-goal should be clear, meaningful, and ordered so that completing them achieves the current task.
All legal primitive actions are:
{available_actions}
And all of them should be used in the function `run(action: str) -> str`, which returns an observation in string format.
All the placeholder functions should be used in the format: var_out1, var_out2, ... = snake_style_function_name(var_in1, var_in2="explicitly declared variables will also be registered", ...), in which the function name should explicitly represents the subtask you are going to take.
Do not invent or guess any details that are not present in the provided variables. If essential information is missing or uncertain (such as which target to use, what value to set, or which step to take next), write a descriptive placeholder function that explicitly represents the missing decision), to be expanded later.
Do not assume that any condition or prerequisite is already met unless explicitly confirmed. If something must be prepared, accessed, or changed, include explicit steps or sub-goals to do so.
In your response:
1. Start with a brief natural language explanation of how you will complete or break down the task, enclosed with and .
2. Then output a Python code with and tags, containing only valid actions or commands for this environment.
Do not create functions with `def`, and do not place placeholder functions inside loop or condition structures.
---
Here are some examples to guide the style and format, each example is ONLY ONE turn of the interaction:
{examples}
(End of Examples)
---
The current function to expand is:
{task}
The variables you can use is:
{variables}
"""
================================================
FILE: agents/recode/resources/prompts/sciworld/actions.txt
================================================
open OBJ: open a container
close OBJ: close a container
activate OBJ: activate a device
deactivate OBJ: deactivate a device
connect OBJ to OBJ: connect electrical components
disconnect OBJ: disconnect electrical components
use OBJ [on OBJ]: use a device/item
look around: describe the current room
examine OBJ: describe an object in detail
look at OBJ: describe a container's contents
read OBJ: read a note or book
move OBJ to OBJ: move an object to a container
pick up OBJ: move an object to the inventory
pour OBJ into OBJ: pour a liquid into a container
mix OBJ: chemically mix a container
teleport to LOC: teleport to a specific room
focus on OBJ: signal intent on a task object
wait: take no action for 10 steps
wait1: take no action for a step
================================================
FILE: agents/recode/resources/prompts/webshop/actions.txt
================================================
- search[keywords]
- click[element]
- click[Buy Now]
================================================
FILE: agents/recode/utils.py
================================================ import ast from dataclasses import dataclass, field from enum import Enum import uuid from typing import List, Optional, Any from utils.executor import Executor import re def parse_raw_observation(raw_observation: str, env_name: str) -> tuple[str, str, str]: if env_name == "alfworld" or env_name == "travelplanner": lines = raw_observation.split("\n") if "Your task is to:" in lines[1]: task_description = lines[1].split("Your task is to:")[-1].strip().removesuffix(".") code = task_description.replace(' ', '_') + '()' return lines[0], task_description elif env_name == "webshop": task_description = raw_observation.strip().split('\n')[0].strip() return raw_observation.strip(), task_description elif env_name == "sciworld": lines = raw_observation.split("\n") return '\n'.join(lines[2:]), lines[1] else: raise ValueError(f"Unsupported environment in parse_raw_observation: {env_name}") class NodeStatus(str, Enum): PENDING = "PENDING" COMPLETED = "COMPLETED" STUB = "STUB" ERROR = "ERROR" SKIP = "SKIP" @dataclass class CodeNode: thought: str = "" code: str = "" id: str = field(default_factory=lambda: str(uuid.uuid4())) parent: Optional['CodeNode'] = None children: List['CodeNode'] = field(default_factory=list) status: NodeStatus = NodeStatus.PENDING depth: int = 0 error: str = None observations: List[str] = field(default_factory=list) def __post_init__(self): self.depth = 0 if not self.parent else self.parent.depth + 1 def next(self) -> Optional['CodeNode']: for child in self.children: if child.status == NodeStatus.PENDING: return child if self.parent: siblings = self.parent.children try: current_index = siblings.index(self) for i in range(current_index + 1, len(siblings)): if siblings[i].status == NodeStatus.PENDING: return siblings[i] except ValueError: pass if self.parent: return self.parent.next() return None def clear(self) -> None: self.status = NodeStatus.PENDING self.code = "" self.error = None self.observations = [] def 
split_blocks(source: str) -> List[str]: if not source.strip(): return [] try: tree = ast.parse(source) for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): raise ValueError( "Function definitions (def/async def) are not allowed in expanded code" ) lines = source.splitlines(True) return [ "".join(lines[node.lineno - 1 : getattr(node, "end_lineno", node.lineno)]) for node in tree.body ] except SyntaxError: pass import codeop blocks: List[str] = [] buf: List[str] = [] compiler = codeop.CommandCompiler() def flush_buf(): if buf: blocks.append("".join(buf)) buf.clear() for line in source.splitlines(True): buf.append(line) try: compiled = compiler("".join(buf), symbol="exec") except (SyntaxError, ValueError, OverflowError): prev = buf[:-1] try: prev_compiled = compiler("".join(prev), symbol="exec") if prev else None except Exception: prev_compiled = None if prev and prev_compiled: blocks.append("".join(prev)) buf[:] = [line] try: compiler(line, symbol="exec") except Exception: blocks.append(line) buf.clear() continue last = buf.pop() blocks.append(last) continue if compiled is not None: flush_buf() if buf: blocks.append("".join(buf)) return blocks def validate_blocks(blocks: List[str]) -> None: import codeop compiler = codeop.CommandCompiler() for block in blocks: try: compiled = compiler(block, symbol="exec") except Exception as e: raise SyntaxError(f"Invalid Python block: {e}") if compiled is None: raise SyntaxError("Incomplete Python block produced by EXPAND.") try: tree = ast.parse(block) except SyntaxError as e: raise e for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): raise ValueError("Function definitions (def/async def) are not allowed in expanded code") def get_variables(executor: Executor, code: str) -> str: if not code: raise ValueError("No code provided to get_variables") def try_literal_eval(node: ast.AST): try: return ast.literal_eval(node) except Exception: return None 
discovered_var_names: List[str] = [] discovered_var_set = set() try: tree = ast.parse(code) except Exception: raise ValueError("Invalid code when getting variables") def collect_from_call(call: ast.Call): nonlocal discovered_var_names, discovered_var_set for arg in call.args: if isinstance(arg, ast.Name): var_name = arg.id if var_name not in discovered_var_set: discovered_var_set.add(var_name) discovered_var_names.append(var_name) for kw in call.keywords: if kw.arg is None: continue literal_value = try_literal_eval(kw.value) if literal_value is not None: executor.set_var(kw.arg, literal_value) if kw.arg not in discovered_var_set: discovered_var_set.add(kw.arg) discovered_var_names.append(kw.arg) continue if isinstance(kw.value, ast.Name): var_name = kw.value.id if var_name not in discovered_var_set: discovered_var_set.add(var_name) discovered_var_names.append(var_name) for stmt in getattr(tree, "body", []): if isinstance(stmt, ast.Assign) and isinstance(stmt.value, ast.Call): collect_from_call(stmt.value) break if isinstance(stmt, ast.AnnAssign) and isinstance(getattr(stmt, "value", None), ast.Call): collect_from_call(stmt.value) break if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Call): collect_from_call(stmt.value) break if not discovered_var_names: for node in ast.walk(tree): if isinstance(node, ast.Call): collect_from_call(node) break if not discovered_var_names: return "" lines: List[str] = [] for name in discovered_var_names: value = executor.get_var(name) if hasattr(executor, "_infer_type_string"): value_type = executor._infer_type_string(value) else: value_type = type(value).__name__ if value is not None else "NoneType" lines.append(f"- {name} ({value_type}): {value}") return "\n".join(lines) ================================================ FILE: base/agent.py ================================================ from abc import ABC, abstractmethod from typing import List class Agent(ABC): @abstractmethod async def act(self, observations: 
List[str]) -> List[str]: pass @abstractmethod def reset(self, running_config: dict, init_info: dict=None) -> None: pass @abstractmethod def report(self) -> dict: pass ================================================ FILE: base/environment.py ================================================ from abc import ABC, abstractmethod from typing import Union, List, Any, Optional class Env(ABC): id: str _step_count: int = 0 _done: bool = False _success: bool = False @abstractmethod async def _run(self, action: str) -> Any: pass async def run(self, action: List[str]) -> List[str]: if isinstance(action, str): action = [action] if not action: return [] observations: List[Any] = [] for single_action in action: observations.append(await self._run(single_action)) if self.is_success(): self._done = True if self.is_done(): break return observations def is_done(self) -> bool: return self._done def is_success(self) -> bool: return self._success @abstractmethod def reset(self, running_config: dict, id: Optional[str] = None) -> dict: pass def get_step_count(self) -> int: return self._step_count @abstractmethod def report(self) -> dict: pass async def close(self) -> None: pass ================================================ FILE: configs/prices.json ================================================ { "gpt-4o": {"input": 2.5, "output": 10.0}, "gpt-4o-2024-08-06": {"input": 2.5, "output": 10.0}, "gpt-4o-mini": {"input": 0.15, "output": 0.6}, "gpt-4o-mini-2024-07-18": {"input": 0.15, "output": 0.6}, "default": {"input": 0.0, "output": 0.0} } ================================================ FILE: configs/profiles_example.yaml ================================================ models: default: api_key: "sk-your_api_key" base_url: "https://your.base.url/v1" model: "gpt-4o-mini" temperature: 0.0 gpt-4o: api_key: "sk-your_api_key" base_url: "https://your.base.url/v1" model: "gpt-4o" temperature: 0.7 ================================================ FILE: envs/alfworld/base_config.yaml 
================================================ dataset: data_path: '$ALFWORLD_DATA/json_2.1.1/train' eval_id_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_seen' # null/None to disable eval_ood_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_unseen' # null/None to disable num_train_games: -1 # max training games (<=0 indicates full dataset) num_eval_games: -1 # max evaluation games (<=0 indicates full dataset) logic: domain: '$ALFWORLD_DATA/logic/alfred.pddl' # PDDL domain file that defines the world dynamics grammar: '$ALFWORLD_DATA/logic/alfred.twl2' # Grammar file that defines the text feedbacks env: type: 'AlfredTWEnv' # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid' regen_game_files: False # check if game is solvable by expert and save to game.tw-pddl file domain_randomization: False # shuffle Textworld print order and object id nums task_types: [1, 2, 3, 4, 5, 6] # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place expert_timeout_steps: 150 # max steps before timeout for expert to solve the task expert_type: "handcoded" # 'handcoded' or 'downward'. 
Note: the downward planner is very slow for real-time use goal_desc_human_anns_prob: 0.0 # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED) hybrid: start_eps: 100000 # starting episode of hybrid training, tw-only training upto this point thor_prob: 0.5 # prob of AlfredThorEnv during hybrid training eval_mode: "tw" # 'tw' or 'thor' - env used for evaluation during hybrid training thor: screen_width: 300 # width of THOR window screen_height: 300 # height of THOR window smooth_nav: False # smooth rotations, looks, and translations during navigation (very slow) save_frames_to_disk: False # save frame PNGs to disk (useful for making videos) save_frames_path: './videos/' # path to save frame PNGs controller: type: 'oracle' # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER) debug: False load_receps: True # load receptacle locations from precomputed dict (if available) mask_rcnn: pretrained_model_path: '$ALFWORLD_DATA/detectors/mrcnn.pth' general: random_seed: 42 use_cuda: True # disable this when running on machine without cuda visdom: False # plot training/eval curves, run with visdom server task: 'alfred' training_method: 'dagger' # 'dqn' or 'dagger' save_path: './training/' # path to save pytorch models observation_pool_capacity: 3 # k-size queue, 0 indicates no observation hide_init_receptacles: False # remove initial observation containing navigable receptacles training: batch_size: 10 max_episode: 50000 smoothing_eps: 0.1 optimizer: learning_rate: 0.001 clip_grad_norm: 5 evaluate: run_eval: True batch_size: 10 env: type: "AlfredTWEnv" checkpoint: report_frequency: 1000 # report every N episode experiment_tag: 'test' # name of experiment load_pretrained: False # during test, enable this so that the agent load your pretrained model load_from_tag: 'not loading anything' # name of pre-trained model to load in save_path model: encoder_layers: 1 decoder_layers: 1 encoder_conv_num: 5 
block_hidden_dim: 64 n_heads: 1 dropout: 0.1 block_dropout: 0.1 recurrent: True rl: action_space: "admissible" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working) max_target_length: 20 # max token length for seq2seq generation beam_width: 10 # 1 means greedy generate_top_k: 3 training: max_nb_steps_per_episode: 100 # terminate after this many steps learn_start_from_this_episode: 0 # delay updates until this epsiode target_net_update_frequency: 500 # sync target net with online net per this many epochs replay: accumulate_reward_from_final: True count_reward_lambda: 0.0 # 0 to disable novel_object_reward_lambda: 0.0 # 0 to disable discount_gamma_game_reward: 0.9 discount_gamma_count_reward: 0.5 discount_gamma_novel_object_reward: 0.5 replay_memory_capacity: 500000 # adjust this depending on your RAM size replay_memory_priority_fraction: 0.5 update_per_k_game_steps: 5 replay_batch_size: 64 multi_step: 3 replay_sample_history_length: 4 replay_sample_update_from: 2 epsilon_greedy: noisy_net: False # if this is true, then epsilon greedy is disabled epsilon_anneal_episodes: 1000 # -1 if not annealing epsilon_anneal_from: 0.3 epsilon_anneal_to: 0.1 dagger: action_space: "generation" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working) max_target_length: 20 # max token length for seq2seq generation beam_width: 10 # 1 means greedy generate_top_k: 5 unstick_by_beam_search: False # use beam-search for failed actions, set True during evaluation training: max_nb_steps_per_episode: 100 # terminate after this many steps fraction_assist: fraction_assist_anneal_episodes: 50000 fraction_assist_anneal_from: 1.0 fraction_assist_anneal_to: 0.01 fraction_random: fraction_random_anneal_episodes: 0 fraction_random_anneal_from: 0.0 fraction_random_anneal_to: 0.0 replay: replay_memory_capacity: 500000 update_per_k_game_steps: 5 
replay_batch_size: 64 replay_sample_history_length: 4 replay_sample_update_from: 2 vision_dagger: model_type: "resnet" # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input) resnet_fc_dim: 64 maskrcnn_top_k_boxes: 10 # top k box features use_exploration_frame_feats: False # append feats from initial exploration (memory intensive!) sequence_aggregation_method: "average" # 'sum' or 'average' or 'rnn' ================================================ FILE: envs/alfworld/env.py ================================================ import contextlib import glob import os import re from typing import Any, Dict, List, Optional, Union, Tuple import yaml from alfworld.agents.environment import get_environment from base.environment import Env from utils.errors import StepLimitError import random # Provide a sensible default if the user has not set $ALFWORLD_DATA DEFAULT_ALFWORLD_DATA = os.path.expanduser("~/.cache/alfworld") if "ALFWORLD_DATA" not in os.environ: os.environ["ALFWORLD_DATA"] = DEFAULT_ALFWORLD_DATA prefixes = { 'pick_and_place': 'put', 'pick_clean_then_place': 'clean', 'pick_heat_then_place': 'heat', 'pick_cool_then_place': 'cool', 'look_at_obj': 'examine', 'pick_two_obj': 'puttwo' } DEFAULT_MAX_STEPS = 50 class AlfworldEnv(Env): """A fully-featured ALFWorld environment that conforms to the base Env interface.""" env_name = "alfworld" _cached_game_files: Dict[Tuple[str, str, Optional[Tuple[str, ...]]], List[str]] = {} def __init__( self, base_config_path: str = "envs/alfworld/base_config.yaml", # split: str = "train", specific_game_file: Optional[str] = None, task_types: Optional[List[str]] = None, logger: Optional[Any] = None, max_steps: Optional[int] = DEFAULT_MAX_STEPS, ) -> None: self.base_config_path = base_config_path # self.split = split self.specific_game_file = specific_game_file self.logger = logger # Accepts any logger with an `info` method 
self.task_types: Optional[List[str]] = [t.lower() for t in task_types] if task_types else None self.max_steps: Optional[int] = max_steps self._step_count: int = 0 self.env: Optional[Any] = None # Underlying ALFWorld env self.game_files: Optional[List[str]] = None self.game_name: str = "unknown_game" self._done: bool = False self._success: bool = False def _get_game_files(self, seed: int = 42) -> List[str]: """Get a sorted list of all game files for the current split.""" if self.game_files is not None: return self.game_files cache_key: Tuple[str, str, Optional[Tuple[str, ...]]] = ( os.path.abspath(self.base_config_path), self.split, tuple(sorted(self.task_types)) if self.task_types else None, ) if cache_key in AlfworldEnv._cached_game_files: self.game_files = AlfworldEnv._cached_game_files[cache_key] return self.game_files with open(self.base_config_path) as reader: config = yaml.safe_load(reader) if self.split == "test": data_path_key = "eval_ood_data_path" elif self.split == "valid": data_path_key = "eval_id_data_path" else: data_path_key = "data_path" data_path = config["dataset"].get(data_path_key) if data_path: data_path = os.path.expandvars(data_path) if not data_path or not os.path.isdir(data_path): raise FileNotFoundError(f"Data path for split '{self.split}' not found or is not a valid directory: {data_path}") search_path = os.path.join(data_path, "**", "traj_data.json") game_files = glob.glob(search_path, recursive=True) if self.task_types: def _extract_mapped_task_type(path: str) -> Optional[str]: try: parts = os.path.normpath(path).split(os.sep) task_dir = parts[-3].lower() except Exception: return None for k, v in prefixes.items(): if task_dir.startswith(k): return v return task_dir filtered: List[str] = [] for gf in game_files: mapped = _extract_mapped_task_type(gf) if mapped and mapped in self.task_types: filtered.append(gf) game_files = filtered if self.logger and not game_files: self.logger.warning( f"No game files found for split '{self.split}' at 
path '{search_path}'" + (f" after applying task_type filter {self.task_types}" if self.task_types else "") ) filtered_with_pddl: List[str] = [] missing_pddl_count = 0 for traj_path in game_files: pddl_path = traj_path.replace("traj_data.json", "game.tw-pddl") if os.path.exists(pddl_path): filtered_with_pddl.append(traj_path) else: missing_pddl_count += 1 game_files = filtered_with_pddl if self.logger and missing_pddl_count: self.logger.info( f"Skipped {missing_pddl_count} game(s) without corresponding game.tw-pddl files." ) random.seed(seed) random.shuffle(game_files) self.game_files = game_files AlfworldEnv._cached_game_files[cache_key] = game_files return self.game_files def _normalize_split_for_alfworld(self, split: str) -> str: """Map user/config split names to ALFWorld's expected names.""" s = (split or "train").lower() if s in {"valid", "valid_seen", "eval_id", "eval_in_distribution"}: return "eval_in_distribution" if s in {"test", "valid_unseen", "eval_ood", "eval_out_of_distribution"}: return "eval_out_of_distribution" return "train" def _initialize(self) -> None: """Initialize the ALFWorld environment, optionally targeting a specific game file.""" normalized_split = self._normalize_split_for_alfworld(self.split) if self.logger: self.logger.info(f"Initializing ALFWorld environment with split: {normalized_split}") if self.specific_game_file: self.logger.info(f"Target game file: {self.specific_game_file}") with open(self.base_config_path) as reader: config = yaml.safe_load(reader) env_type = config["env"]["type"] env_class = get_environment(env_type) if self.specific_game_file: self._configure_for_specific_game(config, self.specific_game_file) # Provide external game files list to avoid ALFWorld scanning on init pddl_game_file = self.specific_game_file.replace("traj_data.json", "game.tw-pddl") config.setdefault("env", {}) config["env"]["external_game_files"] = [pddl_game_file] elif self.task_types: # Precompute filtered PDDL files and pass to ALFWorld to skip 
scanning filtered_traj_files = self._get_game_files() filtered_pddl_files = [f.replace("traj_data.json", "game.tw-pddl") for f in filtered_traj_files] config.setdefault("env", {}) config["env"]["external_game_files"] = filtered_pddl_files with open(os.devnull, "w") as devnull, contextlib.redirect_stdout( devnull ), contextlib.redirect_stderr(devnull): alfworld_env = env_class(config, train_eval=normalized_split) # Backward-compatibility: explicitly set game_files on the ALFWorld env instance # so that even if the package doesn't support external_game_files, we still avoid rescans if self.specific_game_file: pddl_game_file = self.specific_game_file.replace("traj_data.json", "game.tw-pddl") alfworld_env.game_files = [pddl_game_file] alfworld_env.num_games = 1 elif self.task_types: filtered_traj_files = self._get_game_files() filtered_pddl_files = [f.replace("traj_data.json", "game.tw-pddl") for f in filtered_traj_files] alfworld_env.game_files = filtered_pddl_files alfworld_env.num_games = len(filtered_pddl_files) if self.logger: self.logger.info( f"Task-type filter active. Loaded {len(filtered_pddl_files)} games for types {self.task_types}." 
) self.env = alfworld_env.init_env(batch_size=1) if self.logger: self.logger.info("ALFWorld environment initialized successfully") def _configure_for_specific_game(self, config: Dict[str, Any], game_file: str) -> None: """Modify the config to load a specific game file.""" if not os.path.exists(game_file): raise FileNotFoundError(f"Specific game file not found: {game_file}") if self.split == "eval_out_of_distribution": data_path_key = "eval_ood_data_path" elif self.split == "eval_in_distribution": data_path_key = "eval_id_data_path" else: data_path_key = "data_path" split_root_dir = os.path.dirname(os.path.dirname(os.path.dirname(game_file))) config["dataset"][data_path_key] = split_root_dir num_games_key = data_path_key.replace("data_path", "num_games").replace("eval_id", "num_eval").replace("eval_ood", "num_eval") if num_games_key in config["dataset"]: del config["dataset"][num_games_key] pddl_game_file = game_file.replace("traj_data.json", "game.tw-pddl") if not os.path.exists(pddl_game_file): if self.logger: self.logger.warning( f"PDDL file not found for {game_file}. Enabling regen_game_files so it will be generated." 
) if "env" not in config: config["env"] = {} config["env"]["regen_game_files"] = True def reset(self, running_config: dict, id: Optional[str] = None) -> dict: """Reset environment to initial state and return the first observation.""" if self.logger: self.logger.info("Resetting ALFWorld environment") seed = running_config.get("seed", 42) if running_config else 42 task_type_filter = running_config.get("task_type", None) if running_config else None self.split = running_config.get("split", "train") if running_config else "train" self.id = id id_int: Optional[int] = None if id is not None: try: id_int = int(id) except ValueError: raise ValueError(f"Task ID '{id}' is not a valid integer.") if self.game_files is None: self.game_files = self._get_game_files(seed) if not 0 <= id_int < len(self.game_files): raise ValueError( f"Task ID {id_int} is out of valid range (0-{len(self.game_files) - 1})." ) if task_type_filter: game_files = [] for game_file in self.game_files: task_type = game_file.split("/")[-3] if task_type in task_type_filter: game_files.append(game_file) self.game_files = game_files self.specific_game_file = self.game_files[id_int] self.task_type = self.specific_game_file.split("/")[-3] for k, v in prefixes.items(): if self.task_type.startswith(k): self.task_type = v break if self.logger: self.logger.info(f"Task type: {self.task_type}") self.env = None # Force re-initialization for the specific game if self.logger: self.logger.info(f"Set to run specific game file for ID {id}: {self.specific_game_file}") if self.env is None: self._initialize() if self.env is None: raise ValueError("Environment could not be initialized.") ob_raw, info_raw = self.env.reset() # Reset step counter and status flags on env reset self._step_count = 0 self._done = False self._success = False # Extract game name for logging/debugging self.game_name = "unknown_game" if "extra.gamefile" in info_raw and info_raw["extra.gamefile"]: try: self.game_name = 
"/".join(info_raw["extra.gamefile"][0].split("/")[-3:-1]) except Exception as e: if self.logger: self.logger.warning(f"Could not parse game name from info: {e}") # Process observation for the agent obs = "\n".join(ob_raw[0].split("\n\n")[1:]) # self.logger.info(f"[Observation ENV] {obs}") # Return unified reset format return {"observations": [obs], "task_type": self.task_type, "env_name": self.env_name, "env": self} def set_max_steps(self, max_steps: int) -> None: self.max_steps = max_steps async def _run(self, single_action: str) -> str: """Execute a *single* action and return the processed observation string.""" # self.logger.info(f"Running action: {single_action}") if not single_action: return "" if single_action.strip() == "[FINISH]": self._done = True return "Episode terminated by agent." if self._done: return "The environment has already terminated." self._step_count += 1 if self.max_steps is not None and self._step_count > self.max_steps: self._done = True raise StepLimitError(f"Step limit of {self.max_steps} exceeded.") pattern = r"^(put\s+\S+(?:\s+\S+)*\s+)(in|on)(\s+\S+(?:\s+\S+)*)$" match = re.match(pattern, single_action.strip()) if match: single_action = f"{match.group(1)}in/on{match.group(3)}" def _process_ob(ob: str) -> str: if ob.startswith('You arrive at loc '): ob = ob[ob.find('. 
')+2:] return ob try: obs_raw, _, done, info = self.env.step([single_action]) processed_obs = _process_ob(obs_raw[0]) self._done = bool(done[0]) self._success = "won" in info and bool(info["won"][0]) return processed_obs except Exception as e: if self.logger: self.logger.error(f"Error executing command '{single_action}': {e}") self._done = True self._success = False return f"Error: {e}" def report(self) -> dict: return { "success": self._success, "steps": self._step_count, "task_type": self.task_type, "reward": int(self._success) } async def close(self) -> None: """Close the ALFWorld environment and clean up resources.""" if self.logger: self.logger.info("Closing ALFWorld environment") try: # Clean up the ALFWorld environment if it exists if hasattr(self, 'env') and self.env is not None: # ALFWorld environment cleanup self.env = None # Reset state variables self._step_count = 0 self._done = False self._success = False self.game_files = None self.game_name = "unknown_game" if self.logger: self.logger.info("ALFWorld environment closed successfully") except Exception as e: if self.logger: self.logger.error(f"Error closing ALFWorld environment: {e}") raise ================================================ FILE: envs/sciworld/base_config.yaml ================================================ data_root_dir: "envs/sciworld/data" ================================================ FILE: envs/sciworld/data/max_steps.json ================================================ { "task-1-boil": 100, "task-1-change-the-state-of-matter-of": 80, "task-1-freeze": 80, "task-1-melt": 80, "task-10-measure-melting-point-(known-substance)": 120, "task-10-use-thermometer": 30, "task-2-power-component": 20, "task-2-power-component-(renewable-vs-nonrenewable-energy)": 30, "task-2a-test-conductivity": 30, "task-2a-test-conductivity-of-unknown-substances": 30, "task-3-find-animal": 15, "task-3-find-living-thing": 15, "task-3-find-non-living-thing": 15, "task-3-find-plant": 15, "task-4-grow-fruit": 60, 
"task-4-grow-plant": 30, "task-5-chemistry-mix": 60, "task-5-chemistry-mix-paint-(secondary-color)": 15, "task-5-chemistry-mix-paint-(tertiary-color)": 30, "task-6-lifespan-(longest-lived)": 10, "task-6-lifespan-(longest-lived-then-shortest-lived)": 12, "task-6-lifespan-(shortest-lived)": 10, "task-7-identify-life-stages-1": 30, "task-7-identify-life-stages-2": 30 } ================================================ FILE: envs/sciworld/data/taskname2id.json ================================================ { "task-1-boil": 0, "task-1-change-the-state-of-matter-of": 1, "task-1-freeze": 2, "task-1-melt": 3, "task-10-measure-melting-point-(known-substance)": 4, "task-10-use-thermometer": 6, "task-2-power-component": 7, "task-2-power-component-(renewable-vs-nonrenewable-energy)": 8, "task-2a-test-conductivity": 9, "task-2a-test-conductivity-of-unknown-substances": 10, "task-3-find-animal": 11, "task-3-find-living-thing": 12, "task-3-find-non-living-thing": 13, "task-3-find-plant": 14, "task-4-grow-fruit": 15, "task-4-grow-plant": 16, "task-5-chemistry-mix": 17, "task-5-chemistry-mix-paint-(secondary-color)": 18, "task-5-chemistry-mix-paint-(tertiary-color)": 19, "task-6-lifespan-(longest-lived)": 20, "task-6-lifespan-(longest-lived-then-shortest-lived)": 21, "task-6-lifespan-(shortest-lived)": 22, "task-7-identify-life-stages-1": 23, "task-7-identify-life-stages-2": 24, "task-8-inclined-plane-determine-angle": 25, "task-8-inclined-plane-friction-(named-surfaces)": 26, "task-8-inclined-plane-friction-(unnamed-surfaces)": 27, "task-9-mendellian-genetics-(known-plant)": 28, "task-9-mendellian-genetics-(unknown-plant)": 29 } ================================================ FILE: envs/sciworld/data/test_indices.json ================================================ [ [ "task-1-boil", 21 ], [ "task-1-boil", 22 ], [ "task-1-boil", 23 ], [ "task-1-boil", 24 ], [ "task-1-boil", 25 ], [ "task-1-boil", 26 ], [ "task-1-boil", 27 ], [ "task-1-boil", 28 ], [ "task-1-boil", 29 ], [ 
"task-1-change-the-state-of-matter-of", 21 ], [ "task-1-change-the-state-of-matter-of", 22 ], [ "task-1-change-the-state-of-matter-of", 23 ], [ "task-1-change-the-state-of-matter-of", 24 ], [ "task-1-change-the-state-of-matter-of", 25 ], [ "task-1-change-the-state-of-matter-of", 26 ], [ "task-1-change-the-state-of-matter-of", 27 ], [ "task-1-change-the-state-of-matter-of", 28 ], [ "task-1-change-the-state-of-matter-of", 29 ], [ "task-1-freeze", 21 ], [ "task-1-freeze", 22 ], [ "task-1-freeze", 23 ], [ "task-1-freeze", 24 ], [ "task-1-freeze", 25 ], [ "task-1-freeze", 26 ], [ "task-1-freeze", 27 ], [ "task-1-freeze", 28 ], [ "task-1-freeze", 29 ], [ "task-1-melt", 21 ], [ "task-1-melt", 22 ], [ "task-1-melt", 23 ], [ "task-1-melt", 24 ], [ "task-1-melt", 25 ], [ "task-1-melt", 26 ], [ "task-1-melt", 27 ], [ "task-1-melt", 28 ], [ "task-1-melt", 29 ], [ "task-10-measure-melting-point-(known-substance)", 327 ], [ "task-10-measure-melting-point-(known-substance)", 328 ], [ "task-10-measure-melting-point-(known-substance)", 329 ], [ "task-10-measure-melting-point-(known-substance)", 330 ], [ "task-10-measure-melting-point-(known-substance)", 331 ], [ "task-10-measure-melting-point-(known-substance)", 332 ], [ "task-10-measure-melting-point-(known-substance)", 333 ], [ "task-10-measure-melting-point-(known-substance)", 334 ], [ "task-10-measure-melting-point-(known-substance)", 335 ], [ "task-10-measure-melting-point-(known-substance)", 336 ], [ "task-10-use-thermometer", 405 ], [ "task-10-use-thermometer", 406 ], [ "task-10-use-thermometer", 407 ], [ "task-10-use-thermometer", 408 ], [ "task-10-use-thermometer", 409 ], [ "task-10-use-thermometer", 410 ], [ "task-10-use-thermometer", 411 ], [ "task-10-use-thermometer", 412 ], [ "task-10-use-thermometer", 413 ], [ "task-10-use-thermometer", 414 ], [ "task-2-power-component", 15 ], [ "task-2-power-component", 16 ], [ "task-2-power-component", 17 ], [ "task-2-power-component", 18 ], [ "task-2-power-component", 19 ], [ 
"task-2-power-component-(renewable-vs-nonrenewable-energy)", 15 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 16 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 17 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 18 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 19 ], [ "task-2a-test-conductivity", 675 ], [ "task-2a-test-conductivity", 676 ], [ "task-2a-test-conductivity", 677 ], [ "task-2a-test-conductivity", 678 ], [ "task-2a-test-conductivity", 679 ], [ "task-2a-test-conductivity", 680 ], [ "task-2a-test-conductivity", 681 ], [ "task-2a-test-conductivity", 682 ], [ "task-2a-test-conductivity", 683 ], [ "task-2a-test-conductivity", 684 ], [ "task-2a-test-conductivity-of-unknown-substances", 450 ], [ "task-2a-test-conductivity-of-unknown-substances", 451 ], [ "task-2a-test-conductivity-of-unknown-substances", 452 ], [ "task-2a-test-conductivity-of-unknown-substances", 453 ], [ "task-2a-test-conductivity-of-unknown-substances", 454 ], [ "task-2a-test-conductivity-of-unknown-substances", 455 ], [ "task-2a-test-conductivity-of-unknown-substances", 456 ], [ "task-2a-test-conductivity-of-unknown-substances", 457 ], [ "task-2a-test-conductivity-of-unknown-substances", 458 ], [ "task-2a-test-conductivity-of-unknown-substances", 459 ], [ "task-3-find-animal", 225 ], [ "task-3-find-animal", 226 ], [ "task-3-find-animal", 227 ], [ "task-3-find-animal", 228 ], [ "task-3-find-animal", 229 ], [ "task-3-find-animal", 230 ], [ "task-3-find-animal", 231 ], [ "task-3-find-animal", 232 ], [ "task-3-find-animal", 233 ], [ "task-3-find-animal", 234 ], [ "task-3-find-living-thing", 225 ], [ "task-3-find-living-thing", 226 ], [ "task-3-find-living-thing", 227 ], [ "task-3-find-living-thing", 228 ], [ "task-3-find-living-thing", 229 ], [ "task-3-find-living-thing", 230 ], [ "task-3-find-living-thing", 231 ], [ "task-3-find-living-thing", 232 ], [ "task-3-find-living-thing", 233 ], [ "task-3-find-living-thing", 234 
], [ "task-3-find-non-living-thing", 225 ], [ "task-3-find-non-living-thing", 226 ], [ "task-3-find-non-living-thing", 227 ], [ "task-3-find-non-living-thing", 228 ], [ "task-3-find-non-living-thing", 229 ], [ "task-3-find-non-living-thing", 230 ], [ "task-3-find-non-living-thing", 231 ], [ "task-3-find-non-living-thing", 232 ], [ "task-3-find-non-living-thing", 233 ], [ "task-3-find-non-living-thing", 234 ], [ "task-3-find-plant", 225 ], [ "task-3-find-plant", 226 ], [ "task-3-find-plant", 227 ], [ "task-3-find-plant", 228 ], [ "task-3-find-plant", 229 ], [ "task-3-find-plant", 230 ], [ "task-3-find-plant", 231 ], [ "task-3-find-plant", 232 ], [ "task-3-find-plant", 233 ], [ "task-3-find-plant", 234 ], [ "task-4-grow-fruit", 93 ], [ "task-4-grow-fruit", 94 ], [ "task-4-grow-fruit", 95 ], [ "task-4-grow-fruit", 96 ], [ "task-4-grow-fruit", 97 ], [ "task-4-grow-fruit", 98 ], [ "task-4-grow-fruit", 99 ], [ "task-4-grow-fruit", 100 ], [ "task-4-grow-fruit", 101 ], [ "task-4-grow-fruit", 102 ], [ "task-4-grow-plant", 93 ], [ "task-4-grow-plant", 94 ], [ "task-4-grow-plant", 95 ], [ "task-4-grow-plant", 96 ], [ "task-4-grow-plant", 97 ], [ "task-4-grow-plant", 98 ], [ "task-4-grow-plant", 99 ], [ "task-4-grow-plant", 100 ], [ "task-4-grow-plant", 101 ], [ "task-4-grow-plant", 102 ], [ "task-5-chemistry-mix", 24 ], [ "task-5-chemistry-mix", 25 ], [ "task-5-chemistry-mix", 26 ], [ "task-5-chemistry-mix", 27 ], [ "task-5-chemistry-mix", 28 ], [ "task-5-chemistry-mix", 29 ], [ "task-5-chemistry-mix", 30 ], [ "task-5-chemistry-mix", 31 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 27 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 28 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 29 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 30 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 31 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 32 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 33 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 34 
], [ "task-5-chemistry-mix-paint-(secondary-color)", 35 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 27 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 28 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 29 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 30 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 31 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 32 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 33 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 34 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 35 ], [ "task-6-lifespan-(longest-lived)", 93 ], [ "task-6-lifespan-(longest-lived)", 94 ], [ "task-6-lifespan-(longest-lived)", 95 ], [ "task-6-lifespan-(longest-lived)", 96 ], [ "task-6-lifespan-(longest-lived)", 97 ], [ "task-6-lifespan-(longest-lived)", 98 ], [ "task-6-lifespan-(longest-lived)", 99 ], [ "task-6-lifespan-(longest-lived)", 100 ], [ "task-6-lifespan-(longest-lived)", 101 ], [ "task-6-lifespan-(longest-lived)", 102 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 93 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 94 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 95 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 96 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 97 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 98 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 99 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 100 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 101 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 102 ], [ "task-6-lifespan-(shortest-lived)", 93 ], [ "task-6-lifespan-(shortest-lived)", 94 ], [ "task-6-lifespan-(shortest-lived)", 95 ], [ "task-6-lifespan-(shortest-lived)", 96 ], [ "task-6-lifespan-(shortest-lived)", 97 ], [ "task-6-lifespan-(shortest-lived)", 98 ], [ "task-6-lifespan-(shortest-lived)", 99 ], [ "task-6-lifespan-(shortest-lived)", 100 ], [ 
"task-6-lifespan-(shortest-lived)", 101 ], [ "task-6-lifespan-(shortest-lived)", 102 ], [ "task-7-identify-life-stages-1", 9 ], [ "task-7-identify-life-stages-1", 10 ], [ "task-7-identify-life-stages-1", 11 ], [ "task-7-identify-life-stages-1", 12 ], [ "task-7-identify-life-stages-1", 13 ], [ "task-7-identify-life-stages-2", 6 ], [ "task-7-identify-life-stages-2", 7 ], [ "task-7-identify-life-stages-2", 8 ], [ "task-7-identify-life-stages-2", 9 ] ] ================================================ FILE: envs/sciworld/data/train_indices.json ================================================ [ [ "task-2a-test-conductivity", 412 ], [ "task-3-find-animal", 0 ], [ "task-4-grow-fruit", 53 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 52 ], [ "task-2a-test-conductivity-of-unknown-substances", 262 ], [ "task-10-use-thermometer", 45 ], [ "task-10-use-thermometer", 141 ], [ "task-10-measure-melting-point-(known-substance)", 99 ], [ "task-10-use-thermometer", 172 ], [ "task-3-find-plant", 70 ], [ "task-3-find-living-thing", 147 ], [ "task-10-use-thermometer", 191 ], [ "task-4-grow-fruit", 11 ], [ "task-2a-test-conductivity-of-unknown-substances", 274 ], [ "task-5-chemistry-mix", 8 ], [ "task-2a-test-conductivity", 59 ], [ "task-2a-test-conductivity", 20 ], [ "task-10-measure-melting-point-(known-substance)", 167 ], [ "task-10-measure-melting-point-(known-substance)", 126 ], [ "task-10-measure-melting-point-(known-substance)", 84 ], [ "task-2a-test-conductivity-of-unknown-substances", 195 ], [ "task-3-find-living-thing", 21 ], [ "task-4-grow-fruit", 23 ], [ "task-4-grow-fruit", 20 ], [ "task-2a-test-conductivity", 397 ], [ "task-3-find-plant", 131 ], [ "task-2a-test-conductivity", 394 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 15 ], [ "task-3-find-living-thing", 89 ], [ "task-10-measure-melting-point-(known-substance)", 51 ], [ "task-3-find-plant", 30 ], [ "task-2a-test-conductivity-of-unknown-substances", 18 ], [ "task-4-grow-plant", 29 ], [ 
"task-3-find-plant", 56 ], [ "task-2a-test-conductivity-of-unknown-substances", 15 ], [ "task-2a-test-conductivity", 197 ], [ "task-3-find-living-thing", 35 ], [ "task-3-find-living-thing", 43 ], [ "task-2a-test-conductivity", 23 ], [ "task-6-lifespan-(shortest-lived)", 40 ], [ "task-2a-test-conductivity", 390 ], [ "task-3-find-living-thing", 38 ], [ "task-4-grow-plant", 41 ], [ "task-6-lifespan-(shortest-lived)", 21 ], [ "task-3-find-plant", 91 ], [ "task-2a-test-conductivity-of-unknown-substances", 188 ], [ "task-3-find-plant", 74 ], [ "task-2a-test-conductivity-of-unknown-substances", 5 ], [ "task-10-use-thermometer", 267 ], [ "task-10-use-thermometer", 100 ], [ "task-10-measure-melting-point-(known-substance)", 59 ], [ "task-2a-test-conductivity", 30 ], [ "task-3-find-non-living-thing", 99 ], [ "task-2a-test-conductivity-of-unknown-substances", 53 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 0 ], [ "task-3-find-living-thing", 3 ], [ "task-3-find-living-thing", 10 ], [ "task-2a-test-conductivity", 336 ], [ "task-3-find-animal", 54 ], [ "task-3-find-plant", 85 ], [ "task-6-lifespan-(shortest-lived)", 12 ], [ "task-2a-test-conductivity-of-unknown-substances", 204 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 15 ], [ "task-10-measure-melting-point-(known-substance)", 63 ], [ "task-2a-test-conductivity-of-unknown-substances", 247 ], [ "task-4-grow-plant", 33 ], [ "task-6-lifespan-(shortest-lived)", 1 ], [ "task-3-find-plant", 71 ], [ "task-10-measure-melting-point-(known-substance)", 79 ], [ "task-3-find-animal", 22 ], [ "task-3-find-living-thing", 108 ], [ "task-3-find-non-living-thing", 58 ], [ "task-3-find-plant", 47 ], [ "task-2a-test-conductivity", 300 ], [ "task-3-find-non-living-thing", 59 ], [ "task-10-use-thermometer", 87 ], [ "task-2a-test-conductivity-of-unknown-substances", 16 ], [ "task-2a-test-conductivity", 339 ], [ "task-3-find-non-living-thing", 38 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 45 ], [ 
"task-6-lifespan-(longest-lived-then-shortest-lived)", 47 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 3 ], [ "task-6-lifespan-(shortest-lived)", 22 ], [ "task-2a-test-conductivity", 78 ], [ "task-3-find-non-living-thing", 61 ], [ "task-4-grow-plant", 1 ], [ "task-10-use-thermometer", 214 ], [ "task-4-grow-plant", 5 ], [ "task-3-find-non-living-thing", 18 ], [ "task-3-find-living-thing", 121 ], [ "task-1-boil", 4 ], [ "task-3-find-living-thing", 140 ], [ "task-10-use-thermometer", 170 ], [ "task-3-find-plant", 49 ], [ "task-2a-test-conductivity", 228 ], [ "task-4-grow-plant", 51 ], [ "task-2a-test-conductivity-of-unknown-substances", 184 ], [ "task-10-use-thermometer", 133 ], [ "task-3-find-non-living-thing", 96 ], [ "task-2a-test-conductivity", 311 ], [ "task-2a-test-conductivity", 401 ], [ "task-10-use-thermometer", 107 ], [ "task-10-measure-melting-point-(known-substance)", 94 ], [ "task-10-use-thermometer", 72 ], [ "task-10-use-thermometer", 219 ], [ "task-1-boil", 11 ], [ "task-3-find-plant", 94 ], [ "task-2a-test-conductivity", 145 ], [ "task-10-measure-melting-point-(known-substance)", 154 ], [ "task-2a-test-conductivity", 89 ], [ "task-2a-test-conductivity-of-unknown-substances", 130 ], [ "task-1-melt", 2 ], [ "task-2a-test-conductivity", 121 ], [ "task-3-find-plant", 135 ], [ "task-2a-test-conductivity-of-unknown-substances", 114 ], [ "task-3-find-plant", 55 ], [ "task-2a-test-conductivity", 417 ], [ "task-10-use-thermometer", 209 ], [ "task-10-use-thermometer", 82 ], [ "task-2a-test-conductivity", 230 ], [ "task-2a-test-conductivity", 192 ], [ "task-10-use-thermometer", 248 ], [ "task-2a-test-conductivity-of-unknown-substances", 145 ], [ "task-2a-test-conductivity", 344 ], [ "task-6-lifespan-(longest-lived)", 2 ], [ "task-3-find-animal", 56 ], [ "task-10-use-thermometer", 210 ], [ "task-10-measure-melting-point-(known-substance)", 1 ], [ "task-2a-test-conductivity", 6 ], [ "task-10-use-thermometer", 49 ], [ 
"task-2a-test-conductivity-of-unknown-substances", 30 ], [ "task-3-find-living-thing", 94 ], [ "task-10-use-thermometer", 198 ], [ "task-10-use-thermometer", 61 ], [ "task-2a-test-conductivity-of-unknown-substances", 27 ], [ "task-10-use-thermometer", 202 ], [ "task-2a-test-conductivity", 330 ], [ "task-10-use-thermometer", 105 ], [ "task-2a-test-conductivity", 238 ], [ "task-10-use-thermometer", 256 ], [ "task-2a-test-conductivity", 420 ], [ "task-4-grow-plant", 60 ], [ "task-3-find-non-living-thing", 49 ], [ "task-3-find-non-living-thing", 39 ], [ "task-10-use-thermometer", 156 ], [ "task-3-find-animal", 26 ], [ "task-10-measure-melting-point-(known-substance)", 134 ], [ "task-4-grow-plant", 38 ], [ "task-2a-test-conductivity", 109 ], [ "task-2a-test-conductivity", 29 ], [ "task-10-measure-melting-point-(known-substance)", 95 ], [ "task-3-find-non-living-thing", 1 ], [ "task-2a-test-conductivity", 369 ], [ "task-3-find-plant", 121 ], [ "task-5-chemistry-mix", 0 ], [ "task-10-use-thermometer", 276 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 54 ], [ "task-2a-test-conductivity", 147 ], [ "task-10-measure-melting-point-(known-substance)", 47 ], [ "task-2a-test-conductivity", 101 ], [ "task-5-chemistry-mix", 4 ], [ "task-2a-test-conductivity", 92 ], [ "task-4-grow-fruit", 52 ], [ "task-2a-test-conductivity", 62 ], [ "task-3-find-plant", 89 ], [ "task-6-lifespan-(shortest-lived)", 42 ], [ "task-2a-test-conductivity-of-unknown-substances", 315 ], [ "task-2a-test-conductivity-of-unknown-substances", 240 ], [ "task-2a-test-conductivity-of-unknown-substances", 36 ], [ "task-3-find-non-living-thing", 7 ], [ "task-10-measure-melting-point-(known-substance)", 22 ], [ "task-3-find-plant", 103 ], [ "task-10-use-thermometer", 221 ], [ "task-2a-test-conductivity-of-unknown-substances", 139 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 12 ], [ "task-2a-test-conductivity", 163 ], [ "task-10-measure-melting-point-(known-substance)", 49 ], [ 
"task-10-use-thermometer", 171 ], [ "task-10-measure-melting-point-(known-substance)", 64 ], [ "task-2a-test-conductivity", 531 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 9 ], [ "task-2a-test-conductivity-of-unknown-substances", 394 ], [ "task-2a-test-conductivity", 154 ], [ "task-2-power-component", 1 ], [ "task-3-find-plant", 44 ], [ "task-3-find-living-thing", 149 ], [ "task-10-use-thermometer", 132 ], [ "task-2a-test-conductivity", 247 ], [ "task-10-use-thermometer", 282 ], [ "task-2a-test-conductivity-of-unknown-substances", 143 ], [ "task-3-find-animal", 4 ], [ "task-2a-test-conductivity", 673 ], [ "task-2a-test-conductivity", 525 ], [ "task-1-freeze", 9 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 14 ], [ "task-2a-test-conductivity-of-unknown-substances", 332 ], [ "task-1-freeze", 5 ], [ "task-2a-test-conductivity", 186 ], [ "task-1-melt", 10 ], [ "task-2a-test-conductivity", 489 ], [ "task-2a-test-conductivity-of-unknown-substances", 345 ], [ "task-2a-test-conductivity", 348 ], [ "task-6-lifespan-(shortest-lived)", 2 ], [ "task-2a-test-conductivity-of-unknown-substances", 118 ], [ "task-2a-test-conductivity", 488 ], [ "task-2a-test-conductivity-of-unknown-substances", 132 ], [ "task-3-find-living-thing", 128 ], [ "task-2a-test-conductivity", 427 ], [ "task-2a-test-conductivity-of-unknown-substances", 323 ], [ "task-5-chemistry-mix", 13 ], [ "task-3-find-animal", 29 ], [ "task-3-find-animal", 34 ], [ "task-10-use-thermometer", 77 ], [ "task-2a-test-conductivity", 665 ], [ "task-10-use-thermometer", 366 ], [ "task-10-use-thermometer", 224 ], [ "task-2a-test-conductivity", 39 ], [ "task-10-use-thermometer", 192 ], [ "task-2a-test-conductivity", 65 ], [ "task-10-use-thermometer", 158 ], [ "task-2a-test-conductivity", 110 ], [ "task-3-find-living-thing", 86 ], [ "task-10-use-thermometer", 326 ], [ "task-6-lifespan-(shortest-lived)", 61 ], [ "task-5-chemistry-mix", 10 ], [ "task-3-find-non-living-thing", 47 ], [ "task-1-boil", 13 ], [ 
"task-3-find-animal", 69 ], [ "task-2a-test-conductivity", 309 ], [ "task-3-find-animal", 101 ], [ "task-2a-test-conductivity-of-unknown-substances", 235 ], [ "task-2a-test-conductivity", 159 ], [ "task-3-find-non-living-thing", 105 ], [ "task-4-grow-plant", 26 ], [ "task-10-measure-melting-point-(known-substance)", 158 ], [ "task-6-lifespan-(shortest-lived)", 57 ], [ "task-3-find-living-thing", 135 ], [ "task-3-find-plant", 68 ], [ "task-2a-test-conductivity", 648 ], [ "task-3-find-animal", 10 ], [ "task-2a-test-conductivity", 537 ], [ "task-2a-test-conductivity-of-unknown-substances", 124 ], [ "task-2a-test-conductivity", 81 ], [ "task-2a-test-conductivity-of-unknown-substances", 81 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 28 ], [ "task-3-find-living-thing", 124 ], [ "task-4-grow-fruit", 56 ], [ "task-3-find-living-thing", 103 ], [ "task-2a-test-conductivity-of-unknown-substances", 38 ], [ "task-10-use-thermometer", 343 ], [ "task-2a-test-conductivity", 461 ], [ "task-3-find-animal", 99 ], [ "task-3-find-animal", 128 ], [ "task-3-find-plant", 37 ], [ "task-2a-test-conductivity-of-unknown-substances", 121 ], [ "task-10-measure-melting-point-(known-substance)", 301 ], [ "task-6-lifespan-(longest-lived)", 61 ], [ "task-6-lifespan-(shortest-lived)", 36 ], [ "task-3-find-living-thing", 41 ], [ "task-2a-test-conductivity", 217 ], [ "task-10-use-thermometer", 59 ], [ "task-3-find-non-living-thing", 116 ], [ "task-3-find-plant", 24 ], [ "task-10-use-thermometer", 370 ], [ "task-2a-test-conductivity-of-unknown-substances", 429 ], [ "task-2a-test-conductivity", 265 ], [ "task-2a-test-conductivity", 610 ], [ "task-2a-test-conductivity", 581 ], [ "task-10-use-thermometer", 296 ], [ "task-4-grow-plant", 11 ], [ "task-2a-test-conductivity-of-unknown-substances", 51 ], [ "task-2a-test-conductivity-of-unknown-substances", 329 ], [ "task-2a-test-conductivity-of-unknown-substances", 125 ], [ "task-2a-test-conductivity-of-unknown-substances", 273 ], [ 
"task-10-use-thermometer", 228 ], [ "task-3-find-animal", 9 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 22 ], [ "task-2a-test-conductivity", 321 ], [ "task-2a-test-conductivity", 362 ], [ "task-3-find-non-living-thing", 66 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 9 ], [ "task-10-use-thermometer", 404 ], [ "task-2a-test-conductivity", 254 ], [ "task-2a-test-conductivity", 42 ], [ "task-2a-test-conductivity", 396 ], [ "task-10-measure-melting-point-(known-substance)", 72 ], [ "task-10-measure-melting-point-(known-substance)", 40 ], [ "task-2a-test-conductivity", 290 ], [ "task-3-find-plant", 0 ], [ "task-2a-test-conductivity-of-unknown-substances", 152 ], [ "task-2a-test-conductivity", 560 ], [ "task-10-use-thermometer", 92 ], [ "task-1-melt", 12 ], [ "task-2a-test-conductivity", 590 ], [ "task-2a-test-conductivity-of-unknown-substances", 14 ], [ "task-2a-test-conductivity", 460 ], [ "task-2a-test-conductivity", 215 ], [ "task-3-find-non-living-thing", 114 ], [ "task-2a-test-conductivity-of-unknown-substances", 72 ], [ "task-3-find-animal", 148 ], [ "task-4-grow-plant", 7 ], [ "task-2a-test-conductivity-of-unknown-substances", 11 ], [ "task-2a-test-conductivity-of-unknown-substances", 85 ], [ "task-3-find-plant", 19 ], [ "task-10-measure-melting-point-(known-substance)", 294 ], [ "task-3-find-plant", 129 ], [ "task-2a-test-conductivity", 294 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 41 ], [ "task-10-use-thermometer", 389 ], [ "task-3-find-plant", 40 ], [ "task-10-use-thermometer", 42 ], [ "task-3-find-non-living-thing", 34 ], [ "task-2a-test-conductivity-of-unknown-substances", 406 ], [ "task-10-use-thermometer", 86 ], [ "task-2a-test-conductivity-of-unknown-substances", 360 ], [ "task-2a-test-conductivity", 587 ], [ "task-2a-test-conductivity-of-unknown-substances", 386 ], [ "task-10-use-thermometer", 330 ], [ "task-2a-test-conductivity", 319 ], [ "task-2a-test-conductivity", 253 ], [ 
"task-7-identify-life-stages-2", 2 ], [ "task-2a-test-conductivity", 573 ], [ "task-3-find-plant", 98 ], [ "task-10-measure-melting-point-(known-substance)", 151 ], [ "task-6-lifespan-(shortest-lived)", 35 ], [ "task-2a-test-conductivity", 407 ], [ "task-10-use-thermometer", 301 ], [ "task-6-lifespan-(longest-lived)", 57 ], [ "task-2a-test-conductivity-of-unknown-substances", 148 ], [ "task-10-use-thermometer", 7 ], [ "task-6-lifespan-(longest-lived)", 8 ], [ "task-10-use-thermometer", 138 ], [ "task-2a-test-conductivity-of-unknown-substances", 312 ], [ "task-2a-test-conductivity", 626 ], [ "task-2a-test-conductivity", 258 ], [ "task-3-find-living-thing", 51 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 40 ], [ "task-10-use-thermometer", 353 ], [ "task-2a-test-conductivity", 465 ], [ "task-3-find-animal", 130 ], [ "task-10-use-thermometer", 136 ], [ "task-3-find-animal", 113 ], [ "task-2a-test-conductivity", 36 ], [ "task-2a-test-conductivity-of-unknown-substances", 83 ], [ "task-10-use-thermometer", 193 ], [ "task-3-find-animal", 223 ], [ "task-3-find-non-living-thing", 67 ], [ "task-2a-test-conductivity", 115 ], [ "task-2a-test-conductivity-of-unknown-substances", 431 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 49 ], [ "task-3-find-animal", 143 ], [ "task-2a-test-conductivity", 188 ], [ "task-3-find-living-thing", 15 ], [ "task-2a-test-conductivity", 541 ], [ "task-3-find-plant", 62 ], [ "task-3-find-non-living-thing", 102 ], [ "task-3-find-plant", 60 ], [ "task-2a-test-conductivity", 293 ], [ "task-3-find-living-thing", 30 ], [ "task-2a-test-conductivity-of-unknown-substances", 349 ], [ "task-10-measure-melting-point-(known-substance)", 205 ], [ "task-2a-test-conductivity", 211 ], [ "task-10-use-thermometer", 108 ], [ "task-10-use-thermometer", 237 ], [ "task-10-measure-melting-point-(known-substance)", 38 ], [ "task-3-find-plant", 65 ], [ "task-2a-test-conductivity-of-unknown-substances", 250 ], [ 
"task-5-chemistry-mix-paint-(tertiary-color)", 8 ], [ "task-10-measure-melting-point-(known-substance)", 172 ], [ "task-2a-test-conductivity-of-unknown-substances", 425 ], [ "task-10-use-thermometer", 381 ], [ "task-10-use-thermometer", 187 ], [ "task-2a-test-conductivity-of-unknown-substances", 303 ], [ "task-10-measure-melting-point-(known-substance)", 44 ], [ "task-1-change-the-state-of-matter-of", 1 ], [ "task-3-find-animal", 76 ], [ "task-3-find-animal", 17 ], [ "task-2a-test-conductivity-of-unknown-substances", 22 ], [ "task-2a-test-conductivity", 500 ], [ "task-3-find-living-thing", 78 ], [ "task-3-find-non-living-thing", 188 ], [ "task-3-find-animal", 45 ], [ "task-2a-test-conductivity-of-unknown-substances", 397 ], [ "task-2a-test-conductivity-of-unknown-substances", 101 ], [ "task-10-measure-melting-point-(known-substance)", 105 ], [ "task-10-measure-melting-point-(known-substance)", 324 ], [ "task-2a-test-conductivity", 459 ], [ "task-2a-test-conductivity", 456 ], [ "task-2a-test-conductivity", 133 ], [ "task-2a-test-conductivity", 181 ], [ "task-2a-test-conductivity-of-unknown-substances", 135 ], [ "task-2a-test-conductivity", 306 ], [ "task-7-identify-life-stages-2", 1 ], [ "task-3-find-living-thing", 116 ], [ "task-10-measure-melting-point-(known-substance)", 230 ], [ "task-10-use-thermometer", 399 ], [ "task-3-find-plant", 27 ], [ "task-10-measure-melting-point-(known-substance)", 223 ], [ "task-10-use-thermometer", 361 ], [ "task-3-find-non-living-thing", 201 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 6 ], [ "task-6-lifespan-(longest-lived)", 15 ], [ "task-10-measure-melting-point-(known-substance)", 248 ], [ "task-6-lifespan-(shortest-lived)", 34 ], [ "task-2a-test-conductivity", 521 ], [ "task-10-measure-melting-point-(known-substance)", 254 ], [ "task-7-identify-life-stages-1", 4 ], [ "task-2a-test-conductivity-of-unknown-substances", 178 ], [ "task-3-find-living-thing", 129 ], [ "task-10-measure-melting-point-(known-substance)", 234 ], 
[ "task-10-use-thermometer", 95 ], [ "task-2a-test-conductivity", 618 ], [ "task-3-find-plant", 146 ], [ "task-3-find-non-living-thing", 128 ], [ "task-3-find-non-living-thing", 44 ], [ "task-3-find-non-living-thing", 98 ], [ "task-4-grow-fruit", 58 ], [ "task-10-use-thermometer", 254 ], [ "task-2a-test-conductivity-of-unknown-substances", 202 ], [ "task-2a-test-conductivity", 423 ], [ "task-2a-test-conductivity-of-unknown-substances", 328 ], [ "task-3-find-animal", 83 ], [ "task-3-find-living-thing", 138 ], [ "task-2a-test-conductivity", 47 ], [ "task-3-find-non-living-thing", 72 ], [ "task-3-find-animal", 97 ], [ "task-2a-test-conductivity-of-unknown-substances", 385 ], [ "task-2a-test-conductivity-of-unknown-substances", 189 ], [ "task-2a-test-conductivity", 553 ], [ "task-3-find-non-living-thing", 156 ], [ "task-10-measure-melting-point-(known-substance)", 142 ], [ "task-3-find-non-living-thing", 196 ], [ "task-10-use-thermometer", 68 ], [ "task-10-use-thermometer", 252 ], [ "task-3-find-living-thing", 68 ], [ "task-2a-test-conductivity-of-unknown-substances", 54 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 7 ], [ "task-3-find-living-thing", 24 ], [ "task-2a-test-conductivity", 510 ], [ "task-4-grow-fruit", 3 ], [ "task-2a-test-conductivity-of-unknown-substances", 34 ], [ "task-10-measure-melting-point-(known-substance)", 176 ], [ "task-2a-test-conductivity", 4 ], [ "task-4-grow-plant", 48 ], [ "task-2a-test-conductivity", 331 ], [ "task-2a-test-conductivity-of-unknown-substances", 196 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 13 ], [ "task-2a-test-conductivity", 359 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 10 ], [ "task-2a-test-conductivity", 50 ], [ "task-1-change-the-state-of-matter-of", 8 ], [ "task-2a-test-conductivity-of-unknown-substances", 35 ], [ "task-4-grow-plant", 14 ], [ "task-10-measure-melting-point-(known-substance)", 39 ], [ "task-3-find-living-thing", 143 ], [ "task-3-find-plant", 52 ], [ 
"task-2a-test-conductivity", 260 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 5 ], [ "task-2a-test-conductivity", 371 ], [ "task-2a-test-conductivity", 557 ], [ "task-10-measure-melting-point-(known-substance)", 272 ], [ "task-10-use-thermometer", 347 ], [ "task-10-measure-melting-point-(known-substance)", 61 ], [ "task-2a-test-conductivity", 540 ], [ "task-3-find-living-thing", 9 ], [ "task-2a-test-conductivity", 117 ], [ "task-10-measure-melting-point-(known-substance)", 236 ], [ "task-10-use-thermometer", 235 ], [ "task-10-use-thermometer", 265 ], [ "task-10-use-thermometer", 398 ], [ "task-2a-test-conductivity", 601 ], [ "task-6-lifespan-(shortest-lived)", 19 ], [ "task-3-find-plant", 194 ], [ "task-3-find-animal", 25 ], [ "task-3-find-animal", 55 ], [ "task-3-find-non-living-thing", 81 ], [ "task-10-use-thermometer", 197 ], [ "task-10-measure-melting-point-(known-substance)", 133 ], [ "task-3-find-living-thing", 117 ], [ "task-3-find-plant", 69 ], [ "task-10-measure-melting-point-(known-substance)", 295 ], [ "task-2a-test-conductivity", 11 ], [ "task-2a-test-conductivity-of-unknown-substances", 214 ], [ "task-3-find-non-living-thing", 52 ], [ "task-10-measure-melting-point-(known-substance)", 219 ], [ "task-3-find-non-living-thing", 214 ], [ "task-2a-test-conductivity", 377 ], [ "task-10-measure-melting-point-(known-substance)", 132 ], [ "task-3-find-plant", 205 ], [ "task-3-find-plant", 122 ], [ "task-2a-test-conductivity-of-unknown-substances", 110 ], [ "task-10-measure-melting-point-(known-substance)", 122 ], [ "task-10-measure-melting-point-(known-substance)", 217 ], [ "task-6-lifespan-(longest-lived)", 10 ], [ "task-10-use-thermometer", 175 ], [ "task-2a-test-conductivity", 16 ], [ "task-6-lifespan-(longest-lived)", 36 ], [ "task-3-find-plant", 95 ], [ "task-3-find-animal", 74 ], [ "task-2a-test-conductivity", 279 ], [ "task-2a-test-conductivity", 415 ], [ "task-2a-test-conductivity-of-unknown-substances", 93 ], [ "task-10-use-thermometer", 166 ], 
[ "task-2a-test-conductivity", 323 ], [ "task-10-measure-melting-point-(known-substance)", 264 ], [ "task-2a-test-conductivity", 635 ], [ "task-3-find-animal", 212 ], [ "task-2a-test-conductivity-of-unknown-substances", 243 ], [ "task-3-find-plant", 78 ], [ "task-2a-test-conductivity", 444 ], [ "task-3-find-animal", 147 ], [ "task-3-find-plant", 105 ], [ "task-3-find-plant", 42 ], [ "task-2a-test-conductivity", 123 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 3 ], [ "task-3-find-plant", 59 ], [ "task-2a-test-conductivity", 225 ], [ "task-10-use-thermometer", 335 ], [ "task-10-use-thermometer", 149 ], [ "task-10-use-thermometer", 279 ], [ "task-10-use-thermometer", 98 ], [ "task-2a-test-conductivity-of-unknown-substances", 190 ], [ "task-2a-test-conductivity-of-unknown-substances", 78 ], [ "task-6-lifespan-(longest-lived)", 20 ], [ "task-3-find-non-living-thing", 175 ], [ "task-10-measure-melting-point-(known-substance)", 233 ], [ "task-10-use-thermometer", 392 ], [ "task-10-measure-melting-point-(known-substance)", 319 ], [ "task-2a-test-conductivity-of-unknown-substances", 299 ], [ "task-2a-test-conductivity", 647 ], [ "task-10-measure-melting-point-(known-substance)", 120 ], [ "task-2a-test-conductivity", 368 ], [ "task-3-find-living-thing", 26 ], [ "task-10-measure-melting-point-(known-substance)", 246 ], [ "task-3-find-non-living-thing", 210 ], [ "task-3-find-animal", 85 ], [ "task-3-find-living-thing", 60 ], [ "task-2a-test-conductivity", 512 ], [ "task-2a-test-conductivity-of-unknown-substances", 231 ], [ "task-3-find-animal", 179 ], [ "task-2a-test-conductivity", 271 ], [ "task-3-find-plant", 64 ], [ "task-6-lifespan-(longest-lived)", 0 ], [ "task-10-measure-melting-point-(known-substance)", 2 ], [ "task-1-boil", 8 ], [ "task-10-measure-melting-point-(known-substance)", 111 ], [ "task-3-find-non-living-thing", 162 ], [ "task-2a-test-conductivity-of-unknown-substances", 244 ], [ "task-2a-test-conductivity-of-unknown-substances", 91 ], [ 
"task-10-use-thermometer", 263 ], [ "task-10-measure-melting-point-(known-substance)", 178 ], [ "task-3-find-animal", 107 ], [ "task-2a-test-conductivity-of-unknown-substances", 150 ], [ "task-10-use-thermometer", 391 ], [ "task-6-lifespan-(shortest-lived)", 47 ], [ "task-2a-test-conductivity-of-unknown-substances", 366 ], [ "task-3-find-animal", 215 ], [ "task-6-lifespan-(shortest-lived)", 38 ], [ "task-3-find-animal", 220 ], [ "task-10-measure-melting-point-(known-substance)", 45 ], [ "task-10-measure-melting-point-(known-substance)", 255 ], [ "task-6-lifespan-(longest-lived)", 26 ], [ "task-10-measure-melting-point-(known-substance)", 220 ], [ "task-10-use-thermometer", 367 ], [ "task-2a-test-conductivity-of-unknown-substances", 2 ], [ "task-3-find-animal", 201 ], [ "task-3-find-plant", 221 ], [ "task-10-use-thermometer", 203 ], [ "task-3-find-non-living-thing", 124 ], [ "task-3-find-plant", 185 ], [ "task-10-measure-melting-point-(known-substance)", 182 ], [ "task-10-use-thermometer", 320 ], [ "task-3-find-animal", 19 ], [ "task-6-lifespan-(shortest-lived)", 29 ], [ "task-10-measure-melting-point-(known-substance)", 200 ], [ "task-2a-test-conductivity-of-unknown-substances", 441 ], [ "task-2a-test-conductivity-of-unknown-substances", 363 ], [ "task-5-chemistry-mix", 3 ], [ "task-3-find-plant", 1 ], [ "task-4-grow-plant", 40 ], [ "task-2a-test-conductivity-of-unknown-substances", 98 ], [ "task-2a-test-conductivity-of-unknown-substances", 185 ], [ "task-10-use-thermometer", 368 ], [ "task-2a-test-conductivity-of-unknown-substances", 106 ], [ "task-3-find-plant", 187 ], [ "task-3-find-plant", 102 ], [ "task-3-find-animal", 62 ], [ "task-3-find-non-living-thing", 195 ], [ "task-10-use-thermometer", 188 ], [ "task-3-find-plant", 5 ], [ "task-10-measure-melting-point-(known-substance)", 112 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 50 ], [ "task-2a-test-conductivity-of-unknown-substances", 314 ], [ "task-10-measure-melting-point-(known-substance)", 
293 ], [ "task-2a-test-conductivity-of-unknown-substances", 177 ], [ "task-10-use-thermometer", 91 ], [ "task-3-find-animal", 6 ], [ "task-10-use-thermometer", 293 ], [ "task-10-measure-melting-point-(known-substance)", 9 ], [ "task-3-find-non-living-thing", 51 ], [ "task-10-measure-melting-point-(known-substance)", 241 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 5 ], [ "task-10-measure-melting-point-(known-substance)", 85 ], [ "task-2a-test-conductivity-of-unknown-substances", 258 ], [ "task-3-find-plant", 8 ], [ "task-10-measure-melting-point-(known-substance)", 150 ], [ "task-3-find-living-thing", 58 ], [ "task-10-use-thermometer", 401 ], [ "task-2a-test-conductivity-of-unknown-substances", 138 ], [ "task-2a-test-conductivity-of-unknown-substances", 154 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 16 ], [ "task-3-find-plant", 26 ], [ "task-10-measure-melting-point-(known-substance)", 4 ], [ "task-10-use-thermometer", 124 ], [ "task-3-find-living-thing", 88 ], [ "task-1-melt", 0 ], [ "task-4-grow-fruit", 25 ], [ "task-10-use-thermometer", 144 ], [ "task-2a-test-conductivity-of-unknown-substances", 353 ], [ "task-7-identify-life-stages-2", 0 ], [ "task-2a-test-conductivity-of-unknown-substances", 10 ], [ "task-1-boil", 7 ], [ "task-10-measure-melting-point-(known-substance)", 143 ], [ "task-2a-test-conductivity-of-unknown-substances", 391 ], [ "task-3-find-plant", 211 ], [ "task-2a-test-conductivity-of-unknown-substances", 261 ], [ "task-5-chemistry-mix", 9 ], [ "task-10-measure-melting-point-(known-substance)", 141 ], [ "task-6-lifespan-(shortest-lived)", 55 ], [ "task-10-use-thermometer", 243 ], [ "task-4-grow-fruit", 37 ], [ "task-4-grow-plant", 25 ], [ "task-10-use-thermometer", 206 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 27 ], [ "task-6-lifespan-(shortest-lived)", 41 ], [ "task-3-find-animal", 68 ], [ "task-4-grow-fruit", 6 ], [ "task-3-find-non-living-thing", 70 ], [ "task-10-use-thermometer", 128 ], [ 
"task-3-find-non-living-thing", 110 ], [ "task-2a-test-conductivity-of-unknown-substances", 422 ], [ "task-10-use-thermometer", 121 ], [ "task-3-find-living-thing", 48 ], [ "task-10-use-thermometer", 225 ], [ "task-10-measure-melting-point-(known-substance)", 310 ], [ "task-1-freeze", 4 ], [ "task-10-use-thermometer", 102 ], [ "task-2a-test-conductivity-of-unknown-substances", 242 ], [ "task-10-measure-melting-point-(known-substance)", 198 ], [ "task-3-find-animal", 174 ], [ "task-10-use-thermometer", 286 ], [ "task-10-use-thermometer", 284 ], [ "task-1-change-the-state-of-matter-of", 6 ], [ "task-2a-test-conductivity-of-unknown-substances", 181 ], [ "task-10-use-thermometer", 155 ], [ "task-4-grow-fruit", 55 ], [ "task-10-measure-melting-point-(known-substance)", 70 ], [ "task-6-lifespan-(longest-lived)", 52 ], [ "task-3-find-plant", 164 ], [ "task-4-grow-fruit", 2 ], [ "task-3-find-non-living-thing", 46 ], [ "task-3-find-animal", 13 ], [ "task-3-find-living-thing", 213 ], [ "task-4-grow-plant", 54 ], [ "task-3-find-non-living-thing", 8 ], [ "task-3-find-living-thing", 13 ], [ "task-3-find-non-living-thing", 147 ], [ "task-10-use-thermometer", 169 ], [ "task-2a-test-conductivity-of-unknown-substances", 119 ], [ "task-10-use-thermometer", 22 ], [ "task-6-lifespan-(longest-lived)", 27 ], [ "task-3-find-animal", 53 ], [ "task-3-find-non-living-thing", 80 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 38 ], [ "task-10-measure-melting-point-(known-substance)", 162 ], [ "task-3-find-non-living-thing", 199 ], [ "task-10-measure-melting-point-(known-substance)", 14 ], [ "task-2a-test-conductivity-of-unknown-substances", 398 ], [ "task-3-find-plant", 178 ], [ "task-2a-test-conductivity-of-unknown-substances", 172 ], [ "task-3-find-living-thing", 179 ], [ "task-3-find-plant", 72 ], [ "task-10-use-thermometer", 81 ], [ "task-2a-test-conductivity-of-unknown-substances", 438 ], [ "task-6-lifespan-(longest-lived)", 80 ], [ 
"task-2a-test-conductivity-of-unknown-substances", 153 ], [ "task-3-find-animal", 109 ], [ "task-2a-test-conductivity-of-unknown-substances", 99 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 88 ], [ "task-10-use-thermometer", 337 ], [ "task-2a-test-conductivity-of-unknown-substances", 182 ], [ "task-3-find-non-living-thing", 183 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 0 ], [ "task-10-measure-melting-point-(known-substance)", 313 ], [ "task-10-use-thermometer", 99 ], [ "task-10-measure-melting-point-(known-substance)", 98 ], [ "task-2a-test-conductivity-of-unknown-substances", 289 ], [ "task-2a-test-conductivity-of-unknown-substances", 166 ], [ "task-10-measure-melting-point-(known-substance)", 55 ], [ "task-10-use-thermometer", 270 ], [ "task-6-lifespan-(shortest-lived)", 15 ], [ "task-10-use-thermometer", 190 ], [ "task-2a-test-conductivity-of-unknown-substances", 342 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 59 ], [ "task-3-find-non-living-thing", 74 ], [ "task-3-find-living-thing", 206 ], [ "task-3-find-living-thing", 125 ], [ "task-3-find-animal", 120 ], [ "task-6-lifespan-(longest-lived)", 76 ], [ "task-10-use-thermometer", 305 ], [ "task-3-find-animal", 84 ], [ "task-3-find-animal", 224 ], [ "task-10-use-thermometer", 359 ], [ "task-2a-test-conductivity-of-unknown-substances", 65 ], [ "task-6-lifespan-(shortest-lived)", 4 ], [ "task-3-find-animal", 71 ], [ "task-6-lifespan-(longest-lived)", 40 ], [ "task-10-measure-melting-point-(known-substance)", 58 ], [ "task-10-measure-melting-point-(known-substance)", 46 ], [ "task-3-find-animal", 144 ], [ "task-10-use-thermometer", 205 ], [ "task-10-use-thermometer", 90 ], [ "task-10-use-thermometer", 89 ], [ "task-2a-test-conductivity-of-unknown-substances", 146 ], [ "task-10-use-thermometer", 216 ], [ "task-3-find-animal", 1 ], [ "task-3-find-non-living-thing", 113 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 64 ], [ 
"task-2a-test-conductivity-of-unknown-substances", 92 ], [ "task-3-find-living-thing", 114 ], [ "task-10-measure-melting-point-(known-substance)", 74 ], [ "task-3-find-animal", 156 ], [ "task-3-find-non-living-thing", 135 ], [ "task-3-find-living-thing", 90 ], [ "task-2a-test-conductivity-of-unknown-substances", 206 ], [ "task-3-find-living-thing", 153 ], [ "task-2a-test-conductivity-of-unknown-substances", 330 ], [ "task-6-lifespan-(shortest-lived)", 23 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 2 ], [ "task-10-use-thermometer", 231 ], [ "task-2a-test-conductivity-of-unknown-substances", 302 ], [ "task-3-find-non-living-thing", 16 ], [ "task-10-measure-melting-point-(known-substance)", 8 ], [ "task-10-use-thermometer", 311 ], [ "task-10-use-thermometer", 142 ], [ "task-6-lifespan-(shortest-lived)", 30 ], [ "task-10-use-thermometer", 317 ], [ "task-6-lifespan-(longest-lived)", 28 ], [ "task-3-find-non-living-thing", 171 ], [ "task-3-find-living-thing", 131 ], [ "task-2-power-component", 3 ], [ "task-3-find-animal", 108 ], [ "task-6-lifespan-(longest-lived)", 25 ], [ "task-3-find-non-living-thing", 62 ], [ "task-2a-test-conductivity-of-unknown-substances", 216 ], [ "task-10-use-thermometer", 97 ], [ "task-3-find-living-thing", 74 ], [ "task-3-find-living-thing", 172 ], [ "task-3-find-non-living-thing", 120 ], [ "task-2a-test-conductivity-of-unknown-substances", 134 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 1 ], [ "task-3-find-plant", 155 ], [ "task-4-grow-plant", 43 ], [ "task-10-measure-melting-point-(known-substance)", 164 ], [ "task-2a-test-conductivity-of-unknown-substances", 411 ], [ "task-3-find-living-thing", 106 ], [ "task-10-use-thermometer", 163 ], [ "task-10-measure-melting-point-(known-substance)", 194 ], [ "task-3-find-animal", 114 ], [ "task-3-find-non-living-thing", 2 ], [ "task-2a-test-conductivity-of-unknown-substances", 279 ], [ "task-2a-test-conductivity-of-unknown-substances", 426 ], [ 
"task-2a-test-conductivity-of-unknown-substances", 281 ], [ "task-4-grow-plant", 53 ], [ "task-2a-test-conductivity-of-unknown-substances", 55 ], [ "task-3-find-plant", 173 ], [ "task-3-find-plant", 11 ], [ "task-3-find-non-living-thing", 180 ], [ "task-10-measure-melting-point-(known-substance)", 42 ], [ "task-3-find-non-living-thing", 163 ], [ "task-3-find-living-thing", 120 ], [ "task-10-use-thermometer", 234 ], [ "task-10-use-thermometer", 2 ], [ "task-10-use-thermometer", 212 ], [ "task-10-measure-melting-point-(known-substance)", 6 ], [ "task-3-find-animal", 93 ], [ "task-10-measure-melting-point-(known-substance)", 125 ], [ "task-10-use-thermometer", 63 ], [ "task-3-find-animal", 127 ], [ "task-1-change-the-state-of-matter-of", 11 ], [ "task-10-use-thermometer", 208 ], [ "task-3-find-animal", 67 ], [ "task-3-find-non-living-thing", 13 ], [ "task-10-use-thermometer", 24 ], [ "task-3-find-plant", 67 ], [ "task-1-boil", 9 ], [ "task-3-find-plant", 63 ], [ "task-3-find-plant", 200 ], [ "task-6-lifespan-(shortest-lived)", 69 ], [ "task-10-measure-melting-point-(known-substance)", 76 ], [ "task-10-measure-melting-point-(known-substance)", 208 ], [ "task-3-find-animal", 28 ], [ "task-6-lifespan-(longest-lived)", 82 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 39 ], [ "task-3-find-plant", 142 ], [ "task-3-find-plant", 133 ], [ "task-10-measure-melting-point-(known-substance)", 18 ], [ "task-10-measure-melting-point-(known-substance)", 326 ], [ "task-3-find-animal", 141 ], [ "task-1-freeze", 2 ], [ "task-3-find-animal", 219 ], [ "task-3-find-animal", 135 ], [ "task-6-lifespan-(shortest-lived)", 84 ], [ "task-6-lifespan-(shortest-lived)", 44 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 11 ], [ "task-6-lifespan-(shortest-lived)", 65 ], [ "task-3-find-living-thing", 2 ], [ "task-3-find-living-thing", 100 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 79 ], [ "task-3-find-animal", 115 ], [ 
"task-10-measure-melting-point-(known-substance)", 244 ], [ "task-3-find-living-thing", 167 ], [ "task-3-find-living-thing", 16 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 92 ], [ "task-10-measure-melting-point-(known-substance)", 209 ], [ "task-3-find-plant", 23 ], [ "task-3-find-non-living-thing", 42 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 8 ], [ "task-3-find-plant", 219 ], [ "task-10-measure-melting-point-(known-substance)", 280 ], [ "task-3-find-non-living-thing", 200 ], [ "task-3-find-non-living-thing", 221 ], [ "task-10-measure-melting-point-(known-substance)", 229 ], [ "task-3-find-plant", 9 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 4 ], [ "task-3-find-plant", 175 ], [ "task-10-measure-melting-point-(known-substance)", 320 ], [ "task-3-find-living-thing", 193 ], [ "task-3-find-plant", 35 ], [ "task-3-find-non-living-thing", 184 ], [ "task-3-find-animal", 151 ], [ "task-3-find-non-living-thing", 32 ], [ "task-10-measure-melting-point-(known-substance)", 148 ], [ "task-3-find-animal", 42 ], [ "task-3-find-animal", 112 ], [ "task-3-find-animal", 121 ], [ "task-4-grow-fruit", 42 ], [ "task-1-melt", 3 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 75 ], [ "task-3-find-animal", 158 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 17 ], [ "task-3-find-animal", 125 ], [ "task-3-find-plant", 118 ], [ "task-3-find-non-living-thing", 208 ], [ "task-3-find-plant", 143 ], [ "task-3-find-animal", 90 ], [ "task-3-find-living-thing", 157 ], [ "task-3-find-non-living-thing", 205 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 60 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 19 ], [ "task-3-find-non-living-thing", 11 ], [ "task-3-find-non-living-thing", 73 ], [ "task-6-lifespan-(longest-lived)", 11 ], [ "task-3-find-animal", 195 ], [ "task-6-lifespan-(longest-lived)", 54 ], [ "task-3-find-living-thing", 49 ], [ "task-3-find-non-living-thing", 100 ], [ 
"task-10-measure-melting-point-(known-substance)", 287 ], [ "task-4-grow-plant", 6 ], [ "task-3-find-animal", 136 ], [ "task-4-grow-plant", 9 ], [ "task-3-find-animal", 200 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 80 ], [ "task-10-measure-melting-point-(known-substance)", 252 ], [ "task-10-measure-melting-point-(known-substance)", 15 ], [ "task-6-lifespan-(longest-lived)", 41 ], [ "task-6-lifespan-(shortest-lived)", 18 ], [ "task-6-lifespan-(shortest-lived)", 62 ], [ "task-3-find-plant", 32 ], [ "task-10-measure-melting-point-(known-substance)", 160 ], [ "task-3-find-plant", 100 ], [ "task-3-find-animal", 210 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 15 ], [ "task-10-measure-melting-point-(known-substance)", 305 ], [ "task-3-find-plant", 167 ], [ "task-6-lifespan-(shortest-lived)", 37 ], [ "task-10-measure-melting-point-(known-substance)", 243 ], [ "task-6-lifespan-(longest-lived)", 48 ], [ "task-3-find-animal", 164 ], [ "task-3-find-living-thing", 199 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 81 ], [ "task-10-measure-melting-point-(known-substance)", 29 ], [ "task-10-measure-melting-point-(known-substance)", 121 ], [ "task-3-find-animal", 221 ], [ "task-6-lifespan-(shortest-lived)", 52 ], [ "task-3-find-plant", 90 ], [ "task-3-find-living-thing", 34 ], [ "task-1-change-the-state-of-matter-of", 0 ], [ "task-3-find-living-thing", 84 ], [ "task-1-melt", 7 ], [ "task-3-find-animal", 24 ], [ "task-4-grow-fruit", 31 ], [ "task-1-boil", 3 ], [ "task-3-find-living-thing", 141 ], [ "task-7-identify-life-stages-1", 1 ], [ "task-3-find-plant", 123 ], [ "task-4-grow-plant", 0 ], [ "task-4-grow-fruit", 27 ], [ "task-3-find-living-thing", 70 ], [ "task-6-lifespan-(shortest-lived)", 85 ], [ "task-3-find-living-thing", 160 ], [ "task-3-find-non-living-thing", 132 ], [ "task-3-find-non-living-thing", 22 ], [ "task-4-grow-fruit", 36 ], [ "task-7-identify-life-stages-1", 2 ], [ "task-3-find-non-living-thing", 222 ], [ 
"task-3-find-living-thing", 222 ], [ "task-3-find-animal", 188 ], [ "task-4-grow-plant", 61 ], [ "task-3-find-non-living-thing", 111 ], [ "task-3-find-non-living-thing", 10 ], [ "task-4-grow-plant", 3 ], [ "task-3-find-plant", 128 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 9 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 56 ], [ "task-3-find-non-living-thing", 14 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 48 ], [ "task-3-find-plant", 217 ], [ "task-4-grow-fruit", 46 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 84 ], [ "task-3-find-plant", 106 ], [ "task-3-find-living-thing", 180 ], [ "task-6-lifespan-(shortest-lived)", 64 ], [ "task-3-find-non-living-thing", 0 ], [ "task-3-find-animal", 23 ], [ "task-10-measure-melting-point-(known-substance)", 54 ], [ "task-3-find-living-thing", 77 ], [ "task-3-find-non-living-thing", 150 ], [ "task-3-find-animal", 132 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 1 ], [ "task-10-measure-melting-point-(known-substance)", 13 ], [ "task-10-measure-melting-point-(known-substance)", 238 ], [ "task-10-measure-melting-point-(known-substance)", 0 ], [ "task-10-measure-melting-point-(known-substance)", 113 ], [ "task-3-find-plant", 182 ], [ "task-1-freeze", 8 ], [ "task-3-find-living-thing", 188 ], [ "task-3-find-living-thing", 18 ], [ "task-10-measure-melting-point-(known-substance)", 75 ], [ "task-4-grow-plant", 21 ], [ "task-6-lifespan-(longest-lived)", 51 ], [ "task-3-find-living-thing", 32 ], [ "task-3-find-plant", 176 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 13 ], [ "task-10-measure-melting-point-(known-substance)", 189 ], [ "task-10-measure-melting-point-(known-substance)", 37 ], [ "task-1-freeze", 0 ], [ "task-3-find-non-living-thing", 90 ], [ "task-10-measure-melting-point-(known-substance)", 216 ], [ "task-3-find-non-living-thing", 182 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 83 ], [ "task-3-find-living-thing", 115 ], [ 
"task-3-find-living-thing", 66 ], [ "task-3-find-plant", 88 ], [ "task-4-grow-plant", 55 ], [ "task-3-find-non-living-thing", 224 ], [ "task-3-find-plant", 117 ], [ "task-10-measure-melting-point-(known-substance)", 195 ], [ "task-3-find-plant", 76 ], [ "task-6-lifespan-(longest-lived)", 1 ], [ "task-10-measure-melting-point-(known-substance)", 77 ], [ "task-10-measure-melting-point-(known-substance)", 174 ], [ "task-3-find-living-thing", 214 ], [ "task-3-find-living-thing", 216 ], [ "task-10-measure-melting-point-(known-substance)", 71 ], [ "task-3-find-plant", 17 ], [ "task-6-lifespan-(shortest-lived)", 91 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 44 ], [ "task-10-measure-melting-point-(known-substance)", 204 ], [ "task-1-boil", 2 ], [ "task-3-find-plant", 208 ], [ "task-10-measure-melting-point-(known-substance)", 11 ], [ "task-3-find-non-living-thing", 219 ], [ "task-10-measure-melting-point-(known-substance)", 93 ], [ "task-6-lifespan-(shortest-lived)", 80 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 62 ], [ "task-3-find-living-thing", 81 ], [ "task-3-find-living-thing", 122 ], [ "task-10-measure-melting-point-(known-substance)", 316 ], [ "task-5-chemistry-mix", 7 ], [ "task-3-find-living-thing", 159 ], [ "task-6-lifespan-(longest-lived)", 14 ], [ "task-3-find-non-living-thing", 139 ], [ "task-3-find-living-thing", 102 ], [ "task-4-grow-plant", 27 ], [ "task-6-lifespan-(longest-lived)", 64 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 51 ], [ "task-4-grow-plant", 36 ], [ "task-3-find-living-thing", 27 ], [ "task-7-identify-life-stages-1", 3 ], [ "task-6-lifespan-(longest-lived)", 37 ], [ "task-3-find-living-thing", 187 ], [ "task-3-find-animal", 153 ], [ "task-4-grow-plant", 85 ], [ "task-3-find-animal", 39 ], [ "task-3-find-animal", 157 ], [ "task-3-find-animal", 77 ], [ "task-3-find-non-living-thing", 33 ], [ "task-4-grow-plant", 45 ], [ "task-3-find-animal", 89 ], [ "task-6-lifespan-(longest-lived)", 32 ], [ 
"task-3-find-living-thing", 204 ], [ "task-3-find-animal", 161 ], [ "task-3-find-plant", 116 ], [ "task-3-find-non-living-thing", 23 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 3 ], [ "task-3-find-animal", 165 ], [ "task-6-lifespan-(shortest-lived)", 46 ], [ "task-6-lifespan-(longest-lived)", 58 ], [ "task-4-grow-fruit", 29 ], [ "task-3-find-animal", 170 ], [ "task-3-find-plant", 81 ], [ "task-3-find-non-living-thing", 167 ], [ "task-3-find-animal", 182 ], [ "task-4-grow-plant", 91 ], [ "task-3-find-non-living-thing", 60 ], [ "task-6-lifespan-(shortest-lived)", 31 ], [ "task-3-find-plant", 109 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 11 ], [ "task-3-find-animal", 100 ], [ "task-6-lifespan-(longest-lived)", 91 ], [ "task-6-lifespan-(shortest-lived)", 20 ], [ "task-4-grow-fruit", 12 ], [ "task-3-find-plant", 84 ], [ "task-3-find-living-thing", 0 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 14 ], [ "task-1-change-the-state-of-matter-of", 7 ], [ "task-4-grow-plant", 42 ], [ "task-3-find-non-living-thing", 64 ], [ "task-3-find-plant", 132 ], [ "task-6-lifespan-(shortest-lived)", 13 ], [ "task-4-grow-plant", 56 ], [ "task-3-find-plant", 99 ], [ "task-6-lifespan-(longest-lived)", 3 ], [ "task-3-find-living-thing", 28 ], [ "task-4-grow-plant", 22 ], [ "task-3-find-living-thing", 201 ], [ "task-4-grow-plant", 44 ], [ "task-3-find-animal", 31 ], [ "task-4-grow-fruit", 87 ], [ "task-3-find-animal", 137 ], [ "task-3-find-non-living-thing", 79 ], [ "task-3-find-living-thing", 119 ], [ "task-3-find-animal", 177 ], [ "task-3-find-living-thing", 219 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 33 ], [ "task-3-find-plant", 66 ], [ "task-3-find-living-thing", 200 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 24 ], [ "task-3-find-living-thing", 7 ], [ "task-6-lifespan-(shortest-lived)", 7 ], [ "task-3-find-plant", 137 ], [ "task-1-melt", 13 ], [ "task-6-lifespan-(longest-lived)", 22 ], [ 
"task-6-lifespan-(longest-lived-then-shortest-lived)", 25 ], [ "task-3-find-plant", 108 ], [ "task-3-find-non-living-thing", 4 ], [ "task-3-find-living-thing", 183 ], [ "task-3-find-animal", 36 ], [ "task-3-find-non-living-thing", 63 ], [ "task-3-find-non-living-thing", 169 ], [ "task-3-find-living-thing", 76 ], [ "task-6-lifespan-(longest-lived)", 23 ], [ "task-3-find-plant", 104 ], [ "task-3-find-non-living-thing", 103 ], [ "task-3-find-living-thing", 136 ], [ "task-4-grow-plant", 31 ], [ "task-3-find-animal", 75 ], [ "task-6-lifespan-(shortest-lived)", 10 ], [ "task-3-find-non-living-thing", 25 ], [ "task-3-find-plant", 171 ], [ "task-3-find-plant", 34 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 4 ], [ "task-3-find-animal", 95 ], [ "task-6-lifespan-(longest-lived)", 21 ], [ "task-4-grow-fruit", 47 ], [ "task-3-find-plant", 144 ], [ "task-3-find-non-living-thing", 30 ], [ "task-3-find-living-thing", 217 ], [ "task-3-find-living-thing", 209 ], [ "task-4-grow-fruit", 9 ], [ "task-3-find-non-living-thing", 148 ], [ "task-7-identify-life-stages-2", 3 ], [ "task-3-find-plant", 77 ], [ "task-3-find-living-thing", 176 ], [ "task-3-find-living-thing", 118 ], [ "task-6-lifespan-(longest-lived)", 86 ], [ "task-4-grow-fruit", 0 ], [ "task-3-find-animal", 102 ], [ "task-3-find-living-thing", 91 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 34 ], [ "task-3-find-plant", 50 ], [ "task-3-find-non-living-thing", 45 ], [ "task-4-grow-plant", 84 ], [ "task-3-find-living-thing", 185 ], [ "task-3-find-non-living-thing", 37 ], [ "task-3-find-animal", 3 ], [ "task-3-find-living-thing", 110 ], [ "task-1-boil", 6 ], [ "task-3-find-animal", 49 ], [ "task-3-find-plant", 25 ], [ "task-3-find-plant", 124 ], [ "task-3-find-animal", 167 ], [ "task-6-lifespan-(longest-lived)", 73 ], [ "task-3-find-non-living-thing", 160 ], [ "task-3-find-non-living-thing", 36 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 9 ], [ "task-3-find-plant", 160 ], 
[ "task-3-find-living-thing", 186 ], [ "task-3-find-non-living-thing", 215 ], [ "task-4-grow-fruit", 61 ], [ "task-3-find-animal", 40 ], [ "task-3-find-animal", 181 ], [ "task-3-find-non-living-thing", 43 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 3 ], [ "task-3-find-living-thing", 57 ], [ "task-6-lifespan-(shortest-lived)", 66 ], [ "task-3-find-animal", 60 ], [ "task-3-find-non-living-thing", 41 ], [ "task-3-find-living-thing", 177 ], [ "task-3-find-plant", 169 ], [ "task-6-lifespan-(shortest-lived)", 77 ], [ "task-4-grow-plant", 76 ], [ "task-3-find-plant", 3 ], [ "task-3-find-living-thing", 46 ], [ "task-6-lifespan-(longest-lived)", 90 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 63 ], [ "task-3-find-animal", 134 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 36 ], [ "task-6-lifespan-(longest-lived)", 56 ], [ "task-4-grow-plant", 35 ], [ "task-3-find-non-living-thing", 12 ], [ "task-4-grow-fruit", 60 ], [ "task-6-lifespan-(shortest-lived)", 17 ], [ "task-3-find-plant", 206 ], [ "task-6-lifespan-(longest-lived)", 74 ], [ "task-3-find-non-living-thing", 131 ], [ "task-4-grow-fruit", 28 ], [ "task-3-find-animal", 82 ], [ "task-3-find-living-thing", 144 ], [ "task-1-boil", 5 ], [ "task-3-find-animal", 8 ], [ "task-3-find-animal", 104 ], [ "task-3-find-non-living-thing", 202 ], [ "task-3-find-living-thing", 42 ], [ "task-4-grow-fruit", 39 ], [ "task-3-find-non-living-thing", 76 ], [ "task-6-lifespan-(longest-lived)", 55 ], [ "task-3-find-animal", 166 ], [ "task-3-find-animal", 175 ], [ "task-3-find-non-living-thing", 40 ], [ "task-3-find-plant", 218 ], [ "task-3-find-plant", 41 ], [ "task-6-lifespan-(shortest-lived)", 14 ], [ "task-3-find-non-living-thing", 179 ], [ "task-3-find-living-thing", 158 ], [ "task-3-find-plant", 149 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 5 ], [ "task-4-grow-plant", 50 ], [ "task-3-find-living-thing", 71 ], [ "task-3-find-plant", 83 ], [ "task-3-find-plant", 86 ], [ 
"task-3-find-plant", 92 ], [ "task-3-find-animal", 206 ], [ "task-6-lifespan-(longest-lived)", 49 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 32 ], [ "task-3-find-animal", 216 ], [ "task-3-find-animal", 203 ], [ "task-4-grow-plant", 16 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 65 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 7 ], [ "task-3-find-non-living-thing", 181 ], [ "task-3-find-plant", 18 ], [ "task-3-find-living-thing", 19 ], [ "task-3-find-non-living-thing", 206 ], [ "task-3-find-plant", 165 ], [ "task-3-find-non-living-thing", 55 ], [ "task-3-find-plant", 39 ], [ "task-4-grow-plant", 86 ], [ "task-3-find-non-living-thing", 21 ], [ "task-6-lifespan-(shortest-lived)", 50 ], [ "task-3-find-non-living-thing", 97 ], [ "task-4-grow-fruit", 13 ], [ "task-6-lifespan-(shortest-lived)", 90 ], [ "task-6-lifespan-(longest-lived)", 70 ], [ "task-1-freeze", 7 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 77 ], [ "task-3-find-animal", 2 ], [ "task-3-find-non-living-thing", 48 ], [ "task-3-find-living-thing", 142 ], [ "task-3-find-non-living-thing", 93 ], [ "task-3-find-non-living-thing", 27 ], [ "task-3-find-animal", 193 ], [ "task-3-find-living-thing", 47 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 72 ], [ "task-3-find-plant", 114 ], [ "task-6-lifespan-(longest-lived)", 24 ], [ "task-3-find-living-thing", 69 ], [ "task-3-find-plant", 203 ], [ "task-3-find-non-living-thing", 53 ], [ "task-3-find-animal", 21 ], [ "task-6-lifespan-(shortest-lived)", 39 ], [ "task-3-find-non-living-thing", 56 ], [ "task-6-lifespan-(shortest-lived)", 72 ], [ "task-6-lifespan-(shortest-lived)", 60 ], [ "task-3-find-animal", 222 ], [ "task-4-grow-plant", 28 ], [ "task-3-find-animal", 218 ], [ "task-3-find-living-thing", 107 ], [ "task-1-change-the-state-of-matter-of", 10 ], [ "task-3-find-non-living-thing", 153 ], [ "task-1-change-the-state-of-matter-of", 2 ], [ "task-3-find-plant", 147 ], [ "task-4-grow-fruit", 83 ], [ 
"task-1-melt", 5 ], [ "task-3-find-non-living-thing", 172 ], [ "task-3-find-plant", 16 ], [ "task-3-find-animal", 138 ], [ "task-3-find-non-living-thing", 68 ], [ "task-3-find-non-living-thing", 26 ], [ "task-3-find-living-thing", 133 ], [ "task-3-find-plant", 87 ], [ "task-4-grow-fruit", 33 ], [ "task-3-find-animal", 180 ], [ "task-3-find-non-living-thing", 5 ], [ "task-3-find-non-living-thing", 107 ], [ "task-3-find-living-thing", 152 ], [ "task-3-find-plant", 33 ], [ "task-1-change-the-state-of-matter-of", 4 ], [ "task-4-grow-fruit", 66 ], [ "task-3-find-living-thing", 203 ], [ "task-3-find-living-thing", 169 ], [ "task-2-power-component", 9 ], [ "task-6-lifespan-(shortest-lived)", 28 ], [ "task-3-find-plant", 214 ], [ "task-4-grow-fruit", 10 ], [ "task-3-find-living-thing", 150 ], [ "task-4-grow-fruit", 17 ], [ "task-6-lifespan-(longest-lived)", 44 ], [ "task-4-grow-fruit", 73 ], [ "task-3-find-living-thing", 6 ], [ "task-3-find-animal", 47 ], [ "task-3-find-animal", 96 ], [ "task-3-find-animal", 140 ], [ "task-3-find-living-thing", 63 ], [ "task-6-lifespan-(shortest-lived)", 67 ], [ "task-3-find-animal", 73 ], [ "task-3-find-living-thing", 154 ], [ "task-6-lifespan-(shortest-lived)", 54 ], [ "task-6-lifespan-(shortest-lived)", 76 ], [ "task-4-grow-plant", 8 ], [ "task-6-lifespan-(shortest-lived)", 79 ], [ "task-4-grow-fruit", 35 ], [ "task-3-find-living-thing", 98 ], [ "task-6-lifespan-(longest-lived)", 46 ], [ "task-4-grow-fruit", 72 ], [ "task-6-lifespan-(longest-lived)", 6 ], [ "task-4-grow-fruit", 14 ], [ "task-6-lifespan-(shortest-lived)", 27 ], [ "task-6-lifespan-(longest-lived)", 34 ], [ "task-3-find-living-thing", 132 ], [ "task-4-grow-fruit", 16 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 6 ], [ "task-3-find-living-thing", 72 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 4 ], [ "task-4-grow-fruit", 69 ], [ "task-1-melt", 4 ], [ "task-6-lifespan-(longest-lived)", 42 ], [ "task-6-lifespan-(shortest-lived)", 63 ], [ 
"task-6-lifespan-(longest-lived)", 83 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 31 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 26 ], [ "task-6-lifespan-(longest-lived)", 84 ], [ "task-5-chemistry-mix", 2 ], [ "task-4-grow-plant", 17 ], [ "task-4-grow-fruit", 5 ], [ "task-4-grow-plant", 80 ], [ "task-5-chemistry-mix", 5 ], [ "task-6-lifespan-(longest-lived)", 92 ], [ "task-6-lifespan-(shortest-lived)", 8 ], [ "task-5-chemistry-mix", 1 ], [ "task-1-change-the-state-of-matter-of", 5 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 0 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 61 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 2 ], [ "task-4-grow-plant", 89 ], [ "task-4-grow-plant", 19 ], [ "task-4-grow-fruit", 43 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 76 ], [ "task-4-grow-fruit", 68 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 67 ], [ "task-4-grow-fruit", 64 ], [ "task-1-freeze", 6 ], [ "task-6-lifespan-(shortest-lived)", 48 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 16 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 10 ], [ "task-4-grow-plant", 58 ], [ "task-4-grow-plant", 70 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 17 ], [ "task-6-lifespan-(shortest-lived)", 0 ], [ "task-4-grow-plant", 92 ], [ "task-6-lifespan-(shortest-lived)", 24 ], [ "task-4-grow-plant", 74 ], [ "task-4-grow-fruit", 57 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 7 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 35 ], [ "task-4-grow-plant", 39 ], [ "task-1-freeze", 11 ], [ "task-6-lifespan-(longest-lived)", 43 ], [ "task-4-grow-fruit", 24 ], [ "task-4-grow-plant", 13 ], [ "task-4-grow-plant", 67 ], [ "task-4-grow-plant", 72 ], [ "task-4-grow-fruit", 80 ], [ "task-6-lifespan-(shortest-lived)", 49 ], [ "task-1-freeze", 10 ], [ "task-4-grow-plant", 73 ], [ "task-1-freeze", 1 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 1 ], [ 
"task-4-grow-plant", 49 ], [ "task-6-lifespan-(longest-lived)", 45 ], [ "task-4-grow-plant", 52 ], [ "task-4-grow-plant", 18 ], [ "task-1-freeze", 12 ], [ "task-4-grow-plant", 83 ], [ "task-6-lifespan-(shortest-lived)", 45 ], [ "task-4-grow-fruit", 50 ], [ "task-4-grow-fruit", 18 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 13 ], [ "task-4-grow-plant", 90 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 11 ], [ "task-4-grow-fruit", 19 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 12 ], [ "task-7-identify-life-stages-1", 5 ], [ "task-5-chemistry-mix", 11 ], [ "task-1-boil", 0 ], [ "task-1-boil", 1 ], [ "task-1-melt", 1 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 46 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 10 ], [ "task-6-lifespan-(shortest-lived)", 5 ], [ "task-5-chemistry-mix", 15 ], [ "task-6-lifespan-(longest-lived)", 77 ], [ "task-6-lifespan-(longest-lived)", 7 ], [ "task-4-grow-plant", 30 ], [ "task-6-lifespan-(longest-lived)", 29 ], [ "task-4-grow-plant", 2 ], [ "task-4-grow-plant", 24 ], [ "task-4-grow-plant", 20 ], [ "task-6-lifespan-(shortest-lived)", 3 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 23 ], [ "task-4-grow-plant", 10 ], [ "task-6-lifespan-(longest-lived)", 31 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 55 ], [ "task-4-grow-plant", 59 ], [ "task-2-power-component", 0 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 6 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 91 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 5 ], [ "task-2-power-component", 6 ], [ "task-6-lifespan-(longest-lived)", 35 ], [ "task-6-lifespan-(longest-lived)", 50 ], [ "task-6-lifespan-(longest-lived)", 75 ], [ "task-6-lifespan-(longest-lived)", 5 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 37 ], [ "task-7-identify-life-stages-1", 0 ], [ "task-4-grow-plant", 47 ], [ "task-4-grow-plant", 81 ], [ "task-6-lifespan-(longest-lived)", 19 ], [ 
"task-1-change-the-state-of-matter-of", 12 ], [ "task-6-lifespan-(longest-lived)", 87 ], [ "task-6-lifespan-(longest-lived)", 4 ], [ "task-4-grow-plant", 37 ], [ "task-6-lifespan-(longest-lived)", 12 ], [ "task-4-grow-plant", 46 ], [ "task-6-lifespan-(shortest-lived)", 88 ], [ "task-6-lifespan-(shortest-lived)", 6 ], [ "task-4-grow-fruit", 49 ], [ "task-4-grow-fruit", 92 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 14 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 2 ], [ "task-1-change-the-state-of-matter-of", 13 ], [ "task-4-grow-fruit", 78 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 21 ], [ "task-6-lifespan-(longest-lived)", 9 ], [ "task-4-grow-fruit", 8 ], [ "task-4-grow-plant", 57 ], [ "task-6-lifespan-(longest-lived)", 53 ], [ "task-4-grow-fruit", 48 ], [ "task-4-grow-plant", 78 ], [ "task-4-grow-plant", 66 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 71 ], [ "task-6-lifespan-(longest-lived)", 67 ], [ "task-6-lifespan-(shortest-lived)", 11 ], [ "task-4-grow-fruit", 90 ], [ "task-6-lifespan-(shortest-lived)", 9 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 0 ], [ "task-6-lifespan-(longest-lived)", 72 ], [ "task-2-power-component", 2 ], [ "task-6-lifespan-(longest-lived)", 39 ], [ "task-5-chemistry-mix", 12 ], [ "task-6-lifespan-(longest-lived)", 79 ], [ "task-4-grow-plant", 15 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 12 ], [ "task-6-lifespan-(longest-lived)", 13 ], [ "task-6-lifespan-(shortest-lived)", 25 ], [ "task-6-lifespan-(shortest-lived)", 83 ], [ "task-4-grow-plant", 75 ], [ "task-4-grow-plant", 62 ], [ "task-4-grow-fruit", 76 ], [ "task-6-lifespan-(longest-lived)", 33 ], [ "task-2-power-component", 7 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 17 ], [ "task-1-freeze", 3 ], [ "task-6-lifespan-(longest-lived)", 47 ], [ "task-6-lifespan-(longest-lived)", 38 ], [ "task-4-grow-fruit", 34 ], [ "task-6-lifespan-(longest-lived)", 17 ], [ 
"task-5-chemistry-mix-paint-(secondary-color)", 7 ], [ "task-6-lifespan-(shortest-lived)", 82 ], [ "task-6-lifespan-(shortest-lived)", 56 ], [ "task-1-change-the-state-of-matter-of", 3 ], [ "task-4-grow-plant", 64 ], [ "task-4-grow-fruit", 21 ], [ "task-6-lifespan-(shortest-lived)", 33 ], [ "task-1-melt", 11 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 42 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 16 ], [ "task-6-lifespan-(longest-lived)", 16 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 18 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 29 ], [ "task-6-lifespan-(shortest-lived)", 53 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 20 ], [ "task-6-lifespan-(longest-lived)", 60 ], [ "task-4-grow-plant", 34 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 58 ], [ "task-6-lifespan-(shortest-lived)", 43 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 4 ], [ "task-6-lifespan-(shortest-lived)", 32 ], [ "task-6-lifespan-(shortest-lived)", 16 ], [ "task-6-lifespan-(shortest-lived)", 51 ], [ "task-4-grow-fruit", 15 ], [ "task-1-change-the-state-of-matter-of", 9 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 8 ], [ "task-4-grow-fruit", 91 ], [ "task-4-grow-plant", 12 ], [ "task-1-boil", 10 ], [ "task-6-lifespan-(shortest-lived)", 86 ], [ "task-6-lifespan-(longest-lived)", 78 ], [ "task-6-lifespan-(shortest-lived)", 58 ], [ "task-1-boil", 12 ], [ "task-6-lifespan-(shortest-lived)", 75 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 74 ], [ "task-4-grow-plant", 23 ], [ "task-6-lifespan-(longest-lived)", 18 ], [ "task-4-grow-fruit", 45 ], [ "task-4-grow-plant", 32 ], [ "task-1-melt", 9 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 2 ], [ "task-6-lifespan-(shortest-lived)", 26 ], [ "task-5-chemistry-mix", 14 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 53 ], [ "task-1-melt", 8 ], [ "task-2-power-component", 8 ], [ "task-4-grow-plant", 4 
], [ "task-1-freeze", 13 ], [ "task-6-lifespan-(shortest-lived)", 59 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 1 ], [ "task-6-lifespan-(longest-lived)", 30 ], [ "task-6-lifespan-(longest-lived)", 63 ], [ "task-6-lifespan-(longest-lived)", 59 ], [ "task-5-chemistry-mix", 6 ], [ "task-1-melt", 6 ] ] ================================================ FILE: envs/sciworld/data/valid_indices.json ================================================ [ [ "task-2a-test-conductivity-of-unknown-substances", 418 ], [ "task-3-find-living-thing", 186 ], [ "task-10-measure-melting-point-(known-substance)", 326 ], [ "task-3-find-animal", 193 ], [ "task-4-grow-plant", 65 ], [ "task-3-find-plant", 179 ], [ "task-2a-test-conductivity", 587 ], [ "task-3-find-animal", 218 ], [ "task-10-use-thermometer", 384 ], [ "task-2a-test-conductivity", 479 ], [ "task-3-find-animal", 159 ], [ "task-2a-test-conductivity", 528 ], [ "task-2a-test-conductivity-of-unknown-substances", 413 ], [ "task-2a-test-conductivity-of-unknown-substances", 352 ], [ "task-10-use-thermometer", 385 ], [ "task-6-lifespan-(shortest-lived)", 69 ], [ "task-3-find-animal", 211 ], [ "task-2a-test-conductivity-of-unknown-substances", 313 ], [ "task-3-find-plant", 175 ], [ "task-2a-test-conductivity", 474 ], [ "task-2a-test-conductivity", 662 ], [ "task-2a-test-conductivity-of-unknown-substances", 386 ], [ "task-1-melt", 14 ], [ "task-3-find-plant", 209 ], [ "task-2a-test-conductivity", 579 ], [ "task-10-use-thermometer", 314 ], [ "task-1-boil", 14 ], [ "task-10-measure-melting-point-(known-substance)", 280 ], [ "task-10-use-thermometer", 378 ], [ "task-2a-test-conductivity", 655 ], [ "task-3-find-living-thing", 161 ], [ "task-2a-test-conductivity-of-unknown-substances", 448 ], [ "task-10-use-thermometer", 360 ], [ "task-2a-test-conductivity-of-unknown-substances", 389 ], [ "task-10-measure-melting-point-(known-substance)", 287 ], [ "task-10-use-thermometer", 364 ], [ "task-2a-test-conductivity", 640 ], [ 
"task-2a-test-conductivity-of-unknown-substances", 409 ], [ "task-2a-test-conductivity", 523 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 25 ], [ "task-10-use-thermometer", 284 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 14 ], [ "task-2a-test-conductivity", 485 ], [ "task-10-measure-melting-point-(known-substance)", 286 ], [ "task-3-find-living-thing", 180 ], [ "task-4-grow-fruit", 85 ], [ "task-3-find-plant", 177 ], [ "task-3-find-living-thing", 209 ], [ "task-2a-test-conductivity-of-unknown-substances", 394 ], [ "task-1-melt", 18 ], [ "task-3-find-animal", 173 ], [ "task-10-measure-melting-point-(known-substance)", 281 ], [ "task-3-find-animal", 221 ], [ "task-1-change-the-state-of-matter-of", 14 ], [ "task-1-melt", 16 ], [ "task-3-find-living-thing", 216 ], [ "task-4-grow-fruit", 90 ], [ "task-2a-test-conductivity-of-unknown-substances", 376 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 87 ], [ "task-3-find-animal", 213 ], [ "task-10-measure-melting-point-(known-substance)", 300 ], [ "task-3-find-plant", 203 ], [ "task-10-use-thermometer", 285 ], [ "task-3-find-animal", 216 ], [ "task-3-find-animal", 188 ], [ "task-3-find-animal", 176 ], [ "task-3-find-plant", 206 ], [ "task-4-grow-plant", 77 ], [ "task-10-measure-melting-point-(known-substance)", 253 ], [ "task-6-lifespan-(shortest-lived)", 87 ], [ "task-10-measure-melting-point-(known-substance)", 299 ], [ "task-10-use-thermometer", 358 ], [ "task-10-measure-melting-point-(known-substance)", 292 ], [ "task-10-use-thermometer", 291 ], [ "task-3-find-living-thing", 214 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 67 ], [ "task-1-change-the-state-of-matter-of", 16 ], [ "task-6-lifespan-(longest-lived)", 83 ], [ "task-3-find-living-thing", 221 ], [ "task-10-measure-melting-point-(known-substance)", 325 ], [ "task-6-lifespan-(shortest-lived)", 91 ], [ "task-6-lifespan-(shortest-lived)", 84 ], [ "task-3-find-non-living-thing", 150 ], [ "task-4-grow-plant", 84 
], [ "task-4-grow-plant", 68 ], [ "task-3-find-plant", 181 ], [ "task-6-lifespan-(shortest-lived)", 76 ], [ "task-3-find-living-thing", 202 ], [ "task-7-identify-life-stages-1", 7 ], [ "task-6-lifespan-(longest-lived)", 88 ], [ "task-3-find-living-thing", 206 ], [ "task-3-find-non-living-thing", 184 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 68 ], [ "task-3-find-living-thing", 218 ], [ "task-3-find-non-living-thing", 187 ], [ "task-1-boil", 15 ], [ "task-6-lifespan-(longest-lived)", 84 ], [ "task-6-lifespan-(longest-lived)", 66 ], [ "task-3-find-non-living-thing", 214 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 25 ], [ "task-2-power-component", 11 ], [ "task-3-find-plant", 164 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 66 ], [ "task-3-find-plant", 154 ], [ "task-3-find-non-living-thing", 198 ], [ "task-6-lifespan-(longest-lived)", 68 ], [ "task-3-find-plant", 165 ], [ "task-3-find-non-living-thing", 209 ], [ "task-3-find-non-living-thing", 185 ], [ "task-1-boil", 20 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 21 ], [ "task-6-lifespan-(shortest-lived)", 80 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 64 ], [ "task-3-find-non-living-thing", 179 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 78 ], [ "task-3-find-non-living-thing", 192 ], [ "task-1-boil", 17 ], [ "task-1-melt", 20 ], [ "task-4-grow-fruit", 72 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 69 ], [ "task-3-find-non-living-thing", 158 ], [ "task-4-grow-plant", 64 ], [ "task-4-grow-fruit", 88 ], [ "task-1-change-the-state-of-matter-of", 17 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 80 ], [ "task-4-grow-plant", 85 ], [ "task-6-lifespan-(longest-lived)", 80 ], [ "task-6-lifespan-(shortest-lived)", 82 ], [ "task-4-grow-plant", 86 ], [ "task-1-change-the-state-of-matter-of", 18 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 26 ], [ "task-4-grow-plant", 78 ], [ "task-1-melt", 19 ], [ 
"task-5-chemistry-mix", 21 ], [ "task-4-grow-fruit", 65 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 72 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 20 ], [ "task-4-grow-plant", 88 ], [ "task-1-melt", 15 ], [ "task-6-lifespan-(longest-lived-then-shortest-lived)", 85 ], [ "task-6-lifespan-(longest-lived)", 78 ], [ "task-4-grow-plant", 87 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 23 ], [ "task-6-lifespan-(shortest-lived)", 70 ], [ "task-6-lifespan-(shortest-lived)", 85 ], [ "task-6-lifespan-(longest-lived)", 72 ], [ "task-5-chemistry-mix", 19 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 18 ], [ "task-6-lifespan-(longest-lived)", 73 ], [ "task-1-change-the-state-of-matter-of", 15 ], [ "task-4-grow-fruit", 84 ], [ "task-1-change-the-state-of-matter-of", 19 ], [ "task-6-lifespan-(longest-lived)", 92 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 22 ], [ "task-4-grow-fruit", 80 ], [ "task-4-grow-fruit", 71 ], [ "task-7-identify-life-stages-2", 5 ], [ "task-7-identify-life-stages-2", 4 ], [ "task-1-freeze", 19 ], [ "task-6-lifespan-(shortest-lived)", 71 ], [ "task-2-power-component-(renewable-vs-nonrenewable-energy)", 10 ], [ "task-1-freeze", 15 ], [ "task-4-grow-fruit", 62 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 26 ], [ "task-1-freeze", 18 ], [ "task-7-identify-life-stages-1", 8 ], [ "task-4-grow-fruit", 68 ], [ "task-7-identify-life-stages-1", 6 ], [ "task-5-chemistry-mix", 18 ], [ "task-1-freeze", 16 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 23 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 24 ], [ "task-1-melt", 17 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 22 ], [ "task-1-boil", 16 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 19 ], [ "task-5-chemistry-mix-paint-(tertiary-color)", 18 ], [ "task-2-power-component", 13 ], [ "task-5-chemistry-mix", 23 ], [ "task-2-power-component", 12 ], [ "task-5-chemistry-mix-paint-(secondary-color)", 19 ], [ 
import contextlib
import json
import os
import random
from pathlib import Path
from typing import Any, Dict, List, Optional

import numpy as np
import yaml

from base.environment import Env
from utils.errors import StepLimitError


class SciWorldEnv(Env):
    """
    An environment wrapper for ScienceWorld to conform to the base `Env` interface.
    """

    env_name = "sciworld"

    # Shared cache keyed by resolved data_root_dir so repeated instances do
    # not re-read the (large) task-metadata JSON files from disk.
    _shared_cache: Dict[str, Dict[str, Any]] = {}

    def __init__(
        self,
        config_path: str = "envs/sciworld/base_config.yaml",
        simplification: str = "easy",
        logger: Optional[Any] = None,
    ) -> None:
        """Load the YAML config and the (cached) task metadata.

        Args:
            config_path: YAML config file; must define 'data_root_dir'.
            simplification: ScienceWorld simplification level, forwarded to
                ``env.load()`` on reset.
            logger: optional logger; all logging is skipped when None.
        """
        self.simplification = simplification
        self.logger = logger
        self.config = {}
        if config_path and Path(config_path).exists():
            with open(config_path, "r") as f:
                self.config = yaml.safe_load(f) or {}
        self.data_root_dir = Path(self.config["data_root_dir"])
        root_key = str(self.data_root_dir.resolve())
        cache = SciWorldEnv._shared_cache.get(root_key)
        if cache is None:
            # FIX: use context managers instead of json.load(open(...)),
            # which leaked the file handles.
            with open(self.data_root_dir / "taskname2id.json") as f:
                taskname2id = json.load(f)
            with open(self.data_root_dir / "max_steps.json") as f:
                max_steps = json.load(f)
            cache = {
                "taskname2id": taskname2id,
                "max_steps": max_steps,
                "indices_by_split": {},
            }
            SciWorldEnv._shared_cache[root_key] = cache
        self.taskname2id = cache["taskname2id"]
        self.max_steps_dict = cache["max_steps"]

    def _initialize(self) -> None:
        """Initialize the ScienceWorld environment (lazy import)."""
        if self.logger:
            self.logger.info("Initializing ScienceWorld environment")
        try:
            import scienceworld
        except ImportError as e:
            raise ImportError(
                "The 'scienceworld' library is required to use SciWorldEnv. "
                "Please install it with 'pip install scienceworld'."
            ) from e
        # Suppress verbose output from the ScienceWorld library
        with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
            self.env = scienceworld.ScienceWorldEnv(
                serverPath=None, envStepLimit=np.inf
            )
        if self.logger:
            self.logger.info("ScienceWorld environment initialized successfully")

    def _load_indices(self, split: str, seed: int) -> List[int]:
        """Load the indices for the given split (cached across instances)."""
        root_key = str(self.data_root_dir.resolve())
        cache = SciWorldEnv._shared_cache.get(root_key)
        if cache is None:
            cache = {
                "taskname2id": getattr(self, "taskname2id", {}),
                "max_steps": getattr(self, "max_steps_dict", {}),
                "indices_by_split": {},
            }
            SciWorldEnv._shared_cache[root_key] = cache
        indices_cache = cache.setdefault("indices_by_split", {})
        # FIX: normalize the alias BEFORE the cache-membership test; the old
        # order stored the data under "valid" but kept checking "validation",
        # re-reading and re-shuffling the file on every call.
        if split == "validation":
            split = "valid"
        if split not in indices_cache:
            with open(self.data_root_dir / f"{split}_indices.json", "r") as f:
                indices_cache[split] = json.load(f)
            # FIX: a local Random instance gives the identical seeded shuffle
            # without clobbering the global `random` module state.
            random.Random(seed).shuffle(indices_cache[split])
        return indices_cache[split]

    def reset(self, running_config: dict, id: Optional[str] = None) -> dict:
        """
        Reset environment to its initial state. The `id` selects the
        (task, variation) pair from the shuffled split indices.
        """
        self._initialize()
        if self.env is None:
            raise RuntimeError("Environment could not be initialized.")
        self.split = running_config.get("split", "train")
        seed = running_config.get("seed", 42)
        self.indices = self._load_indices(self.split, seed)
        # FIX: a missing id previously fell through and crashed with an
        # opaque TypeError on `self.indices[None]`.
        if id is None:
            raise ValueError("A task id is required to reset SciWorldEnv.")
        try:
            id_int = int(id)
        except ValueError:
            raise ValueError(f"Task ID '{id}' is not a valid integer.")
        if not 0 <= id_int < len(self.indices):
            raise ValueError(
                f"Task ID {id_int} is out of valid range (0-{len(self.indices) - 1})."
            )
        self.task_name, self.variation = self.indices[id_int]
        self.id = f"{self.taskname2id[self.task_name]}_{self.variation}"
        self.max_steps = self.max_steps_dict[self.task_name]
        self.env.load(self.task_name, self.variation, self.simplification, generateGoldPath=False)
        obs, info = self.env.reset()
        self._step_count = 0
        self._done = False
        self._success = False
        self._reward = 0.0
        task_description = info.get("taskDesc", "No task description found.")
        observation = f"{task_description}\n{obs}"
        return {"observations": [observation], "env_name": self.env_name, "env": self}

    async def _run(self, single_action: str) -> str:
        """Execute one action against the underlying ScienceWorld env."""
        if not single_action:
            return ""
        # Early exit if already terminated
        if self._done:
            return "The environment has already terminated."
        if single_action.strip().lower() == "[finish]":
            self._done = True
            return "You have finished the task." if self._success else "Task failed."
        # Increment step counter before executing
        self._step_count += 1
        if self.max_steps and self._step_count > self.max_steps:
            self._done = True
            raise StepLimitError(f"Step limit of {self.max_steps} exceeded.")
        try:
            obs, _, done, info = self.env.step(single_action)
            score = info.get("score")
            # FIX: guard the logger — it defaults to None and every other
            # call site in this class checks before logging.
            if self.logger:
                self.logger.info(f"[Score] {score}")
            if score is not None and score > self._reward:
                self._reward = score
            self._done = done
            # FIX: the old `info['score'] > 0` raised TypeError when the
            # score was None (the reward update above guarded for that).
            if done and score is not None and score > 0:
                self._success = True
            return obs
        except Exception as e:
            if self.logger:
                self.logger.error(f"Error executing action '{single_action}': {e}")
            self._done = True
            self._success = False
            raise

    def is_done(self) -> bool:
        """Check if the environment is done."""
        return self._done

    # Provide current step count for external callers (e.g., run.py)
    def get_step_count(self) -> int:
        return self._step_count

    def is_success(self) -> bool:
        """Check if the task was successfully completed."""
        return self._success

    def report(self):
        """Summarize the finished episode for the runner."""
        return {
            "success": self._success,
            "step": self._step_count,
            "reward": self._reward,
            "task_type": self.task_name,
        }

    async def close(self) -> None:
        """Close the ScienceWorld environment and clean up resources."""
        if self.logger:
            self.logger.info("Closing ScienceWorld environment")
        try:
            # ScienceWorld has no explicit close method; drop the reference.
            if hasattr(self, "env") and self.env is not None:
                self.env = None
            # Reset state variables
            self._step_count = 0
            self._done = False
            self._success = False
            self._reward = 0.0
            if self.logger:
                self.logger.info("ScienceWorld environment closed successfully")
        except Exception as e:
            if self.logger:
                self.logger.error(f"Error closing ScienceWorld environment: {e}")
            raise
import warnings

warnings.filterwarnings(
    "ignore",
    "The 'text' argument to find\\(\\)-type methods is deprecated",
    category=DeprecationWarning,
)

from typing import Any, Optional

from bs4 import BeautifulSoup
from bs4.element import Comment

from utils.common import read_json_file
from base.environment import Env
from envs.webshop.src.webshop.web_agent_site.envs import WebAgentTextEnv
from envs.webshop.src.webshop.web_agent_site.utils import DEFAULT_FILE_PATH
from envs.webshop.src.webshop.web_agent_site.envs.web_agent_text_env import SimServer

# Process-wide SimServer singleton — building one loads the full product
# catalog, so it is shared across every WebShopEnv instance.
_SHARED_SERVER = None


def clean_str(p):
    """Clean string encoding issues"""
    return p.encode().decode("unicode-escape").encode("latin1").decode("utf-8")


def tag_visible(element):
    """Check if HTML element should be visible in text conversion"""
    ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
    return (
        element.parent.name not in ignore
        and not isinstance(element, Comment)
    )


def webshop_text(html_content, max_products=10):
    """Convert WebShop HTML to text with proper formatting.

    Args:
        html_content: HTML content to parse
        max_products: Maximum number of products to display (default: 10)

    Returns:
        Tuple of (observation text, info dict). The info dict may carry
        'option_types', 'asins', and — on the score page — 'reward'.
    """
    try:
        html_obj = BeautifulSoup(html_content, 'html.parser')
        texts = html_obj.find_all(string=True)
        visible_texts = list(filter(tag_visible, texts))
        observation = ''
        option_type = ''
        option_types = {}
        asins = []
        cnt = 0
        prod_cnt = 0
        just_prod = 0
        for t in visible_texts:
            if t == '\n':
                continue
            if t.replace('\n', '').replace('\\n', '').replace(' ', '') == '':
                continue
            if t.parent.name == 'button':  # button
                processed_t = f'\n[{t}] '
            elif t.parent.name == 'label':  # options
                processed_t = f'[{t}]'
                option_types[str(t)] = option_type
            elif t.parent.get('class') == ["product-link"]:  # product asins
                processed_t = f'\n[{t}] '
                if prod_cnt >= max_products:
                    processed_t = ''
                prod_cnt += 1
                asins.append(str(t))
                just_prod = 0
            else:  # regular, unclickable text
                processed_t = '\n' + str(t) + ' '
                if cnt < 2:
                    processed_t = ''
                if just_prod <= 2 and prod_cnt >= max_products + 1:
                    processed_t = ''
                option_type = str(t)
                cnt += 1
            just_prod += 1
            observation += processed_t
        # Build info dict
        info = {}
        if option_types:
            info['option_types'] = option_types
        if asins:
            info['asins'] = asins
        if 'Your score (min 0.0, max 1.0)' in visible_texts:
            idx = visible_texts.index('Your score (min 0.0, max 1.0)')
            info['reward'] = float(visible_texts[idx + 1])
            observation = 'Your score (min 0.0, max 1.0): ' + str(visible_texts[idx + 1])
        return clean_str(observation), info
    except Exception as e:
        # Fallback to basic format if parsing fails
        return f"HTML parsing error: {str(e)}", {}


def _get_shared_server(
    file_path: Optional[str],
    num_products: Optional[int],
    human_goals: bool,
    limit_goals: int = -1,
    quiet: bool = False,
):
    """Return the process-wide SimServer, constructing it on first use."""
    global _SHARED_SERVER
    if _SHARED_SERVER is None:
        _SHARED_SERVER = SimServer(
            base_url='http://127.0.0.1:3000',
            file_path=file_path,
            filter_goals=None,
            limit_goals=limit_goals,
            num_products=num_products,
            human_goals=human_goals,
            show_attrs=False,
            quiet=quiet,
        )
    return _SHARED_SERVER


class WebShopEnv(Env):
    """WebShop Environment for agent interaction."""

    env_name = "webshop"

    def __init__(
        self,
        logger: Optional[Any] = None,
        max_steps: int = 30,
        file_path: Optional[str] = DEFAULT_FILE_PATH,
        success_threshold: float = 1.0,
    ):
        """Create the env around a shared SimServer + WebAgentTextEnv.

        Args:
            logger: optional logger; logging is skipped when None.
            max_steps: hard step limit per episode.
            file_path: product data file for the (shared) SimServer.
            success_threshold: minimum reward that counts as success.
        """
        self.logger = logger
        self.max_steps = max_steps
        self.success_threshold = success_threshold
        # Initialize environment state
        self.id = "webshop_env"
        self._step_count = 0
        self.is_finished = False
        self.reward = 0.0
        self.last_observation = ""
        self.last_raw_observation = ""  # Store raw observation
        self.current_session = None
        self.trajectory = []  # Store complete trajectory like human agent
        # Use a shared SimServer to avoid reloading data per instance
        self._server = _get_shared_server(
            file_path=file_path,
            num_products=None,
            human_goals=True,
            quiet=True,
        )
        self.webshop_env = WebAgentTextEnv(
            observation_mode="text",
            server=self._server,
            num_products=None,
            human_goals=True,
            quiet=True,
        )
        if self.logger:
            self.logger.info("WebShop environment initialized")
            self.logger.info(
                f"Configuration: max_steps={self.max_steps}, "
                f"success_threshold={self.success_threshold}"
            )

    def _ensure_session_asins(self):
        """Ensure user session has proper asins field (fix for product click bug)"""
        session_id = self.webshop_env.session
        if hasattr(self.webshop_env, 'server') and session_id in self.webshop_env.server.user_sessions:
            session = self.webshop_env.server.user_sessions[session_id]
            if 'asins' not in session:
                session['asins'] = set()
            elif not isinstance(session['asins'], set):
                # Convert list to set if needed
                session['asins'] = set(session['asins']) if hasattr(session['asins'], '__iter__') else set()

    def reset(self, running_config: dict, id: Optional[str] = None):
        """Reset the environment using official WebShop reset"""
        if self.logger:
            self.logger.info(f"Resetting WebShop environment (ID: {id})")
        self._step_count = 0
        self.is_finished = False
        self.reward = 0.0
        self.trajectory = []  # Reset trajectory
        self.split = running_config.get("split", "train")
        if self.split == "train":
            self.indices = read_json_file("envs/webshop/data/train_indices.json")
        elif self.split == "test":
            self.indices = read_json_file("envs/webshop/data/test_indices.json")
        else:
            raise ValueError(f"Invalid split: {self.split}. WebShop has only train and test splits.")
        # Use official WebShop reset
        self.id = id
        # FIX: a missing id previously crashed later with an opaque
        # TypeError on `self.indices[None]`.
        if id is None:
            raise ValueError("A task id is required to reset WebShopEnv.")
        try:
            id_int = int(id)
        except ValueError:
            raise ValueError(f"Task ID '{id}' is not a valid integer.")
        self.session_id = self.indices[id_int]
        result = self.webshop_env.reset(session=self.session_id)
        # Handle both tuple (observation, info) and single observation return
        if isinstance(result, tuple):
            observation, info = result
        else:
            observation = result
            info = {}
        # Ensure user session has proper asins field
        self._ensure_session_asins()
        # Store raw observation first
        self.last_raw_observation = observation if observation else "No observation available"
        # Now format for agent
        formatted_observation = self._format_observation(observation)
        self.last_observation = formatted_observation
        self.current_session = self.webshop_env.session
        # Create trajectory entry in human agent style
        trajectory_entry = {
            "action": None,  # No action for reset
            "observation": formatted_observation,
            "raw_observation": self.last_raw_observation,
            "url": "http://127.0.0.1:3000",  # WebShop base URL
            "goal": self.webshop_env.get_instruction_text(),
            "step": self._step_count,
            "session": self.current_session,
            "reward": 0.0,
            "info": info,
        }
        self.trajectory.append(trajectory_entry)
        if self.logger:
            self.logger.info(f"WebShop reset with session {self.current_session}")
        return {"observations": [formatted_observation], "env_name": self.env_name, "env": self}

    def _format_observation(self, observation: str) -> str:
        """Format observation with proper text conversion."""
        if observation is None:
            return "No observation available"
        try:
            if hasattr(self.webshop_env, 'state') and self.webshop_env.state.get('html'):
                html_content = self.webshop_env.state['html']
                formatted_obs, info = webshop_text(html_content)
                if info and hasattr(self.webshop_env.server, 'user_sessions') and self.current_session:
                    current_session_info = self.webshop_env.server.user_sessions.get(self.current_session, {})
                    current_session_info.update(info)
                if formatted_obs and formatted_obs.strip():
                    return formatted_obs
        except Exception:
            # Best effort: fall back to the raw text observation below.
            pass
        return observation.replace(' [SEP] ', '\n')

    async def _run(self, action: str):
        """Execute an action using official WebShop step function"""
        if self.is_finished:
            if self.logger:
                self.logger.warning(f"Attempted action '{action}' on finished environment")
            return self.last_observation
        self._step_count += 1
        if self.logger:
            self.logger.info(f"Step {self._step_count}: {action}")
        # Handle [FINISH] action - this terminates the episode
        if action == "[FINISH]":
            self.is_finished = True
            if self.logger:
                self.logger.info(f"[FINISH] action received - terminating episode")
                self.logger.info(f"Episode finished with final reward: {self.reward:.3f}, success: {self.is_success()}")
            # Add finish action to trajectory
            trajectory_entry = {
                "action": action,
                "observation": self.last_observation,
                "raw_observation": self.last_raw_observation,
                "url": "FINISH",
                "goal": self.webshop_env.get_instruction_text(),
                "step": self._step_count,
                "session": self.current_session,
                "reward": self.reward,
                "info": {"finish_action": True},
            }
            self.trajectory.append(trajectory_entry)
            return self.last_observation
        # Check for step limit before execution
        if self._step_count > self.max_steps:
            self.is_finished = True
            if self.logger:
                self.logger.info(f"Maximum steps ({self.max_steps}) reached. Episode terminated.")
            return self.last_observation
        # Use official WebShop step function
        try:
            # Ensure user session has proper asins field
            self._ensure_session_asins()
            result = self.webshop_env.step(action)
            # Handle different return formats.
            # FIX: the old code called list(result) twice — the first call
            # exhausts a generator, so the second unpacked an empty list.
            if isinstance(result, tuple) and len(result) >= 4:
                observation, reward, done, info = result[:4]
            else:
                if hasattr(result, '__iter__') and not isinstance(result, str):
                    items = list(result)  # materialize exactly once
                else:
                    items = []
                if len(items) >= 4:
                    observation, reward, done, info = items[:4]
                else:
                    # Fallback for unexpected return format
                    observation = str(result) if result is not None else "No observation available"
                    reward, done, info = 0.0, False, {}
            # Ensure observation is not None
            if observation is None:
                observation = "No observation available"
            # Store raw observation first
            self.last_raw_observation = observation
            # Format observation for agent
            formatted_observation = self._format_observation(observation)
            self.last_observation = formatted_observation
            self.reward = reward if reward is not None else 0.0
            # Add to trajectory in human agent style
            try:
                goal = self.webshop_env.get_instruction_text()
            except Exception:  # FIX: was a bare except
                goal = "Episode completed"
            trajectory_entry = {
                "action": action,
                "observation": formatted_observation,
                "raw_observation": self.last_raw_observation,
                "url": self._get_current_url(),
                "goal": goal,
                "step": self._step_count,
                "session": self.current_session,
                "reward": self.reward,
                "info": info if info is not None else {},
            }
            self.trajectory.append(trajectory_entry)
            # WebShop has its own done condition when user clicks "Buy Now"
            self.is_finished = done or self._step_count >= self.max_steps
            return formatted_observation
        except Exception as e:
            self.is_finished = True
            error_msg = f"WebShop step execution failed: {str(e)}"
            # Add error to trajectory
            try:
                goal = self.webshop_env.get_instruction_text()
            except Exception:  # FIX: was a bare except
                goal = "Error occurred"
            trajectory_entry = {
                "action": action,
                "observation": error_msg,
                "raw_observation": error_msg,
                "url": "ERROR",
                "goal": goal,
                "step": self._step_count,
                "session": self.current_session,
                "reward": 0.0,
                "info": {"error": str(e)},
            }
            self.trajectory.append(trajectory_entry)
            if self.logger:
                self.logger.error(f"Step {self._step_count} ERROR: {error_msg}")
            return error_msg

    def _get_current_url(self):
        """Get current URL based on WebShop state - simplified version"""
        # This is a simplified version since we don't have direct access to
        # WebShop's internal URL state; we infer from observation content.
        base_url = "http://127.0.0.1:3000"
        if "search" in self.last_observation.lower():
            return f"{base_url}/search"
        elif "product" in self.last_observation.lower() or "Buy Now" in self.last_observation:
            return f"{base_url}/item"
        else:
            return base_url

    def is_done(self):
        """Check if the episode is done"""
        return self.is_finished or self._step_count >= self.max_steps

    def is_success(self):
        """Check if the task was completed successfully"""
        success = self.is_finished and self.reward >= self.success_threshold
        if self.logger and self.is_finished:
            self.logger.info(
                f"Task evaluation: reward={self.reward:.3f}, "
                f"threshold={self.success_threshold}, success={success}"
            )
        return success

    def get_step_count(self):
        """Get the current step count"""
        return self._step_count

    def get_reward(self):
        """Get the current reward"""
        return self.reward

    def get_available_actions(self):
        """Get available actions from official WebShop environment"""
        return self.webshop_env.get_available_actions()

    def get_instruction_text(self):
        """Get current instruction text from official WebShop environment"""
        return self.webshop_env.get_instruction_text()

    def get_trajectory(self):
        """Get the complete trajectory in human agent format"""
        return self.trajectory

    async def close(self) -> None:
        """Close the official WebShop environment"""
        if self.logger:
            self.logger.info(
                f"Closing WebShop environment. "
                f"Final stats: {len(self.trajectory)} trajectory steps, "
                f"final reward: {self.reward:.3f}, success: {self.is_success()}"
            )
            # Log trajectory summary for debugging
            if self.trajectory:
                self.logger.info(f"Trajectory summary:")
                for i, step in enumerate(self.trajectory):
                    action = step.get('action', 'RESET')
                    reward = step.get('reward', 0.0)
                    url = step.get('url', 'unknown')
                    self.logger.info(f"  {i}: '{action}' -> {url} -> reward={reward:.3f}")
        try:
            # Close the WebShop environment
            if hasattr(self, 'webshop_env') and self.webshop_env:
                self.webshop_env.close()
            # Drop the shared server reference.
            # NOTE(review): this clears the singleton even if other live
            # instances still hold self._server — their reference keeps the
            # object alive, but a later env will rebuild the server.
            global _SHARED_SERVER
            _SHARED_SERVER = None
            # Reset state variables
            self._step_count = 0
            self.is_finished = False
            self.reward = 0.0
            self.trajectory = []
            self.current_session = None
            self.last_observation = ""
            self.last_raw_observation = ""
            if self.logger:
                self.logger.info("WebShop environment closed successfully")
        except Exception as e:
            if self.logger:
                self.logger.warning(f"Error closing WebShop environment: {e}")
            raise

    def report(self):
        """Summarize the finished episode for the runner."""
        return {
            "success": self.is_success(),
            "reward": self.reward,
            "step": self._step_count,
        }
# ---- FILE: envs/webshop/src/webshop/run_envs/run_web_agent_site_env.py ----
# (This dump line also carries the tail of setup.py, setup.sh, and an empty
# __init__.py; only the complete Python script below is reconstructed.)
"""
Test the site gym environment.

TODO: move to testing dir for more rigorous tests
"""
import gym
from rich import print
from rich.markup import escape

from envs.webshop.src.webshop.web_agent_site.envs import WebAgentSiteEnv
from envs.webshop.src.webshop.web_agent_site.models import (
    HumanPolicy,
    RandomPolicy,
)
from envs.webshop.src.webshop.web_agent_site.utils import DEBUG_PROD_SIZE

if __name__ == '__main__':
    # Drive the site environment with a random policy until the episode ends.
    env = WebAgentSiteEnv(observation_mode='text', render=False, num_products=DEBUG_PROD_SIZE)
    step_counter = 0
    try:
        policy = RandomPolicy()
        obs = env.observation
        while True:
            print(obs)
            choices = env.get_available_actions()
            print('Available actions:', choices)
            chosen = policy.forward(obs, choices)
            obs, reward, done, info = env.step(chosen)
            print(f'Taking action "{escape(chosen)}" -> Reward = {reward}')
            if done:
                break
            step_counter += 1
    finally:
        # Always release the browser/session, even on interrupt.
        env.close()
# ---- FILE: envs/webshop/src/webshop/run_envs/run_web_agent_text_env.py ----
"""
Test the text gym environment.

TODO: move to testing dir for more rigorous tests
"""
import gym
from rich import print
from rich.markup import escape

from envs.webshop.src.webshop.web_agent_site.envs import WebAgentTextEnv
from envs.webshop.src.webshop.web_agent_site.models import RandomPolicy
from envs.webshop.src.webshop.web_agent_site.utils import DEBUG_PROD_SIZE

if __name__ == '__main__':
    # Drive the text environment with a random policy until the episode ends.
    env = gym.make('WebAgentTextEnv-v0', observation_mode='text', num_products=DEBUG_PROD_SIZE)
    env.reset()
    try:
        policy = RandomPolicy()
        obs = env.observation
        while True:
            print(obs)
            choices = env.get_available_actions()
            print('Available actions:', choices)
            act = policy.forward(obs, choices)
            obs, reward, done, info = env.step(act)
            print(f'Taking action "{escape(act)}" -> Reward = {reward}')
            if done:
                break
    finally:
        env.close()

# ---- FILE: envs/webshop/src/webshop/search_engine/lucene_searcher.py ----
import json
from pyserini.search.lucene import LuceneSearcher
from rich import print

# Ad-hoc smoke test: query the local Lucene index and print matching titles.
searcher = LuceneSearcher('indexes')
hits = searcher.search('rubber sole shoes', k=20)
for hit in hits:
    doc = searcher.doc(hit.docid)
    print(doc)
    obj = json.loads(doc.raw())['product']['Title']
    print(obj)
print(len(hits))
Interact with a demo of the transfer code, deployed as a 🤗 Hugging Face space [here](https://huggingface.co/spaces/webshop/amazon_shop)!

## 🛠️ Usage
The Gradio app deployed as the aforementioned Hugging Face space can be started locally by running `python app.py` in this folder. The initial `setup.sh` script should have installed all the required dependencies.

## ➡️ Transfer Logic
The sim-to-real transfer code follows this general logical flow:

The contents of this directory each serve the following purposes:
* `app.py`: Run to launch an interactive [Gradio](https://gradio.app/) demo of the app
* `predict_help.py`: Amazon, eBay web scraping code
* `webshop_lite.py`: A condensed version of WebShop's templating engine

If you are interested in *transferring an agent's functionality to a new website or platform*, you will need to...
1. Implement two new functions, `parse_results_...` and `parse_item_page_...`, in `predict_help.py`. The corresponding interfaces and working examples for Amazon can be found [here](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L262) and [here](https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L296).
2. Invoke these functions in the [`run_episode`](https://github.com/princeton-nlp/webshop/tree/master/transfer/app.py#L105) function in the `app.py` file. Specifically, you should add a single call to...
* `parse_results...` in the [conditional]((https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L220)) handling `Page.RESULTS` page types * `parse_item_page...` in the [conditional]((https://github.com/princeton-nlp/webshop/tree/master/transfer/predict_help.py#L240)) handling `Page.ITEMS` page types ================================================ FILE: envs/webshop/src/webshop/transfer/__init__.py ================================================ ================================================ FILE: envs/webshop/src/webshop/transfer/app.py ================================================ import gradio as gr import json, time, torch from transformers import BartTokenizer, BartForConditionalGeneration, AutoModel, AutoTokenizer from .webshop_lite import dict_to_fake_html from .predict_help import ( Page, convert_dict_to_actions, convert_html_to_text, parse_results_amz, parse_item_page_amz, parse_results_ws, parse_item_page_ws, parse_results_ebay, parse_item_page_ebay, WEBSHOP_URL, WEBSHOP_SESSION ) ENVIRONMENTS = ['amazon', 'webshop', 'ebay'] # IL+RL: 'webshop/il-rl-choice-bert-image_1' # IL: 'webshop/il-choice-bert-image_0' BERT_MODEL_PATH = 'webshop/il-choice-bert-image_0' # load IL models bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large') bart_model = BartForConditionalGeneration.from_pretrained('webshop/il_search_bart') bert_tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', truncation_side='left') bert_tokenizer.add_tokens(['[button]', '[button_]', '[clicked button]', '[clicked button_]'], special_tokens=True) bert_model = AutoModel.from_pretrained(BERT_MODEL_PATH, trust_remote_code=True) def process_str(s): s = s.lower().replace('"', '').replace("'", "").strip() s = s.replace('[sep]', '[SEP]') return s def process_goal(state): state = state.lower().replace('"', '').replace("'", "") state = state.replace('amazon shopping game\ninstruction:', '').replace('webshop\ninstruction:', '') state = 
def data_collator(batch):
    """Collate imitation-learning samples into padded tensors.

    State fields contribute one row per sample; action fields are flattened
    across samples (each sample carries several candidate actions). All
    sequences are truncated to the longest real (non-padding) length in the
    batch, using the attention-mask row sums as sequence lengths.
    """
    state_ids = [s['state_input_ids'] for s in batch]
    state_masks = [s['state_attention_mask'] for s in batch]
    action_ids = [row for s in batch for row in s['action_input_ids']]
    action_masks = [row for s in batch for row in s['action_attention_mask']]
    sizes = [s['sizes'] for s in batch]
    labels = [s['labels'] for s in batch]
    images = [s['images'] for s in batch]

    # Masks are 0/1, so summing a row gives the true (unpadded) length.
    state_len = max(sum(mask) for mask in state_masks)
    action_len = max(sum(mask) for mask in action_masks)

    return {
        'state_input_ids': torch.tensor(state_ids)[:, :state_len],
        'state_attention_mask': torch.tensor(state_masks)[:, :state_len],
        'action_input_ids': torch.tensor(action_ids)[:, :action_len],
        'action_attention_mask': torch.tensor(action_masks)[:, :action_len],
        'sizes': torch.tensor(sizes),
        'images': torch.tensor(images),
        'labels': torch.tensor(labels),
    }

def bart_predict(input):
    """Return the best beam-search generation for *input* using the global
    BART search model/tokenizer."""
    token_ids = torch.tensor(bart_tokenizer(input)['input_ids']).unsqueeze(0)
    generated = bart_model.generate(token_ids, max_length=512, num_return_sequences=5, num_beams=5)
    return bart_tokenizer.batch_decode(generated.tolist(), skip_special_tokens=True)[0]
def get_return_value(env, asin, options, search_terms, page_num, product):
    """Build the final (product summary, selected options, HTML link) triple
    shown in the Gradio UI once an episode ends with `buy now`."""
    # Determine product URL based on environment
    if env == 'webshop':
        query_str = "+".join(search_terms.split())
        options_str = json.dumps(options)
        asin_url = (
            f'{WEBSHOP_URL}/item_page/{WEBSHOP_SESSION}/'
            f'{asin}/{query_str}/{page_num}/{options_str}'
        )
    elif env == 'ebay':
        asin_url = f"https://www.ebay.com/itm/{asin}"
    else:
        asin_url = f"https://www.amazon.com/dp/{asin}"

    # Keep only the relevant product fields; truncate long text fields and
    # rename BulletPoints -> Features for display.
    keep = ["asin", "Title", "Description", "BulletPoints"]
    product_reduced = {k: v for k, v in product.items() if k in keep}
    product_reduced["Description"] = product_reduced["Description"][:100] + "..."
    product_reduced["Features"] = product_reduced.pop("BulletPoints")
    product_reduced["Features"] = product_reduced["Features"][:100] + "..."

    # Create HTML to show link to product
    html = """Chosen Product"""
    html += f"""Product Image:
""" if len(product["MainImage"]) > 0 else ""
    html += f"""Link to Product: {asin_url} """
    return product_reduced, options if len(options) > 0 else "None Selected", html

def predict(obs, info):
    """
    Given WebShop environment observation and info, predict an action.
    """
    candidates = info['valid']
    if candidates[0].startswith('click['):
        return bert_predict(obs, info)
    return "search[" + bart_predict(process_goal(obs)) + "]"
Page.ITEM_PAGE, None elif prev_page_type == Page.ITEM_PAGE: page_type = Page.RESULTS options, clicked_options = {}, set() elif prev_page_type == Page.RESULTS and page_num > 1: page_type = Page.RESULTS page_num -= 1 elif action == 'click[next >]': page_type = Page.RESULTS page_num += 1 elif action.lower() == 'click[back to search]': page_type = Page.SEARCH elif action == 'click[buy now]': return get_return_value(env, asin, options, search_terms, page_num, product_map[asin]) elif prev_page_type == Page.ITEM_PAGE: found = False for opt_name, opt_values in product_map[asin]["options"].items(): if action_content in opt_values: options[opt_name] = action_content page_type = Page.ITEM_PAGE clicked_options.add(action_content) found = True break if not found: raise Exception("Unrecognized action: " + action) else: raise Exception("Unrecognized action:" + action) if verbose: print(f"Parsing {page_type.value} page...") # URL -> Real HTML -> Dict of Info if page_type == Page.RESULTS: if search_terms in search_results_cache: data = search_results_cache[search_terms] if verbose: print(f"Loading cached results page for \"{search_terms}\"") else: begin = time.time() if env == 'amazon': data = parse_results_amz(search_terms, page_num, verbose) if env == 'webshop': data = parse_results_ws(search_terms, page_num, verbose) if env == 'ebay': data = parse_results_ebay(search_terms, page_num, verbose) end = time.time() if verbose: print(f"Parsing search results took {end-begin} seconds") search_results_cache[search_terms] = data for d in data: title_to_asin_map[d['Title']] = d['asin'] elif page_type == Page.ITEM_PAGE or page_type == Page.SUB_PAGE: if asin in product_map: if verbose: print("Loading cached item page for", asin) data = product_map[asin] else: begin = time.time() if env == 'amazon': data = parse_item_page_amz(asin, verbose) if env == 'webshop': data = parse_item_page_ws(asin, search_terms, page_num, options, verbose) if env == 'ebay': data = parse_item_page_ebay(asin, 
verbose) end = time.time() if verbose: print("Parsing item page took", end-begin, "seconds") product_map[asin] = data elif page_type == Page.SEARCH: if verbose: print("Executing search") obs = "Amazon Shopping Game\nInstruction:" + goal + "\n[button] search [button]" info = {'valid': ['search[stuff]'], 'image_feat': torch.zeros(512)} continue else: raise Exception("Page of type `", page_type, "` not found") # Dict of Info -> Fake HTML -> Text Observation begin = time.time() html_str = dict_to_fake_html(data, page_type, asin, sub_page_type, options, product_map, goal) obs = convert_html_to_text(html_str, simple=False, clicked_options=clicked_options, visited_asins=visited_asins) end = time.time() if verbose: print("[Page Info -> WebShop HTML -> Observation] took", end-begin, "seconds") # Dict of Info -> Valid Action State (Info) begin = time.time() prod_arg = product_map if page_type == Page.ITEM_PAGE else data info = convert_dict_to_actions(page_type, prod_arg, asin, page_num) end = time.time() if verbose: print("Extracting available actions took", end-begin, "seconds") if i == 50: return get_return_value(env, asin, options, search_terms, page_num, product_map[asin]) gr.Interface( fn=run_episode, inputs=[ gr.inputs.Textbox(lines=7, label="Input Text"), gr.inputs.Radio(['Amazon', 'eBay'], type="value", default="Amazon", label='Environment') ], outputs=[ gr.outputs.JSON(label="Selected Product"), gr.outputs.JSON(label="Selected Options"), gr.outputs.HTML() ], examples=[ ["I want to find a gold floor lamp with a glass shade and a nickel finish that i can use for my living room, and price lower than 270.00 dollars", "Amazon"], ["I need some cute heart-shaped glittery cupcake picks as a gift to bring to a baby shower", "Amazon"], ["I want to buy ballet shoes which have rubber sole in grey suede color and a size of 6", "Amazon"], ["I would like a 7 piece king comforter set decorated with flowers and is machine washable", "Amazon"], ["I'm trying to find white bluetooth 
speakers that are not only water resistant but also come with stereo sound", "eBay"], ["find me the soy free 3.5 ounce 4-pack of dang thai rice chips, and make sure they are the aged cheddar flavor. i also need the ones in the resealable bags", "eBay"], ["I am looking for a milk chocolate of 1 pound size in a single pack for valentine day", "eBay"], ["I'm looking for a mini pc intel core desktop computer which supports with windows 11", "eBay"] ], title="WebShop", article="

To learn more about this project, check out the project page!

", description="

Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!

", ).launch(inline=False) ================================================ FILE: envs/webshop/src/webshop/transfer/predict_help.py ================================================ from bs4 import BeautifulSoup from bs4.element import Comment from enum import Enum import re, time from urllib.parse import urlencode import json, requests, torch class Page(Enum): DESC = "description" FEATURES = "features" ITEM_PAGE = "item_page" RESULTS = "results" REVIEWS = "reviews" SEARCH = "search" SUB_PAGE = "item_sub_page" HEADER_ = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36' DEBUG_HTML = "temp.html" NUM_PROD_LIMIT = 10 WEBSHOP_URL = "http://3.83.245.205:3000" WEBSHOP_SESSION = "abc" def parse_results_ebay(query, page_num=None, verbose=True): query_string = '+'.join(query.split()) page_num = 1 if page_num is None else page_num url = f'https://www.ebay.com/sch/i.html?_nkw={query_string}&_pgn={page_num}' if verbose: print(f"Search Results URL: {url}") webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'}) soup = BeautifulSoup(webpage.text, 'html.parser') products = soup.select('.s-item__wrapper.clearfix') results = [] for item in products[:NUM_PROD_LIMIT]: title = item.select_one('.s-item__title').text.strip() if "shop on ebay" in title.lower(): # Skip "Shop on ebay" product title continue link = item.select_one('.s-item__link')['href'] asin = link.split("?")[0][len("https://www.ebay.com/itm/"):] try: price = item.select_one('.s-item__price').text if "to" in price: prices = price.split(" to ") price = [p.strip("$") for p in prices] except: price = None results.append({ "asin": asin, "Title": title, "Price": price }) if verbose: print(f"Scraped {len(results)} products") return results def parse_item_page_ebay(asin, verbose=True): product_dict = {} product_dict["asin"] = asin url = f"https://www.ebay.com/itm/{asin}" if verbose: print(f"Item Page URL: {url}") 
begin = time.time() webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'}) end = time.time() if verbose: print(f"Item page scraping took {end-begin} seconds") soup = BeautifulSoup(webpage.content, "html.parser") # Title try: product_dict["Title"] = soup.find('h1', {'class': 'x-item-title__mainTitle'}).text.strip() except: product_dict["Title"] = "N/A" # Price: Get price string, extract decimal numbers from string try: price_str = soup.find('div', {'class': 'mainPrice'}).text prices = re.findall('\d*\.?\d+', price_str) product_dict["Price"] = prices[0] except: product_dict["Price"] = "N/A" # Main Image try: img_div = soup.find('div', {'id': 'mainImgHldr'}) img_link = img_div.find('img', {'id': 'icImg'})["src"] product_dict["MainImage"] = img_link except: product_dict["MainImage"] = "" # Rating try: rating = soup.find('span', {'class': 'reviews-star-rating'})["title"].split()[0] except: rating = None product_dict["Rating"] = rating # Options options, options_to_images = {}, {} # TODO: options_to_images possible? 
try: option_blocks = soup.findAll('select', {'class': 'msku-sel'}) for block in option_blocks: name = block["name"].strip().strip(":") option_tags = block.findAll("option") opt_list = [] for option_tag in option_tags: if "select" not in option_tag.text.lower(): # Do not include "- select -" (aka `not selected`) choice opt_list.append(option_tag.text) options[name] = opt_list except: options = {} product_dict["options"], product_dict["option_to_image"] = options, options_to_images # Description desc = None try: # Ebay descriptions are shown in `iframe`s desc_link = soup.find('iframe', {'id': 'desc_ifr'})["src"] desc_webpage = requests.get(desc_link, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'}) desc_soup = BeautifulSoup(desc_webpage.content, "html.parser") desc = ' '.join(desc_soup.text.split()) except: desc = "N/A" product_dict["Description"] = desc # Features features = None try: features = soup.find('div', {'class': 'x-about-this-item'}).text except: features = "N/A" product_dict["BulletPoints"] = features return product_dict def parse_results_ws(query, page_num=None, verbose=True): query_string = '+'.join(query.split()) page_num = 1 if page_num is None else page_num url = ( f'{WEBSHOP_URL}/search_results/{WEBSHOP_SESSION}/' f'{query_string}/{page_num}' ) if verbose: print(f"Search Results URL: {url}") webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'}) soup = BeautifulSoup(webpage.content, 'html.parser') products = soup.findAll('div', {'class': 'list-group-item'}) results = [] for product in products: asin = product.find('a', {'class': 'product-link'}) title = product.find('h4', {'class': 'product-title'}) price = product.find('h5', {'class': 'product-price'}) if "\n" in title: title = title.text.split("\n")[0].strip() else: title = title.text.strip().strip("\n") if "to" in price.text: # Parse if price presented as range prices = price.text.split(" to ") price = 
def parse_item_page_ws(asin, query, page_num, options, verbose=True):
    """Scrape a WebShop item page (plus its Description and Features
    sub-pages) into a product dict keyed like the real-site parsers."""
    product_dict = {}
    product_dict["asin"] = asin

    query_string = '+'.join(query.split())
    options_string = json.dumps(options)
    url = (
        f'{WEBSHOP_URL}/item_page/{WEBSHOP_SESSION}/'
        f'{asin}/{query_string}/{page_num}/{options_string}'
    )
    if verbose:
        print(f"Item Page URL: {url}")
    webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
    soup = BeautifulSoup(webpage.content, 'html.parser')

    # Title, Price, Rating, and MainImage
    product_dict["Title"] = soup.find('h2').text
    h4_headers = soup.findAll("h4")
    for header in h4_headers:
        text = header.text
        if "Price" in text:
            product_dict["Price"] = text.split(":")[1].strip().strip("$")
        elif "Rating" in text:
            product_dict["Rating"] = text.split(":")[1].strip()
    product_dict["MainImage"] = soup.find('img')['src']

    # Options
    options, options_to_image = {}, {}
    option_blocks = soup.findAll("div", {'class': 'radio-toolbar'})
    for block in option_blocks:
        name = block.find("input")["name"]
        labels = block.findAll("label")
        inputs = block.findAll("input")
        opt_list = []
        for label, inp in zip(labels, inputs):  # renamed: `input` shadowed the builtin
            opt = label.text
            opt_img_path = inp["onclick"].split("href=")[1].strip('\';')
            opt_img_url = f'{WEBSHOP_URL}{opt_img_path}'
            opt_list.append(opt)
            options_to_image[opt] = opt_img_url
        options[name] = opt_list
    product_dict["options"] = options
    product_dict["option_to_image"] = options_to_image

    # Description sub-page
    url = (
        f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
        f'{asin}/{query_string}/{page_num}/Description/{options_string}'
    )
    if verbose:
        print(f"Item Description URL: {url}")
    webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
    soup = BeautifulSoup(webpage.content, 'html.parser')
    product_dict["Description"] = soup.find(name="p", attrs={'class': 'product-info'}).text.strip()

    # Features sub-page
    url = (
        f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
        f'{asin}/{query_string}/{page_num}/Features/{options_string}'
    )
    if verbose:
        print(f"Item Features URL: {url}")
    webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
    soup = BeautifulSoup(webpage.content, 'html.parser')
    bullets = soup.find(name="ul").findAll(name="li")
    product_dict["BulletPoints"] = '\n'.join([b.text.strip() for b in bullets])

    return product_dict

# Query -> Search Result ASINs
def parse_results_amz(query, page_num=None, verbose=True):
    """Scrape one page of Amazon search results into a list of
    {"asin", "Title", "Price"} dicts (at most NUM_PROD_LIMIT entries).

    Raises if no result blocks are found, after dumping the HTML to
    DEBUG_HTML for inspection.
    """
    url = 'https://www.amazon.com/s?k=' + query.replace(" ", "+")
    if page_num is not None:
        url += "&page=" + str(page_num)
    if verbose:
        print(f"Search Results URL: {url}")
    webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
    soup = BeautifulSoup(webpage.content, 'html.parser')
    products = soup.findAll('div', {'data-component-type': 's-search-result'})
    if not products:
        # BUGFIX: findAll returns an empty ResultSet, never None, so the old
        # `products is None` check could never fire and the debug dump was
        # unreachable. Also use `with` so the file handle is not leaked.
        with open(DEBUG_HTML, "w") as temp:
            temp.write(str(soup))
        raise Exception("Couldn't find search results page, outputted html for inspection")
    results = []
    for product in products[:NUM_PROD_LIMIT]:
        asin = product['data-asin']
        title = product.find("h2", {'class': "a-size-mini"})
        price_div = product.find("div", {'class': 's-price-instructions-style'})
        price = price_div.find("span", {'class': 'a-offscreen'})
        result = {
            'asin': asin,
            'Title': title.text.strip(),
            'Price': price.text.strip().strip("$")
        }
        results.append(result)
    if verbose:
        print("Scraped", len(results), "products")
    return results

# Scrape information of each product
def parse_item_page_amz(asin, verbose=True):
    """Scrape an Amazon product page into a product dict; every field falls
    back to "N/A"/""/{} when the expected markup is missing."""
    product_dict = {}
    product_dict["asin"] = asin
    url = f"https://www.amazon.com/dp/{asin}"
    if verbose:
        print("Item Page URL:", url)
    begin = time.time()
    webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
    end = time.time()
    if verbose:
        print(f"Item page scraping took {end-begin} seconds")
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Title
    try:
        title = soup.find("span", attrs={"id": 'productTitle'})
        title = title.string.strip().replace(',', '')
    except AttributeError:
        title = "N/A"
    product_dict["Title"] = title

    # Price
    try:
        parent_price_span = soup.find(name="span", class_="apexPriceToPay")
        price_span = parent_price_span.find(name="span", class_="a-offscreen")
        price = float(price_span.getText().replace("$", ""))
    except AttributeError:
        price = "N/A"
    product_dict["Price"] = price

    # Rating
    try:
        rating = soup.find(name="span", attrs={"id": "acrPopover"})
        if rating is None:
            rating = "N/A"
        else:
            rating = rating.text
    except AttributeError:
        rating = "N/A"
    product_dict["Rating"] = rating.strip("\n").strip()

    # Features
    try:
        features = soup.find(name="div", attrs={"id": "feature-bullets"}).text
    except AttributeError:
        features = "N/A"
    product_dict["BulletPoints"] = features

    # Description
    try:
        desc_body = soup.find(name="div", attrs={"id": "productDescription_feature_div"})
        desc_div = desc_body.find(name="div", attrs={"id": "productDescription"})
        desc_ps = desc_div.findAll(name="p")
        desc = " ".join([p.text for p in desc_ps])
    except AttributeError:
        desc = "N/A"
    product_dict["Description"] = desc.strip()

    # Main Image
    try:
        imgtag = soup.find("img", {"id": "landingImage"})
        imageurl = dict(imgtag.attrs)["src"]
    except AttributeError:
        imageurl = ""
    product_dict["MainImage"] = imageurl

    # Options
    options, options_to_image = {}, {}
    try:
        option_body = soup.find(name='div', attrs={"id": "softlinesTwister_feature_div"})
        if option_body is None:
            option_body = soup.find(name='div', attrs={"id": "twister_feature_div"})
        option_blocks = option_body.findAll(name='ul')
        for block in option_blocks:
            name = json.loads(block["data-a-button-group"])["name"]
            opt_list = []
            for li in block.findAll("li"):
                img = li.find(name="img")
                if img is not None:
                    opt = img["alt"].strip()
                    opt_img = img["src"]
                    if len(opt) > 0:
                        options_to_image[opt] = opt_img
                else:
                    opt = li.text.strip()
                if len(opt) > 0:
                    opt_list.append(opt)
            options[name.replace("_name", "").replace("twister_", "")] = opt_list
    except AttributeError:
        options = {}
    product_dict["options"], product_dict["option_to_image"] = options, options_to_image

    return product_dict

# Get text observation from html
# TODO[john-b-yang]: Similar to web_agent_site/envs/...text_env.py func def, merge?
def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins=None):
    """Flatten WebShop-style HTML into the text observation format, marking
    buttons/options and previously clicked options/items."""
    def tag_visible(element):
        # Skip text inside non-rendered tags and HTML comments.
        ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
        return (
            element.parent.name not in ignore and not isinstance(element, Comment)
        )
    html_obj = BeautifulSoup(html, 'html.parser')
    texts = html_obj.find_all(string=True)
    visible_texts = filter(tag_visible, texts)
    if simple:
        return ' [SEP] '.join(t.strip() for t in visible_texts if t != '\n')
    observation = ''
    for t in visible_texts:
        if t == '\n':
            continue
        if t.parent.name == 'button':  # button
            processed_t = f'[button] {t} [button]'
        elif t.parent.name == 'label':  # options
            if f'{t}' in clicked_options:
                processed_t = f' [clicked button] {t} [clicked button]'
                observation = f'You have clicked {t}.\n' + observation
            else:
                processed_t = f' [button] {t} [button]'
        elif t.parent.get('class') == ["product-link"]:  # asins
            if f'{t}' in visited_asins:
                processed_t = f'\n[clicked button] {t} [clicked button]'
            else:
                processed_t = f'\n[button] {t} [button]'
        else:  # regular, unclickable text
            processed_t = str(t)
        observation += processed_t + '\n'
    return observation

# Get action from dict of values retrieved from html
def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None) -> dict:
    """Enumerate the valid `click[...]` actions for *page_type* and attach a
    zero image-feature vector (torch.zeros(512)) under 'image_feat'."""
    info = {"valid": []}
    if page_type == Page.RESULTS:
        info["valid"] = ['click[back to search]']
        if products is None or page_num is None:
            print(page_num)
            print(products)
            raise Exception('Provide `products`, `page_num` to get `results` valid actions')
        # Decide whether to add `next >` as clickable based on # of search results
        if len(products) > 10:
            info["valid"].append('click[next >]')
        # Add `< prev` as clickable if not first page of search results
        if page_num > 1:
            info["valid"].append('click[< prev]')
        for product in products:
            info["valid"].append("click[item - " + product["Title"] + "]")
    if page_type == Page.ITEM_PAGE:
        if products is None or asin is None:
            raise Exception('Provide `products` and `asin` to get `item_page` valid actions')
        info["valid"] = ['click[back to search]', 'click[< prev]', 'click[description]',
                         'click[features]', 'click[buy now]']  # To do: reviews
        if "options" in products[asin]:
            for key, values in products[asin]["options"].items():
                for value in values:
                    info["valid"].append("click[" + value + "]")
    if page_type == Page.SUB_PAGE:
        info["valid"] = ['click[back to search]', 'click[< prev]']
    info['image_feat'] = torch.zeros(512)
    return info
QUERY = "" product_map = {} def read_html_template(path): with open(path) as f: template = f.read() return template @app.route('/', methods=['GET', 'POST']) def index(session_id, **kwargs): print("Hello world") @app.route('/', methods=['GET', 'POST']) def search_results(data): path = os.path.join(TEMPLATE_DIR, 'results_page.html') html = render_template_string( read_html_template(path=path), session_id=SESSION_ID, products=data, keywords=KEYWORDS, page=1, total=len(data), instruction_text=QUERY, ) return html @app.route('/', methods=['GET', 'POST']) def item_page(session_id, asin, keywords, page, options): path = os.path.join(TEMPLATE_DIR, 'item_page.html') html = render_template_string( read_html_template(path=path), session_id=session_id, product_info=product_map[asin], keywords=keywords, page=page, asin=asin, options=options, instruction_text=QUERY ) return html @app.route('/', methods=['GET', 'POST']) def item_sub_page(session_id, asin, keywords, page, sub_page, options): path = os.path.join(TEMPLATE_DIR, sub_page.value.lower() + "_page.html") html = render_template_string( read_html_template(path), session_id=session_id, product_info=product_map[asin], keywords=keywords, page=page, asin=asin, options=options, instruction_text=QUERY ) return html @app.route('/', methods=['GET', 'POST']) def done(asin, options, session_id, **kwargs): path = os.path.join(TEMPLATE_DIR, 'done_page.html') html = render_template_string( read_html_template(path), session_id=session_id, reward=1, asin=asin, options=product_map[asin]["options"], reward_info=kwargs.get('reward_info'), goal_attrs=kwargs.get('goal_attrs'), purchased_attrs=kwargs.get('purchased_attrs'), goal=kwargs.get('goal'), mturk_code=kwargs.get('mturk_code'), query=kwargs.get('query'), category=kwargs.get('category'), product_category=kwargs.get('product_category'), ) return html # Project Dictionary Information onto Fake Amazon def dict_to_fake_html(data, page_type, asin=None, sub_page_type=None, options=None, 
prod_map={}, query=""): global QUERY, product_map QUERY = query product_map = prod_map with app.app_context(), app.test_request_context(): if page_type == Page.RESULTS: return search_results(data) if page_type == Page.ITEM_PAGE: return item_page(SESSION_ID, asin, KEYWORDS, 1, options) if page_type == Page.SUB_PAGE: if sub_page_type is not None: return item_sub_page(SESSION_ID, asin, KEYWORDS, 1, sub_page_type, options) else: raise Exception("Sub page of type", sub_page_type, "unrecognized") ================================================ FILE: envs/webshop/src/webshop/web_agent_site/__init__.py ================================================ ================================================ FILE: envs/webshop/src/webshop/web_agent_site/app.py ================================================ import argparse, json, logging, random from pathlib import Path from ast import literal_eval from flask import ( Flask, request, redirect, url_for ) from rich import print from .engine.engine import ( load_products, init_search_engine, convert_web_app_string_to_var, get_top_n_product_from_keywords, get_product_per_page, map_action_to_html, END_BUTTON ) from .engine.goal import get_reward, get_goals from .utils import ( generate_mturk_code, setup_logger, DEFAULT_FILE_PATH, DEBUG_PROD_SIZE, ) app = Flask(__name__) search_engine = None all_products = None product_item_dict = None product_prices = None attribute_to_asins = None goals = None weights = None user_sessions = dict() user_log_dir = None SHOW_ATTRS_TAB = False @app.route('/') def home(): return redirect(url_for('index', session_id="abc")) @app.route('/', methods=['GET', 'POST']) def index(session_id): global user_log_dir global all_products, product_item_dict, \ product_prices, attribute_to_asins, \ search_engine, \ goals, weights, user_sessions if search_engine is None: all_products, product_item_dict, product_prices, attribute_to_asins = \ load_products( filepath=DEFAULT_FILE_PATH, num_products=DEBUG_PROD_SIZE ) 
search_engine = init_search_engine(num_products=DEBUG_PROD_SIZE) goals = get_goals(all_products, product_prices) random.seed(233) random.shuffle(goals) weights = [goal['weight'] for goal in goals] if session_id not in user_sessions and 'fixed' in session_id: goal_dix = int(session_id.split('_')[-1]) goal = goals[goal_dix] instruction_text = goal['instruction_text'] user_sessions[session_id] = {'goal': goal, 'done': False} if user_log_dir is not None: setup_logger(session_id, user_log_dir) elif session_id not in user_sessions: goal = random.choices(goals, weights)[0] instruction_text = goal['instruction_text'] user_sessions[session_id] = {'goal': goal, 'done': False} if user_log_dir is not None: setup_logger(session_id, user_log_dir) else: instruction_text = user_sessions[session_id]['goal']['instruction_text'] if request.method == 'POST' and 'search_query' in request.form: keywords = request.form['search_query'].lower().split(' ') return redirect(url_for( 'search_results', session_id=session_id, keywords=keywords, page=1, )) if user_log_dir is not None: logger = logging.getLogger(session_id) logger.info(json.dumps(dict( page='index', url=request.url, goal=user_sessions[session_id]['goal'], ))) return map_action_to_html( 'start', session_id=session_id, instruction_text=instruction_text, ) @app.route( '/search_results///', methods=['GET', 'POST'] ) def search_results(session_id, keywords, page): instruction_text = user_sessions[session_id]['goal']['instruction_text'] page = convert_web_app_string_to_var('page', page) keywords = convert_web_app_string_to_var('keywords', keywords) top_n_products = get_top_n_product_from_keywords( keywords, search_engine, all_products, product_item_dict, attribute_to_asins, ) products = get_product_per_page(top_n_products, page) html = map_action_to_html( 'search', session_id=session_id, products=products, keywords=keywords, page=page, total=len(top_n_products), instruction_text=instruction_text, ) logger = logging.getLogger(session_id) 
logger.info(json.dumps(dict( page='search_results', url=request.url, goal=user_sessions[session_id]['goal'], content=dict( keywords=keywords, search_result_asins=[p['asin'] for p in products], page=page, ) ))) return html @app.route( '/item_page/////', methods=['GET', 'POST'] ) def item_page(session_id, asin, keywords, page, options): options = literal_eval(options) product_info = product_item_dict[asin] goal_instruction = user_sessions[session_id]['goal']['instruction_text'] product_info['goal_instruction'] = goal_instruction html = map_action_to_html( 'click', session_id=session_id, product_info=product_info, keywords=keywords, page=page, asin=asin, options=options, instruction_text=goal_instruction, show_attrs=SHOW_ATTRS_TAB, ) logger = logging.getLogger(session_id) logger.info(json.dumps(dict( page='item_page', url=request.url, goal=user_sessions[session_id]['goal'], content=dict( keywords=keywords, page=page, asin=asin, options=options, ) ))) return html @app.route( '/item_sub_page//////', methods=['GET', 'POST'] ) def item_sub_page(session_id, asin, keywords, page, sub_page, options): options = literal_eval(options) product_info = product_item_dict[asin] goal_instruction = user_sessions[session_id]['goal']['instruction_text'] product_info['goal_instruction'] = goal_instruction html = map_action_to_html( f'click[{sub_page}]', session_id=session_id, product_info=product_info, keywords=keywords, page=page, asin=asin, options=options, instruction_text=goal_instruction ) logger = logging.getLogger(session_id) logger.info(json.dumps(dict( page='item_sub_page', url=request.url, goal=user_sessions[session_id]['goal'], content=dict( keywords=keywords, page=page, asin=asin, options=options, ) ))) return html @app.route('/done///', methods=['GET', 'POST']) def done(session_id, asin, options): options = literal_eval(options) goal = user_sessions[session_id]['goal'] purchased_product = product_item_dict[asin] price = product_prices[asin] reward, reward_info = get_reward( 
purchased_product, goal, price=price, options=options, verbose=True ) user_sessions[session_id]['done'] = True user_sessions[session_id]['reward'] = reward print(user_sessions) logger = logging.getLogger(session_id) logger.info(json.dumps(dict( page='done', url=request.url, goal=goal, content=dict( asin=asin, options=options, price=price, ), reward=reward, reward_info=reward_info, ))) del logging.root.manager.loggerDict[session_id] return map_action_to_html( f'click[{END_BUTTON}]', session_id=session_id, reward=reward, asin=asin, options=options, reward_info=reward_info, query=purchased_product['query'], category=purchased_product['category'], product_category=purchased_product['product_category'], goal_attrs=user_sessions[session_id]['goal']['attributes'], purchased_attrs=purchased_product['Attributes'], goal=goal, mturk_code=generate_mturk_code(session_id), ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="WebShop flask app backend configuration") parser.add_argument("--log", action='store_true', help="Log actions on WebShop in trajectory file") parser.add_argument("--attrs", action='store_true', help="Show attributes tab in item page") parser.add_argument("--port", type=int, default=3000, help="Port to run the app on") args = parser.parse_args() if args.log: user_log_dir = Path('user_session_logs/mturk') user_log_dir.mkdir(parents=True, exist_ok=True) SHOW_ATTRS_TAB = args.attrs app.run(host='0.0.0.0', port=args.port) ================================================ FILE: envs/webshop/src/webshop/web_agent_site/attributes/annotate.py ================================================ import yaml from pathlib import Path from rich import print ATTR_DIR = './data/attributes' ATTR_PATHS = [ 'narrow_2-gram.yaml', 'narrow_1-gram.yaml', 'broad_2-gram.yaml', 'broad_1-gram.yaml', ] ATTR_PATHS = [Path(ATTR_DIR) / af for af in ATTR_PATHS] def annotate(attr_path): with open(attr_path) as f: attrs_by_cat = yaml.safe_load(f) unique_attrs = set() 
all_attrs = [] for _, attrs in attrs_by_cat.items(): attrs = [a.split('|')[0].strip() for a in attrs] unique_attrs.update(attrs) all_attrs += attrs print(f'Total unique attributes: {len(unique_attrs)}') total = len(all_attrs) num_left = len(all_attrs) annotated_attrs_by_cat = dict() for category, attrs in attrs_by_cat.items(): print( f'Category: [ {category} ] | ' f'Number of attributes: {len(attrs)}\n' ) annotated_attrs = [] for i, attr in enumerate(attrs): attr, score = attr.split(' | ') print( f'{"[" + str(i) + "]":<5} ' f'[bold green]{attr:<30}[/bold green] | ' f'[red]{category}[/red] | ' f'{score}' ) tags = input( 'Annotate [1: ITEM, 2: PROP, 3: USE, ' '⎵: next example, q: next category] > ' ) print('\n') tags = tags.strip() annotated_attrs.append(f'{attr} | {score} | {tags}') if 'q' in tags: break num_left -= len(attrs) print(f'{num_left} / {total} total attributes left.') ans = input('Starting the next category... [y/n] > ') if ans == 'n': break def main(): for attr_path in ATTR_PATHS: annotate(attr_path) if __name__ == '__main__': """ python -m web_agent_site.attributes.annotate """ main() ================================================ FILE: envs/webshop/src/webshop/web_agent_site/attributes/generate_attrs.py ================================================ import json import yaml import random from pathlib import Path from collections import defaultdict from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.feature_extraction import text as sk_text import pandas as pd from tqdm import tqdm from rich import print ITEMS_PATH = './data/ITEMS_mar1.json' REVIEWS_PATH = './data/reviews.json' ATTR_DIR = './data/attributes' random.seed(0) def get_stop_words(): extra_stop_words = set([str(i) for i in range(1000)]) stop_words = sk_text.ENGLISH_STOP_WORDS.union(extra_stop_words) return stop_words def load_products(num=None): """ Loads products from the `items.json` file and combine them with reviews through `asin`. 
Return: dict[asin, product] """ with open(ITEMS_PATH) as f: all_products = json.load(f) if num is not None: random.shuffle(all_products) all_products = all_products[:num] products = dict() asins = set() for p in all_products: asin = p['asin'] if asin in asins: continue asins.add(asin) products[asin] = p with open(REVIEWS_PATH) as f: reviews = json.load(f) reviews = {r['asin']: r for r in reviews} for asin, p in products.items(): if asin in reviews: p['review'] = reviews[asin] else: p['review'] = None return products def get_top_attrs(attributes, k): attr_to_asins = defaultdict(list) for asin, attr_scores in attributes.items(): top_attr_scoress = attr_scores[:k] for attr, score in top_attr_scoress: attr_to_asins[attr].append(asin) total = len([asin for asin, _ in attributes.items()]) top_attrs = [ (attr, len(asins) / total) for attr, asins in attr_to_asins.items() ] top_attrs = sorted(top_attrs, key=lambda x: -x[1]) top_attrs = [f'{attr} | {score:.4f}' for attr, score in top_attrs] return top_attrs def get_corpus( products, keys=('name', 'small_description'), category_type='category' ): """ keys: `name`, `small_description`, `review` category_type: `category`, `query` """ all_products = list(products.values()) asins_by_cat = defaultdict(set) corpus_by_cat = defaultdict(list) for p in all_products: category = p[category_type] asin = p['asin'] if asin in asins_by_cat[category]: continue asins_by_cat[category].add(asin) text = [] for key in keys: if key == 'review': rs = p['review']['reviews'] if r is not None: text_ = ' '.join([r['review'].lower() for r in rs]) else: text_ = '' else: text_ = p[key].lower() text.append(text_) text = ' '.join(text) corpus_by_cat[category].append((asin, text)) return corpus_by_cat def generate_ngram_attrs(corpus_by_cat, ngram_range, k, attrs): vectorizer = TfidfVectorizer( stop_words=get_stop_words(), ngram_range=ngram_range, max_features=1000, ) top_attrs_by_cat = dict() for category, corpus in tqdm(corpus_by_cat.items(), 
total=len(corpus_by_cat)): asins = [_[0] for _ in corpus] texts = [_[1] for _ in corpus] vec = vectorizer.fit_transform(texts).todense() df = pd.DataFrame(vec, columns=vectorizer.get_feature_names_out()) attrs_by_cat = dict() for asin, (row_name, row) in zip(asins, df.iterrows()): attr_scores = sorted( list(zip(row.index, row)), key=lambda x: -x[1] ) attrs_by_cat[asin] = attr_scores attrs[asin] = attr_scores top_attrs_by_cat[category.lower()] = get_top_attrs(attrs_by_cat, k=k) print(top_attrs_by_cat.keys()) return top_attrs_by_cat def generate_attrs(corpus_by_cat, k, save_name): attrs = dict() for n in range(1, 3): ngram_range = (n, n) top_attrs_by_cat = \ generate_ngram_attrs(corpus_by_cat, ngram_range, k, attrs) if save_name is not None: save_path = Path(ATTR_DIR) / f'{save_name}_{n}-gram.yaml' with open(save_path, 'w') as f: yaml.dump(top_attrs_by_cat, f, default_flow_style=False) print(f'Saved: {save_path}') save_path = Path(ATTR_DIR) / f'{save_name}_attrs_unfiltered.json' with open(save_path, 'w') as f: json.dump(attrs, f) print(f'Saved: {save_path}') if __name__ == '__main__': """ python -m web_agent_site.attributes.generate_attrs Inspect in notebooks/attributes.ipynb. 
""" products = load_products(num=40000) corpus_by_cat_broad = get_corpus(products, category_type='category') generate_attrs(corpus_by_cat_broad, k=5, save_name='broad') corpus_by_cat_narrow = get_corpus(products, category_type='query') generate_attrs(corpus_by_cat_narrow, k=5, save_name='narrow') ================================================ FILE: envs/webshop/src/webshop/web_agent_site/engine/__init__.py ================================================ ================================================ FILE: envs/webshop/src/webshop/web_agent_site/engine/engine.py ================================================ """ """ import os import re import json import random from collections import defaultdict from ast import literal_eval from decimal import Decimal import cleantext from tqdm import tqdm from rank_bm25 import BM25Okapi from flask import render_template_string from rich import print from pyserini.search.lucene import LuceneSearcher from ..utils import ( BASE_DIR, DEFAULT_FILE_PATH, DEFAULT_REVIEW_PATH, DEFAULT_ATTR_PATH, HUMAN_ATTR_PATH ) TEMPLATE_DIR = os.path.join(BASE_DIR, 'webshop/web_agent_site/templates') SEARCH_RETURN_N = 50 PRODUCT_WINDOW = 10 TOP_K_ATTR = 10 END_BUTTON = 'Buy Now' NEXT_PAGE = 'Next >' PREV_PAGE = '< Prev' BACK_TO_SEARCH = 'Back to Search' ACTION_TO_TEMPLATE = { 'Description': 'description_page.html', 'Features': 'features_page.html', 'Reviews': 'review_page.html', 'Attributes': 'attributes_page.html', } def map_action_to_html(action, **kwargs): action_name, action_arg = parse_action(action) if action_name == 'start': path = os.path.join(TEMPLATE_DIR, 'search_page.html') html = render_template_string( read_html_template(path=path), session_id=kwargs['session_id'], instruction_text=kwargs['instruction_text'], ) elif action_name == 'search': path = os.path.join(TEMPLATE_DIR, 'results_page.html') html = render_template_string( read_html_template(path=path), session_id=kwargs['session_id'], products=kwargs['products'], 
keywords=kwargs['keywords'], page=kwargs['page'], total=kwargs['total'], instruction_text=kwargs['instruction_text'], ) elif action_name == 'click' and action_arg == END_BUTTON: path = os.path.join(TEMPLATE_DIR, 'done_page.html') html = render_template_string( read_html_template(path), session_id=kwargs['session_id'], reward=kwargs['reward'], asin=kwargs['asin'], options=kwargs['options'], reward_info=kwargs.get('reward_info'), goal_attrs=kwargs.get('goal_attrs'), purchased_attrs=kwargs.get('purchased_attrs'), goal=kwargs.get('goal'), mturk_code=kwargs.get('mturk_code'), query=kwargs.get('query'), category=kwargs.get('category'), product_category=kwargs.get('product_category'), ) elif action_name == 'click' and action_arg in ACTION_TO_TEMPLATE: path = os.path.join(TEMPLATE_DIR, ACTION_TO_TEMPLATE[action_arg]) html = render_template_string( read_html_template(path), session_id=kwargs['session_id'], product_info=kwargs['product_info'], keywords=kwargs['keywords'], page=kwargs['page'], asin=kwargs['asin'], options=kwargs['options'], instruction_text=kwargs.get('instruction_text') ) elif action_name == 'click': path = os.path.join(TEMPLATE_DIR, 'item_page.html') html = render_template_string( read_html_template(path), session_id=kwargs['session_id'], product_info=kwargs['product_info'], keywords=kwargs['keywords'], page=kwargs['page'], asin=kwargs['asin'], options=kwargs['options'], instruction_text=kwargs.get('instruction_text'), show_attrs=kwargs['show_attrs'] ) else: raise ValueError('Action name not recognized.') return html def read_html_template(path): with open(path) as f: template = f.read() return template def parse_action(action): """ Parse action string to action name and its arguments. 
""" pattern = re.compile(r'(.+)\[(.+)\]') m = re.match(pattern, action) if m is None: action_name = action action_arg = None else: action_name, action_arg = m.groups() return action_name, action_arg def convert_web_app_string_to_var(name, string): if name == 'keywords': keywords = string if keywords.startswith('['): keywords = literal_eval(keywords) else: keywords = [keywords] var = keywords elif name == 'page': page = string page = int(page) var = page else: raise ValueError('Name of variable not recognized.') return var def get_top_n_product_from_keywords( keywords, search_engine, all_products, product_item_dict, attribute_to_asins=None, ): if keywords[0] == '': top_n_products = random.sample(all_products, k=SEARCH_RETURN_N) elif keywords[0] == '': attribute = ' '.join(keywords[1:]).strip() asins = attribute_to_asins[attribute] top_n_products = [p for p in all_products if p['asin'] in asins] elif keywords[0] == '': category = keywords[1].strip() top_n_products = [p for p in all_products if p['category'] == category] elif keywords[0] == '': query = ' '.join(keywords[1:]).strip() top_n_products = [p for p in all_products if p['query'] == query] else: keywords = ' '.join(keywords) hits = search_engine.search(keywords, k=SEARCH_RETURN_N) docs = [search_engine.doc(hit.docid) for hit in hits] top_n_asins = [json.loads(doc.raw())['id'] for doc in docs] top_n_products = [product_item_dict[asin] for asin in top_n_asins if asin in product_item_dict] return top_n_products def get_product_per_page(top_n_products, page): return top_n_products[(page - 1) * PRODUCT_WINDOW:page * PRODUCT_WINDOW] def generate_product_prices(all_products): product_prices = dict() for product in all_products: asin = product['asin'] pricing = product['pricing'] if not pricing: price = 100.0 elif len(pricing) == 1: price = pricing[0] else: price = random.uniform(*pricing[:2]) product_prices[asin] = price return product_prices def init_search_engine(num_products=None): if num_products == 100: indexes 
= 'indexes_100' elif num_products == 1000: indexes = 'indexes_1k' elif num_products == 100000: indexes = 'indexes_100k' elif num_products is None: indexes = 'indexes' else: raise NotImplementedError(f'num_products being {num_products} is not supported yet.') index_dir = os.path.abspath(os.path.join(BASE_DIR, f'../search_index/{indexes}')) assert os.path.isdir(index_dir), f'Index dir missing: {index_dir}' search_engine = LuceneSearcher(index_dir) # search_engine = LuceneSearcher(os.path.join(BASE_DIR, f'../search_index/indexes')) return search_engine def clean_product_keys(products, quiet: bool = False): for product in products: product.pop('product_information', None) product.pop('brand', None) product.pop('brand_url', None) product.pop('list_price', None) product.pop('availability_quantity', None) product.pop('availability_status', None) product.pop('total_reviews', None) product.pop('total_answered_questions', None) product.pop('seller_id', None) product.pop('seller_name', None) product.pop('fulfilled_by_amazon', None) product.pop('fast_track_message', None) product.pop('aplus_present', None) product.pop('small_description_old', None) if not quiet: print('Keys cleaned.') return products def load_products(filepath, num_products=None, human_goals=True, quiet: bool = False): # TODO: move to preprocessing step -> enforce single source of truth with open(filepath) as f: products = json.load(f) if not quiet: print('Products loaded.') products = clean_product_keys(products, quiet=quiet) # with open(DEFAULT_REVIEW_PATH) as f: # reviews = json.load(f) all_reviews = dict() all_ratings = dict() # for r in reviews: # all_reviews[r['asin']] = r['reviews'] # all_ratings[r['asin']] = r['average_rating'] if human_goals: with open(HUMAN_ATTR_PATH) as f: human_attributes = json.load(f) with open(DEFAULT_ATTR_PATH) as f: attributes = json.load(f) with open(HUMAN_ATTR_PATH) as f: human_attributes = json.load(f) if not quiet: print('Attributes loaded.') asins = set() all_products = 
[] attribute_to_asins = defaultdict(set) if num_products is not None: # using item_shuffle.json, we assume products already shuffled products = products[:num_products] for i, p in tqdm(enumerate(products), total=len(products), disable=quiet): asin = p['asin'] if asin == 'nan' or len(asin) > 10: continue if asin in asins: continue else: asins.add(asin) products[i]['category'] = p['category'] products[i]['query'] = p['query'] products[i]['product_category'] = p['product_category'] products[i]['Title'] = p['name'] products[i]['Description'] = p['full_description'] products[i]['Reviews'] = all_reviews.get(asin, []) products[i]['Rating'] = all_ratings.get(asin, 'N.A.') for r in products[i]['Reviews']: if 'score' not in r: r['score'] = r.pop('stars') if 'review' not in r: r['body'] = '' else: r['body'] = r.pop('review') products[i]['BulletPoints'] = p['small_description'] \ if isinstance(p['small_description'], list) else [p['small_description']] pricing = p.get('pricing') if pricing is None or not pricing: pricing = [100.0] price_tag = '$100.0' else: pricing = [ float(Decimal(re.sub(r'[^\d.]', '', price))) for price in pricing.split('$')[1:] ] if len(pricing) == 1: price_tag = f"${pricing[0]}" else: price_tag = f"${pricing[0]} to ${pricing[1]}" pricing = pricing[:2] products[i]['pricing'] = pricing products[i]['Price'] = price_tag options = dict() customization_options = p['customization_options'] option_to_image = dict() if customization_options: for option_name, option_contents in customization_options.items(): if option_contents is None: continue option_name = option_name.lower() option_values = [] for option_content in option_contents: option_value = option_content['value'].strip().replace('/', ' | ').lower() option_image = option_content.get('image', None) option_values.append(option_value) option_to_image[option_value] = option_image options[option_name] = option_values products[i]['options'] = options products[i]['option_to_image'] = option_to_image # without 
color, size, price, availability # if asin in attributes and 'attributes' in attributes[asin]: # products[i]['Attributes'] = attributes[asin]['attributes'] # else: # products[i]['Attributes'] = ['DUMMY_ATTR'] # products[i]['instruction_text'] = \ # attributes[asin].get('instruction', None) # products[i]['instruction_attributes'] = \ # attributes[asin].get('instruction_attributes', None) # without color, size, price, availability if asin in attributes and 'attributes' in attributes[asin]: products[i]['Attributes'] = attributes[asin]['attributes'] else: products[i]['Attributes'] = ['DUMMY_ATTR'] if human_goals: if asin in human_attributes: products[i]['instructions'] = human_attributes[asin] else: products[i]['instruction_text'] = \ attributes[asin].get('instruction', None) products[i]['instruction_attributes'] = \ attributes[asin].get('instruction_attributes', None) products[i]['MainImage'] = p['images'][0] products[i]['query'] = p['query'].lower().strip() all_products.append(products[i]) for p in all_products: for a in p['Attributes']: attribute_to_asins[a].add(p['asin']) product_item_dict = {p['asin']: p for p in all_products} product_prices = generate_product_prices(all_products) return all_products, product_item_dict, product_prices, attribute_to_asins ================================================ FILE: envs/webshop/src/webshop/web_agent_site/engine/goal.py ================================================ """ Functions for specifying goals and reward calculations. 
""" import itertools import random import spacy from collections import defaultdict from rich import print from thefuzz import fuzz from .normalize import normalize_color nlp = spacy.load("en_core_web_lg") PRICE_RANGE = [10.0 * i for i in range(1, 100)] def get_goals(all_products, product_prices, human_goals=True, quiet: bool = False): if human_goals: return get_human_goals(all_products, product_prices, quiet=quiet) else: return get_synthetic_goals(all_products, product_prices, quiet=quiet) def get_human_goals(all_products, product_prices, quiet: bool = False): goals = [] cnt_atts = defaultdict(int) cnt = 0 for item in all_products: asin = item['asin'] if 'instructions' not in item: continue for product in item['instructions']: attributes = product['instruction_attributes'] if len(attributes) == 0: cnt += 1 continue if product_prices is not None: price = product_prices[asin] price_range = [p for p in PRICE_RANGE if p > price][:4] if len(price_range) >= 2: _, price_upper = sorted(random.sample(price_range, 2)) price_text = \ f', and price lower than {price_upper:.2f} dollars' else: price_upper = 1000000 price_text = '' else: price_upper = 1000000 goals.append({ 'asin': asin, 'category': item['category'], 'query': item['query'], 'name': item['name'], 'product_category': item['product_category'], 'instruction_text': product['instruction'].strip('.') + price_text, 'attributes': attributes, 'price_upper': price_upper, 'goal_options': product['instruction_options'], }) for att in attributes: cnt_atts[att] += 1 # goals += product_goals for goal in goals: goal['weight'] = 1 if not quiet: print(len(all_products)) print("Number of Goals:", len(goals)) print(cnt, 'skipped') return goals def get_synthetic_goals(all_products, product_prices, quiet: bool = False): goals = [] cnt_atts = defaultdict(int) for product in all_products: if ('instruction_text' not in product or product['instruction_text'] is None): continue product_goals = [] asin = product['asin'] attributes = 
product['instruction_attributes'] assert len(attributes) > 0 if product_prices is not None: price = product_prices[asin] price_range = [p for p in PRICE_RANGE if p > price][:4] if len(price_range) >= 2: _, price_upper = sorted(random.sample(price_range, 2)) price_text = \ f', and price lower than {price_upper:.2f} dollars' else: price_upper = 1000000 price_text = '' else: price_upper = 1000000 price_text = '' instruction_text = product['instruction_text'] options = product['options'] option_names = sorted(options) combinations = list(itertools.product( *(options[option_name] for option_name in option_names) )) for combination in combinations: goal_options = dict() for i, o in enumerate(combination): # option_text.append(f'{option_names[i]}: {o}') goal_options[option_names[i]] = o option_text = ', and '.join([ f'{k}: {v}' for k, v in goal_options.items() ]) option_text = ' with ' + option_text if option_text else '' product_goals.append({ 'asin': asin, 'category': product['category'], 'query': product['query'], 'name': product['name'], 'product_category': product['product_category'], 'instruction_text': f'{instruction_text}{option_text}{price_text}', 'attributes': attributes, 'price_upper': price_upper, 'goal_options': goal_options, 'name': product['Title'], }) for att in attributes: cnt_atts[att] += 1 goals += product_goals for goal in goals: goal['weight'] = sum(1. 
/ cnt_atts[att] for att in goal['attributes']) / len(goal['attributes']) return goals def get_type_reward(purchased_product, goal): """Determines the type reward - captures whether chosen product is in the same category""" query_match = purchased_product['query'] == goal['query'] # Check number of unique categories that match, ignoring order purchased_product_category = [x.strip() for x in purchased_product['product_category'].split('›')] goal_product_category = [x.strip() for x in goal['product_category'].split('›')] category_match = len(set(purchased_product_category) & set(goal_product_category)) >= 2 # Determine whether types align based on product name similarity purchased_type = purchased_product['name'] desired_type = goal['name'] purchased_type_parse = nlp(purchased_type) desired_type_parse = nlp(desired_type) purchased_type_parse = [t.text.lower() for t in purchased_type_parse if t.pos_ in ('PNOUN', 'NOUN', 'PROPN')] desired_type_parse = [t.text.lower() for t in desired_type_parse if t.pos_ in ('PNOUN', 'NOUN', 'PROPN')] n_intersect_type = len( set(purchased_type_parse) & set(desired_type_parse) ) if len(desired_type_parse) == 0: title_score = 0.2 else: title_score = n_intersect_type / len(desired_type_parse) r_type = 1.0 # Adjust r_type score based on query, category title matching/scores match = query_match or category_match or title_score > 0.2 if not match: r_type = 0.5 if title_score < 0.1: r_type = 0.1 if title_score == 0.0: r_type = 0.0 return dict( r_type=r_type, query_match=query_match, category_match=category_match, title_score=title_score, ) def get_attribute_reward(purchased_product, goal): """Determines whether purchased products shares same attributes as goal""" purchased_attrs = purchased_product['Attributes'] goal_attrs = goal['attributes'] num_attr_matches = 0 for g_attr in goal_attrs: matched = False # Check whether goal attribute found in purchased product attribute list for p_attr in purchased_attrs: score = fuzz.token_set_ratio(p_attr, 
g_attr) if score > 85: num_attr_matches += 1 matched = True break # If not in purchased attrs, check Title, Bullet Points (Features), Desc if ( not matched and ( g_attr in purchased_product['Title'].lower() or g_attr in ' '.join(purchased_product['BulletPoints']).lower() or g_attr in purchased_product['Description'].lower() ) ): num_attr_matches += 1 matched = True r_attr = num_attr_matches / len(goal_attrs) return r_attr, num_attr_matches def get_option_reward(purchased_options, goal_options): """Calculate reward for purchased product's options w.r.t. goal options""" purchased_options = [normalize_color(o) for o in purchased_options] goal_options = [normalize_color(o) for o in goal_options] # Perform fuzzy matching of each purchased option against each goal option num_option_matches = 0 for g_option in goal_options: for p_option in purchased_options: score = fuzz.token_set_ratio(p_option, g_option) if score > 85: num_option_matches += 1 break # Calculate option reward as fraction of goal options hit r_option = num_option_matches / len(goal_options) if len(goal_options) > 0 else None return r_option, num_option_matches def get_reward(purchased_product, goal, price, options, **kwargs): """Get cumulative reward score for purchased product and goal""" r_type_dict = get_type_reward(purchased_product, goal) r_price = ( price <= goal['price_upper'] ) if goal['price_upper'] > 0 else None r_att, num_attr_matches = get_attribute_reward(purchased_product, goal) r_option, num_option_matches = get_option_reward( list(options.values()), goal['goal_options'].items() if isinstance(goal['goal_options'], dict) else goal['goal_options'] ) total_reward = ( (num_attr_matches + num_option_matches + r_price) \ / (len(goal['attributes']) + len(goal['goal_options']) + 1) ) total_reward *= r_type_dict['r_type'] # If verbose flag enabled, store score sub-components into dictionary if kwargs.get('verbose', False): info = { 'r_type': r_type_dict['r_type'], 'r_att': r_att, 'w_att': 
len(goal['attributes']) / (len(goal['attributes']) + len(goal['goal_options']) + 1), 'query_match': r_type_dict['query_match'], 'category_match': r_type_dict['category_match'], 'title_score': r_type_dict['title_score'], } if r_option is not None: info['r_option'] = r_option info['w_option'] = len(goal['goal_options']) / (len(goal['attributes']) + len(goal['goal_options']) + 1) if r_price is not None: info['r_price'] = r_price info['w_price'] = 1 / (len(goal['attributes']) + len(goal['goal_options']) + 1) return total_reward, info return total_reward ================================================ FILE: envs/webshop/src/webshop/web_agent_site/engine/normalize.py ================================================ import re from typing import Tuple COLOR_SET = [ 'alabaster', 'apricot', 'aqua', 'ash', 'asphalt', 'azure', 'banana', 'beige', 'black', 'blue', 'blush', 'bordeaux', 'bronze', 'brown', 'burgundy', 'camel', 'camo', 'caramel', 'champagne', 'charcoal', 'cheetah', 'chestnut', 'chocolate', 'christmas', 'coffee', 'cognac', 'copper', 'coral', 'cranberry', 'cream', 'crystal', 'dark', 'denim', 'eggplant', 'elephant', 'espresso', 'fuchsia', 'gold', 'granite', 'grape', 'graphite', 'grass', 'gray', 'green', 'grey', 'heather', 'indigo', 'ivory', 'ivy', 'khaki', 'lavender', 'lemon', 'leopard', 'light', 'lilac', 'lime', 'magenta', 'maroon', 'mauve', 'merlot', 'midnight', 'mint', 'mocha', 'multicolor', 'mushroom', 'mustard', 'natural', 'navy', 'nude', 'olive', 'orange', 'peach', 'pewter', 'pink', 'plum', 'purple', 'rainbow', 'red', 'rose', 'royal', 'rust', 'sand', 'sapphire', 'seashell', 'silver', 'skull', 'slate', 'steel', 'stone', 'stonewash', 'sunflower', 'tan', 'taupe', 'teal', 'tiger', 'turquoise', 'violet', 'walnut', 'wheat', 'white', 'wine', 'yellow', ] SIZE_SET = [ 'xx-large', '3x-large', '4x-large', '5x-large', 'x-large', 'x-small', 'medium', 'large', 'small', 'queen', 'twin', 'full', 'king', 'one size', 'pack', ] SIZE_PATTERNS = [ re.compile(r'(.*)neck(.*)sleeve'), 
re.compile(r'(.*) women \| (.*) men'), re.compile(r'(.*)w x(.*)l'), re.compile(r'(.*)w by (.*)l'), re.compile(r'(.*)w x(.*)h'), re.compile(r'(.*)wide'), re.compile(r'(.*)x-wide'), re.compile(r'(.*)narrow'), re.compile(r'(.*)petite'), re.compile(r'(.*)inch'), re.compile(r'(.*)plus'), re.compile(r'(.*)mm'), re.compile(r'women(.*)'), re.compile(r'(.*)x(.*)'), re.compile(r'(.*)ft'), re.compile(r'(.*)feet'), re.compile(r'(.*)meter'), re.compile(r'(.*)yards'), re.compile(r'(.*)\*(.*)'), re.compile(r'(.*)\-(.*)'), re.compile(r'(\d+)"$'), re.compile(r'(\d+)f$'), re.compile(r'(\d+)m$'), re.compile(r'(\d+)cm$'), re.compile(r'(\d+)g$'), ] SIZE_PATTERNS = [re.compile(s) for s in SIZE_SET] + SIZE_PATTERNS def normalize_color(color_string: str) -> str: """Extracts the first color found if exists""" for norm_color in COLOR_SET: if norm_color in color_string: return norm_color return color_string def normalize_color_size(product_prices: dict) -> Tuple[dict, dict]: """Get mappings of all colors, sizes to corresponding values in COLOR_SET, SIZE_PATTERNS""" # Get all colors, sizes from list of all products all_colors, all_sizes = set(), set() for (_, color, size), _ in product_prices.items(): all_colors.add(color.lower()) all_sizes.add(size.lower()) # Create mapping of each original color value to corresponding set value color_mapping = {'N.A.': 'not_matched'} for c in all_colors: matched = False for base in COLOR_SET: if base in c: color_mapping[c] = base matched = True break if not matched: color_mapping[c] = 'not_matched' # Create mapping of each original size value to corresponding set value size_mapping = {'N.A.': 'not_matched'} for s in all_sizes: matched = False for pattern in SIZE_PATTERNS: m = re.search(pattern, s) if m is not None: matched = True size_mapping[s] = pattern.pattern break if not matched: if s.replace('.', '', 1).isdigit(): size_mapping[s] = 'numeric_size' matched= True if not matched: size_mapping[s] = 'not_matched' return color_mapping, size_mapping 
================================================ FILE: envs/webshop/src/webshop/web_agent_site/envs/__init__.py ================================================
from gym.envs.registration import register
from envs.webshop.src.webshop.web_agent_site.envs.web_agent_site_env import WebAgentSiteEnv
from envs.webshop.src.webshop.web_agent_site.envs.web_agent_text_env import WebAgentTextEnv

# Register both WebShop environment flavors (browser-driven HTML mode and
# simulated text mode) with gym so they can be created via gym.make().
register(
    id='WebAgentSiteEnv-v0',
    entry_point='envs.webshop.src.webshop.web_agent_site.envs:WebAgentSiteEnv',
)
register(
    id='WebAgentTextEnv-v0',
    entry_point='envs.webshop.src.webshop.web_agent_site.envs:WebAgentTextEnv',
)

================================================ FILE: envs/webshop/src/webshop/web_agent_site/envs/chromedriver ================================================
[File too large to display: 15.9 MB]

================================================ FILE: envs/webshop/src/webshop/web_agent_site/envs/web_agent_site_env.py ================================================
import gym
import random
import requests
import string
import time
from bs4 import BeautifulSoup
from bs4.element import Comment
from gym import spaces
from os.path import join, dirname, abspath
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementNotInteractableException

from ..engine.engine import parse_action, END_BUTTON


class WebAgentSiteEnv(gym.Env):
    """Gym environment for HTML mode of WebShop environment"""

    def __init__(self, observation_mode='html', **kwargs):
        """
        Constructor for HTML environment

        Arguments:
        observation_mode (`str`) -- ['html' | 'text'] (default 'html')
        pause (`float`) -- Pause (in seconds) after taking an action. This is
            mainly for demo purposes. Recommended value: 2.0s
        render (`bool`) -- Show browser if set to `True`.
        session ('str') -- Session ID to initialize environment with
        """
        super(WebAgentSiteEnv, self).__init__()
        self.observation_mode = observation_mode
        self.kwargs = kwargs

        # Create a browser driver to simulate the WebShop site; the
        # chromedriver binary ships next to this module.
        service = Service(join(dirname(abspath(__file__)), 'chromedriver'))
        options = Options()
        if 'render' not in kwargs or not kwargs['render']:
            options.add_argument("--headless")  # don't show browser
        self.browser = webdriver.Chrome(service=service, options=options)

        # Set flags and values for WebShop session
        self.text_to_clickable = None
        self.assigned_session = kwargs.get('session')
        self.session = None
        self.reset()

    def step(self, action):
        """
        Takes an action, updates WebShop environment, and returns (observation, reward, done, info)

        Arguments:
        action (`str`): An action should be of the following structure:
          - search[keywords]
          - click[value]
        If action not valid, perform nothing.
        """
        reward = 0.0
        done = False
        info = None

        # Map action to executed command on the WebShop environment via the broswer driver
        action_name, action_arg = parse_action(action)
        if action_name == 'search':
            try:
                # NOTE(review): `find_element_by_id` is the pre-Selenium-4
                # API (removed in Selenium 4) — confirm the pinned selenium
                # version before upgrading dependencies.
                search_bar = self.browser.find_element_by_id('search_input')
            except Exception:
                # No search bar on this page; silently skip the action.
                pass
            else:
                search_bar.send_keys(action_arg)
                search_bar.submit()
        elif action_name == 'click':
            try:
                self.text_to_clickable[action_arg].click()
            except ElementNotInteractableException:
                # Perform force click with JavaScript
                button = self.text_to_clickable[action_arg]
                self.browser.execute_script("arguments[0].click();", button)
            reward = self.get_reward()
            if action_arg == END_BUTTON:
                done = True
        elif action_name == 'end':
            done = True
        else:
            print('Invalid action. No action performed.')

        # Optional demo-friendly delay after each action.
        if 'pause' in self.kwargs:
            time.sleep(self.kwargs['pause'])
        return self.observation, reward, done, info

    def get_available_actions(self):
        """Returns list of available actions at the current step"""
        # Determine if a search bar is available
        try:
            search_bar = self.browser.find_element_by_id('search_input')
        except Exception:
            has_search_bar = False
        else:
            has_search_bar = True

        # Collect buttons, links, and options as clickables
        buttons = self.browser.find_elements_by_class_name('btn')
        product_links = self.browser.find_elements_by_class_name('product-link')
        buying_options = self.browser.find_elements_by_css_selector("input[type='radio']")

        # Map visible text -> element so `click[value]` can be resolved later.
        self.text_to_clickable = {
            f'{b.text}': b
            for b in buttons + product_links
        }
        for opt in buying_options:
            opt_value = opt.get_attribute('value')
            self.text_to_clickable[f'{opt_value}'] = opt
        return dict(
            has_search_bar=has_search_bar,
            clickables=list(self.text_to_clickable.keys()),
        )

    def _parse_html(self, html=None, url=None):
        """
        Returns web request result wrapped in BeautifulSoup object

        Arguments:
        url (`str`): If no url or html is provided, use the current
            observation (HTML) for parsing.
        """
        if html is None:
            if url is not None:
                # NOTE(review): this passes the raw requests.Response object
                # to BeautifulSoup rather than its .text — verify callers.
                html = requests.get(url)
            else:
                html = self.state['html']
        html_obj = BeautifulSoup(html, 'html.parser')
        return html_obj

    def get_reward(self):
        """Get reward value at current step of the environment"""
        html_obj = self._parse_html()
        r = html_obj.find(id='reward')
        # Reward value is rendered in a <pre> child of the #reward element.
        r = float(r.findChildren("pre")[0].string) if r is not None else 0.0
        return r

    def get_instruction_text(self):
        """Get corresponding instruction text for environment current step"""
        html_obj = self._parse_html(self.browser.page_source)
        instruction_text = html_obj.find(id='instruction-text').h4.text
        return instruction_text

    def convert_html_to_text(self, html):
        """Strip HTML of tags and add separators to convert observation into simple mode"""
        texts = self._parse_html(html).find_all(string=True)
        visible_texts = filter(tag_visible, texts)
        observation = ' [SEP] '.join(t.strip() for t in visible_texts if t != '\n')
        return observation

    @property
    def state(self):
        """
        State that includes all information. The actual observation are
        likely to be a subset or reduced form of the state.
        """
        return dict(
            url=self.browser.current_url,
            html=self.browser.page_source,
            instruction_text=self.instruction_text,
        )

    @property
    def observation(self):
        """Compiles state into either the `html` or `text` observation mode"""
        html = self.state['html']
        if self.observation_mode == 'html':
            return html
        elif self.observation_mode == 'text':
            return self.convert_html_to_text(html)
        else:
            raise ValueError(
                f'Observation mode {self.observation_mode} not supported.'
            )

    @property
    def action_space(self):
        # Recommended to use `get_available_actions` instead
        return NotImplementedError

    @property
    def observation_space(self):
        return NotImplementedError

    def reset(self):
        """Create a new session and reset environment variables"""
        if self.assigned_session is not None:
            self.session = self.assigned_session
        else:
            # Random 5-letter session id when none was assigned.
            self.session = ''.join(random.choices(string.ascii_lowercase, k=5))
        init_url = f'http://127.0.0.1:3000/{self.session}'
        self.browser.get(init_url)
        self.instruction_text = self.get_instruction_text()
        return self.observation, None

    def render(self, mode='human'):
        # TODO: Render observation in terminal or WebShop website
        return NotImplementedError

    def close(self):
        # TODO: When DB used instead of JSONs, tear down DB here
        self.browser.close()
        print('Browser closed.')


def tag_visible(element):
    """Helper method to strip HTML block of extraneous tags"""
    ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
    return (
        element.parent.name not in ignore and not isinstance(element, Comment)
    )

================================================ FILE: envs/webshop/src/webshop/web_agent_site/envs/web_agent_text_env.py ================================================
import os
import gym
import json
import random
import string
import time
import torch
import pickle
from bs4 import BeautifulSoup
from bs4.element import Comment
from collections import defaultdict
from flask import Flask

from ..engine.engine import (
    load_products,
    init_search_engine,
    get_top_n_product_from_keywords,
    map_action_to_html,
    parse_action,
    get_product_per_page,
    ACTION_TO_TEMPLATE,
    END_BUTTON,
    NEXT_PAGE,
    PREV_PAGE,
    BACK_TO_SEARCH,
)
from ..engine.goal import get_reward, get_goals
from ..utils import (
    DEFAULT_FILE_PATH,
    FEAT_CONV,
    FEAT_IDS,
    random_idx
)

# Flask app is only used for request/app contexts inside SimServer.receive.
app = Flask(__name__)


class WebAgentTextEnv(gym.Env):
    """Gym environment for Text mode of WebShop environment"""

    def __init__(
        self,
        observation_mode='html',
        file_path=DEFAULT_FILE_PATH,
        server=None,
        **kwargs
    ):
        """
        Constructor for text environment

        Arguments:
        observation_mode (`str`) -- ['html' | 'text'] (default 'html')
        get_image
        filter_goals
        limit_goals
        num_products
        human_goals
        session
        session_prefix
        show_attrs
        """
        super(WebAgentTextEnv, self).__init__()
        self.observation_mode = observation_mode
        self.kwargs = kwargs
        self.file_path = file_path

        self.base_url = 'http://127.0.0.1:3000'
        # Reuse a caller-supplied simulated server when given; otherwise
        # construct a fresh one for this environment instance.
        self.server = SimServer(
            self.base_url,
            self.file_path,
            self.kwargs.get('filter_goals'),
            self.kwargs.get('limit_goals', -1),
            self.kwargs.get('num_products'),
            self.kwargs.get('human_goals'),
            self.kwargs.get('show_attrs', False),
            self.kwargs.get('quiet', False),
        ) if server is None else server
        self.browser = SimBrowser(self.server)

        self.session = self.kwargs.get('session')
        self.session_prefix = self.kwargs.get('session_prefix')
        if self.kwargs.get('get_image', 0):
            # Pre-extracted image features, indexed by image URL.
            self.feats = torch.load(FEAT_CONV)
            self.ids = torch.load(FEAT_IDS)
            self.ids = {url: idx for idx, url in enumerate(self.ids)}
        self.prev_obs = []
        self.prev_actions = []
        self.num_prev_obs = self.kwargs.get('num_prev_obs', 0)
        self.num_prev_actions = self.kwargs.get('num_prev_actions', 0)
        self.reset()

    def step(self, action):
        """
        Takes an action, updates WebShop environment, and returns (observation, reward, done, info)

        Arguments:
        action (`str`): An action should be of the following structure:
          - search[keywords]
          - click[value]
        If action not valid, perform nothing.
        """
        info = None
        self.get_available_actions()

        # Determine action type (click, search) and argument
        action_name, action_arg = parse_action(action)
        action_name = action_name.lower()
        if action_arg is not None:
            action_arg = action_arg.lower()
        if (action_name == 'search' and
                action_arg is not None and
                action_arg != ''):
            status = self.browser.search(action_arg)
        elif (action_name == 'click' and
              action_arg in self.text_to_clickable.keys() and
              action_arg != 'search'):
            status = self.browser.click(action_arg, self.text_to_clickable)
        else:
            # Unknown or invalid action: no-op with zero reward.
            status = dict(reward=0, done=False)

        # Update observation, state with the new action
        ob = self.observation
        text_list = [ob]
        self.prev_actions.append(action)
        # Interleave up to `num_prev_actions` past actions and `num_prev_obs`
        # past observations (most recent first), then reverse so the oldest
        # context comes first and the current observation last.
        for i in range(1, 1 + max(self.num_prev_obs, self.num_prev_actions)):
            if len(self.prev_actions) >= i and self.num_prev_actions >= i:
                text_list.append(self.prev_actions[-i])
            if len(self.prev_obs) >= i and self.num_prev_obs >= i:
                text_list.append(self.prev_obs[-i])
        state = ' [SEP] '.join(text_list[::-1])
        self.prev_obs.append(ob)
        return state, status['reward'], status['done'], info

    def get_available_actions(self):
        """Returns list of available actions at the current step"""
        html_obj = self._parse_html()

        # Collect search bar, buttons, links, and options as clickables
        search_bar = html_obj.find(id='search_input')
        has_search_bar = True if search_bar is not None else False
        buttons = html_obj.find_all(class_='btn')
        product_links = html_obj.find_all(class_='product-link')
        buying_options = html_obj.select('input[type="radio"]')

        # Map lowercased visible text -> element for click resolution.
        self.text_to_clickable = {
            f'{b.get_text()}'.lower(): b
            for b in buttons + product_links
        }
        for opt in buying_options:
            opt_value = opt.get('value')
            self.text_to_clickable[f'{opt_value}'] = opt
        return dict(
            has_search_bar=has_search_bar,
            clickables=list(self.text_to_clickable.keys()),
        )

    def get_image(self):
        """Scrape image from page HTML and return as a list of pixel values"""
        html_obj = self._parse_html(self.browser.page_source)
        image_url = html_obj.find(id='product-image')
        if image_url is not None:
            image_url = image_url['src']
            if image_url in self.ids:
                image_idx = self.ids[image_url]
                image = self.feats[image_idx]
                return image
        # Zero feature vector when the page has no known product image.
        return torch.zeros(512)

    def get_instruction_text(self):
        """Get corresponding instruction text for current environment session"""
        html_obj = self._parse_html(self.browser.page_source)
        instruction_text = html_obj.find(id='instruction-text').h4.text
        return instruction_text

    def _parse_html(self, html=None):
        """
        Returns web request result wrapped in BeautifulSoup object

        Arguments:
        url (`str`): If no url or html is provided, use the current
            observation (HTML) for parsing.
        """
        if html is None:
            html = self.state['html']
        html_obj = BeautifulSoup(html, 'html.parser')
        return html_obj

    @property
    def observation(self):
        """Compiles state into either the `html` or `text` observation mode"""
        html = self.state['html']
        if self.observation_mode == 'html':
            return html
        elif self.observation_mode == 'text':
            return self.convert_html_to_text(html, simple=True)
        elif self.observation_mode == 'text_rich':
            return self.convert_html_to_text(html, simple=False)
        elif self.observation_mode == 'url':
            return self.state['url']
        else:
            raise ValueError(
                f'Observation mode {self.observation_mode} not supported.'
            )

    @property
    def state(self):
        """
        State that includes all information. The actual observation are
        likely to be a subset or reduced form of the state.
        """
        return dict(
            url=self.browser.current_url,
            html=self.browser.page_source,
            instruction_text=self.instruction_text,
        )

    def convert_html_to_text(self, html, simple=False):
        """Strip HTML of tags and add separators to convert observation into simple mode"""
        texts = self._parse_html(html).find_all(string=True)
        visible_texts = filter(tag_visible, texts)
        if simple:
            # For `simple` mode, return just [SEP] separators
            return ' [SEP] '.join(
                t.strip() for t in visible_texts if t != '\n'
            )
        else:
            # Otherwise, return an observation with tags mapped to specific, unique separators
            observation = ''
            for t in visible_texts:
                if t == '\n':
                    continue
                if t.parent.name == 'button':  # button
                    processed_t = f'[button] {t} [button_]'
                elif t.parent.name == 'label':  # options
                    if f'"{t}"' in self.state['url']:
                        processed_t = f' [clicked button] {t} [clicked button_]'
                        observation = f'You have clicked {t}.\n' + observation
                    else:
                        processed_t = f' [button] {t} [button_]'
                elif t.parent.get('class') == ["product-link"]:  # product asins
                    if f'{t}' in self.server.user_sessions[self.session]['asins']:
                        processed_t = f'\n[clicked button] {t} [clicked button_]'
                    else:
                        processed_t = f'\n[button] {t} [button_]'
                else:  # regular, unclickable text
                    processed_t = str(t)
                observation += processed_t + '\n'
            return observation

    def reset(self, session=None, instruction_text=None):
        """Create a new session and reset environment variables"""
        session_int = None
        if session is not None:
            self.session = str(session)
            # Integer sessions deterministically select a goal by index.
            if isinstance(session, int):
                session_int = session
        else:
            self.session = ''.join(random.choices(string.ascii_lowercase, k=10))
        if self.session_prefix is not None:
            self.session = self.session_prefix + self.session

        init_url = f'{self.base_url}/{self.session}'
        self.browser.get(init_url, session_id=self.session, session_int=session_int)

        self.text_to_clickable = None
        self.instruction_text = self.get_instruction_text() if instruction_text is None else instruction_text
        obs = self.observation
        self.prev_obs = [obs]
        self.prev_actions = []
        return obs, None

    def render(self, mode='human'):
        pass

    def close(self):
        pass


def tag_visible(element):
    # Keep only text nodes that are user-visible (drop markup, scripts, etc.).
    ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
    return (
        element.parent.name not in ignore and not isinstance(element, Comment)
    )


class SimServer:
    """Lightweight simulator of WebShop Flask application for generating HTML observations"""

    def __init__(
        self,
        base_url,
        file_path,
        filter_goals=None,
        limit_goals=-1,
        num_products=None,
        human_goals=0,
        show_attrs=False,
        quiet=False,
    ):
        """
        Constructor for simulated server serving WebShop application

        Arguments:
        filter_goals (`func`) -- Select specific goal(s) for consideration based on criteria of custom function
        limit_goals (`int`) -- Limit to number of goals available
        num_products (`int`) -- Number of products to search across
        human_goals (`bool`) -- If true, load human goals; otherwise, load synthetic goals
        """
        # Load all products, goals, and search engine
        self.base_url = base_url
        self.quiet = bool(quiet)
        # cache_path = os.path.join(os.getcwd(), '.cache')
        # if os.path.exists(cache_path):
        #     self.all_products = pickle.load(open(os.path.join(cache_path, 'all_products.pkl'), 'rb'))
        #     self.product_item_dict = pickle.load(open(os.path.join(cache_path, 'product_item_dict.pkl'), 'rb'))
        #     self.product_prices = pickle.load(open(os.path.join(cache_path, 'product_prices.pkl'), 'rb'))
        #     self.goals = pickle.load(open(os.path.join(cache_path, 'goals.pkl'), 'rb'))
        # else:
        #     self.all_products, self.product_item_dict, self.product_prices, _ = \
        #         load_products(filepath=file_path, num_products=num_products, human_goals=human_goals)
        #     self.goals = get_goals(self.all_products, self.product_prices, human_goals)
        #     os.mkdir(cache_path)
        #     pickle.dump(self.all_products, open(os.path.join(cache_path, 'all_products.pkl'), 'wb'))
        #     pickle.dump(self.product_item_dict, open(os.path.join(cache_path, 'product_item_dict.pkl'), 'wb'))
        #     pickle.dump(self.product_prices, open(os.path.join(cache_path,
'product_prices.pkl'), 'wb')) # pickle.dump(self.goals, open(os.path.join(cache_path, 'goals.pkl'), 'wb')) # self.search_engine = init_search_engine(num_products=num_products) self.all_products, self.product_item_dict, self.product_prices, _ = \ load_products(filepath=file_path, num_products=num_products, human_goals=human_goals, quiet=self.quiet) self.search_engine = init_search_engine(num_products=num_products) self.goals = get_goals(self.all_products, self.product_prices, human_goals, quiet=self.quiet) self.show_attrs = show_attrs # Fix outcome for random shuffling of goals random.seed(233) random.shuffle(self.goals) # Apply `filter_goals` parameter if exists to select speific goal(s) if filter_goals is not None: self.goals = [ goal for (i, goal) in enumerate(self.goals) if filter_goals(i, goal) ] # Imposes `limit` on goals via random selection if limit_goals != -1 and limit_goals < len(self.goals): self.weights = [goal['weight'] for goal in self.goals] self.cum_weights = [0] for w in self.weights: self.cum_weights.append(self.cum_weights[-1] + w) idxs = [] while len(idxs) < limit_goals: idx = random_idx(self.cum_weights) if idx not in idxs: idxs.append(idx) self.goals = [self.goals[i] for i in idxs] if not self.quiet: print(f'Loaded {len(self.goals)} goals.') # pickle.dump(self.goals, open(os.path.join(cache_path, 'goals_final.pkl'), 'wb')) # Set extraneous housekeeping variables self.weights = [goal['weight'] for goal in self.goals] self.cum_weights = [0] for w in self.weights: self.cum_weights.append(self.cum_weights[-1] + w) self.user_sessions = dict() self.search_time = 0 self.render_time = 0 self.sample_time = 0 self.assigned_instruction_text = None # TODO: very hacky, should remove @app.route('/', methods=['GET', 'POST']) def index(self, session_id, **kwargs): """Redirect to the search page with the given session ID""" html = map_action_to_html( 'start', session_id=session_id, instruction_text=kwargs['instruction_text'], ) url = 
f'{self.base_url}/{session_id}' return html, url @app.route('/', methods=['GET', 'POST']) def search_results(self, session_id, **kwargs): """Initialize session and return the search results page""" session = self.user_sessions[session_id] keywords = kwargs['keywords'] # TODO: why is this using kwargs? why not session? assert isinstance(keywords, list) page = 1 if 'page' not in kwargs else kwargs['page'] session["page"] = page session["keywords"] = keywords session["actions"]["search"] += 1 session["asin"] = None session["options"] = {} # Perform search on keywords from items and record amount of time it takes old_time = time.time() top_n_products = get_top_n_product_from_keywords( keywords, self.search_engine, self.all_products, self.product_item_dict, ) self.search_time += time.time() - old_time # Get product list from search result asins and get list of corresponding URLs products = get_product_per_page(top_n_products, page) keywords_url_string = '+'.join(keywords) url = ( f'{self.base_url}/search_results/{session_id}/' f'{keywords_url_string}/{page}' ) # Render HTML search page and record amount of time taken old_time = time.time() html = map_action_to_html( 'search', session_id=session_id, products=products, keywords=session["keywords"], page=page, total=len(top_n_products), instruction_text=session["goal"]["instruction_text"], ) self.render_time += time.time() - old_time return html, url @app.route('/', methods=['GET', 'POST']) def item_page(self, session_id, **kwargs): """Render and return the HTML for a product item page""" session = self.user_sessions[session_id] clickable_name = kwargs['clickable_name'] text_to_clickable = kwargs['text_to_clickable'] clickable = text_to_clickable[clickable_name] # Update session logs with information of last product asin selected if (clickable.get('class') is not None and clickable.get('class')[0] == 'product-link'): session["asin"] = clickable_name.upper() session["actions"]["asin"] += 1 
session["asins"].add(session["asin"]) elif clickable.get('name') is not None: clickable_key = clickable['name'].lower() session["options"][clickable_key] = clickable_name session["actions"]["options"] += 1 # Set fields + url of page, then render page's HTML product_info = self.product_item_dict[session["asin"]] keywords_url_string = '+'.join(session["keywords"]) option_string = json.dumps(session['options']) url = ( f'{self.base_url}/item_page/{session_id}/' f'{session["asin"]}/{keywords_url_string}/' f'{session["page"]}/{option_string}' ) html = map_action_to_html( 'click', session_id=session_id, product_info=product_info, keywords=session["keywords"], page=session["page"], asin=session["asin"], options=session["options"], instruction_text=session["goal"]["instruction_text"], show_attrs=self.show_attrs, ) return html, url @app.route('/', methods=['GET', 'POST']) def item_sub_page(self, session_id, **kwargs): """Render and return the HTML for a product's sub page (i.e. description, features)""" session = self.user_sessions[session_id] clickable_name = kwargs['clickable_name'] for k in ACTION_TO_TEMPLATE: if clickable_name.lower() == k.lower(): clickable_name = k break # Set fields + url of page, then render page's HTML product_info = self.product_item_dict[session["asin"]] session["actions"][clickable_name] += 1 keywords_url_string = '+'.join(session["keywords"]) url = ( f'{self.base_url}/item_sub_page/{session_id}/' f'{session["asin"]}/{keywords_url_string}/{session["page"]}/' f'{clickable_name}/{session["options"]}' ) html = map_action_to_html( f'click[{clickable_name}]', session_id=session_id, product_info=product_info, keywords=session["keywords"], page=session["page"], asin=session["asin"], options=session["options"], instruction_text=session["goal"]["instruction_text"], ) return html, url @app.route('/', methods=['GET', 'POST']) def done(self, session_id, **kwargs): """Render and return HTML for done page""" session = self.user_sessions[session_id] goal = 
            self.user_sessions[session_id]['goal']
        purchased_product = self.product_item_dict[session["asin"]]
        session["actions"]["purchase"] += 1
        price = self.product_prices.get(session["asin"])

        # Calculate reward for selected product and set variables for page details
        reward, info = get_reward(
            purchased_product,
            goal,
            price=price,
            options=session["options"],
            verbose=True
        )

        # Persist terminal episode state on the session so later queries see it.
        self.user_sessions[session_id]['verbose_info'] = info
        self.user_sessions[session_id]['done'] = True
        self.user_sessions[session_id]['reward'] = reward

        url = (
            f'{self.base_url}/done/{session_id}/'
            f'{session["asin"]}/{session["options"]}'
        )
        html = map_action_to_html(
            f'click[{END_BUTTON}]',
            session_id=session_id,
            reward=reward,
            asin=session["asin"],
            options=session["options"],
            instruction_text=session["goal"]["instruction_text"],
        )
        return html, url, reward

    def receive(self, session_id, current_url, session_int=None, **kwargs):
        """Map action to the corresponding page.

        Central dispatcher for the simulated server: depending on which kwargs
        are present (none / 'keywords' / 'clickable_name'), it resets the
        session, runs a search, or routes a click to the right page handler.

        :param session_id: key into ``self.user_sessions``; a new session dict
            (with a freshly sampled or ``session_int``-selected goal) is created
            on first sight.
        :param current_url: URL the browser is currently on; used to decide what
            prev/next-page clicks mean.
        :param session_int: optional explicit goal index (otherwise sampled via
            ``random_idx``).
        :returns: ``(html, url, status)`` where status is a dict with
            ``reward`` (float) and ``done`` (bool).
        """
        status = dict(reward=0.0, done=False)
        with app.app_context(), app.test_request_context():
            # Create/determine goal, instruction_text from current session
            if session_id not in self.user_sessions:
                idx = session_int if (session_int is not None and isinstance(session_int, int)) else random_idx(self.cum_weights)
                goal = self.goals[idx]
                instruction_text = goal['instruction_text']
                self.user_sessions[session_id] = {'goal': goal, 'done': False}
            else:
                instruction_text = \
                    self.user_sessions[session_id]['goal']['instruction_text']
            if self.assigned_instruction_text is not None:
                instruction_text = self.assigned_instruction_text  # TODO: very hacky, should remove
                self.user_sessions[session_id]['goal']['instruction_text'] = instruction_text
            session = self.user_sessions[session_id]

            if not kwargs:
                # If no action, reset the session variables
                kwargs['instruction_text'] = instruction_text
                html, url = self.index(session_id, **kwargs)
                self.user_sessions[session_id].update(
                    {
                        'keywords': None,
                        'page': None,
                        'asin': None,
                        'asins': set(),
                        'options': dict(),
                        'actions': defaultdict(int)
                    }
                )
            elif 'keywords' in kwargs:
                # If search keywords are available, run a search
                html, url = self.search_results(session_id, **kwargs)
            elif 'clickable_name' in kwargs:
                clickable_name = kwargs['clickable_name'].lower()
                if clickable_name == END_BUTTON.lower():
                    # If "buy now" clicked, calculate reward and flag session as terminated
                    html, url, reward = self.done(session_id, **kwargs)
                    status['reward'] = reward
                    status['done'] = True
                elif clickable_name == BACK_TO_SEARCH.lower():
                    # If "back to search" clicked, recursively reset the session back to search page
                    html, url, status = self.receive(session_id, current_url)
                elif (clickable_name == NEXT_PAGE.lower() and
                        self.get_page_name(current_url) == 'search_results'):
                    # If "next page" clicked from search results, re-render with `page` enumerated
                    html, url, status = self.receive(
                        session_id,
                        current_url,
                        keywords=session["keywords"],
                        page=session["page"] + 1,
                    )
                elif (clickable_name == PREV_PAGE.lower() and
                        self.get_page_name(current_url) == 'search_results'):
                    # If "prev page" clicked from search results, re-render with `page` denumerated
                    html, url, status = self.receive(
                        session_id,
                        current_url,
                        keywords=session["keywords"],
                        page=session["page"] - 1,
                    )
                elif (clickable_name == PREV_PAGE.lower() and
                        self.get_page_name(current_url) == 'item_sub_page'):
                    # If "prev page" clicked from sub page, return to corresponding item page
                    html, url = self.item_page(session_id, **kwargs)
                elif (clickable_name == PREV_PAGE.lower() and
                        self.get_page_name(current_url) == 'item_page'):
                    # If "prev page" clicked from item page, return to search results page
                    html, url = self.search_results(
                        session_id,
                        keywords=session["keywords"],
                        page=session["page"],
                        **kwargs
                    )
                elif clickable_name in [k.lower() for k in ACTION_TO_TEMPLATE]:
                    # Render item_sub_page if clickable is description, features, or reviews
                    html, url = self.item_sub_page(session_id, **kwargs)
                else:
                    # Otherwise, render current item page
                    html, url = self.item_page(session_id,
class SimBrowser:
    """Simulated browser for rendering the HTML source of WebShop environment pages"""

    def __init__(self, server):
        # The backing SimServer; all navigation is delegated to `server.receive`.
        self.server = server
        self.current_url = None
        self.page_source = None
        self.session_id = None

    def get(self, url, session_id=None, session_int=None):
        """Set browser variables to corresponding link, page HTML for URL"""
        if session_id is None:
            # Default: the session id is the last path segment of the URL.
            self.session_id = url.split('/')[-1]
        else:
            self.session_id = session_id
        # Note: the server is asked about the *previous* URL; `current_url`
        # is only updated afterwards.
        page, _, _ = self.server.receive(
            self.session_id, self.current_url, session_int=session_int
        )
        self.page_source = page
        self.current_url = url

    def click(self, clickable_name, text_to_clickable):
        """Wrapper for `receive` handler for performing click action on current page"""
        page, url, status = self.server.receive(
            self.session_id,
            current_url=self.current_url,
            clickable_name=clickable_name,
            text_to_clickable=text_to_clickable,
        )
        self.page_source, self.current_url = page, url
        return status

    def search(self, keywords):
        """Wrapper for `receive` handler for performing search action on current page"""
        # Accept a raw query string and tokenize it on single spaces.
        tokens = keywords.split(' ') if isinstance(keywords, str) else keywords
        page, url, status = self.server.receive(
            self.session_id,
            current_url=self.current_url,
            keywords=tokens,
        )
        self.page_source, self.current_url = page, url
        return status
# Model implementations. The model interface is suitable for both the
# ``site env'' and the ``text env''.
import random

# Fixed seed so RandomPolicy action sequences are reproducible across runs.
random.seed(4)


class BasePolicy:
    """Abstract policy: maps an observation to an action string."""

    def __init__(self):
        pass

    # Bug fix: `self` was missing from the original signature, so calling
    # `forward` on a BasePolicy instance raised TypeError instead of the
    # intended NotImplementedError. Subclasses already declared `self`,
    # so this change is backward compatible.
    def forward(self, observation, available_actions):
        """
        Args:
            observation (`str`): HTML string
            available_actions (): ...

        Returns:
            action (`str`): Return string of the format ``action_name[action_arg]''.
                Examples:
                    - search[white shoes]
                    - click[button=Reviews]
                    - click[button=Buy Now]
        """
        raise NotImplementedError


class HumanPolicy(BasePolicy):
    """Policy that reads the next action from stdin (interactive play)."""

    def __init__(self):
        super().__init__()

    def forward(self, observation, available_actions):
        action = input('> ')
        return action


class RandomPolicy(BasePolicy):
    """Policy that issues a fixed search or clicks a uniformly random clickable."""

    def __init__(self):
        super().__init__()

    def forward(self, observation, available_actions):
        if available_actions['has_search_bar']:
            action = 'search[shoes]'
        else:
            action_arg = random.choice(available_actions['clickables'])
            action = f'click[{action_arg}]'
        return action
>.form-group >.input-group > .form-control:focus { border-color: #33A444; outline: 0; -webkit-box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 1px rgba(0, 109, 0, 0.8); box-shadow: inset 0 1px 1px rgba(0,0,0,.075), 0 0 1px rgba(0, 109, 0, 0.8); } body { background: white; min-height: 100vh } .text-gray { color: #aaa } .result-img { max-height: 300px; max-width: 300px; overflow: hidden; } .item-page-img { max-height: 600px; max-width: 370px; overflow: hidden; } .top-buffer { margin-top:10px; } .product-info { font-size: 18px; } .star-active { color: #FBC02D; margin-top: 10px; margin-bottom: 10px } .star-active:hover { color: #F9A825; cursor: pointer } .star-inactive { color: #CFD8DC; margin-top: 10px; margin-bottom: 10px } .blue-text { color: #116396 } .btn { margin-left: 0px; margin-right: 0px; } /* Boostrap Buttons Styling */ .btn-primary { font-size: 13px; color: rgba(58, 133, 191, 0.75); letter-spacing: 1px; line-height: 15px; border: 2px solid rgba(58, 133, 191, 0.75); border-radius: 40px; background: transparent; } .btn-primary:hover { color: #FFF; background: rgba(58, 133, 191, 0.75); } .btn-success { font-size: 13px; color: rgba(103, 192, 103, 0.75); letter-spacing: 1px; line-height: 15px; border: 2px solid rgba(103, 192, 103, 0.75); border-radius: 40px; background: transparent; } .btn-success:hover { color: #FFF; background: rgb(103, 192, 103, 0.75); } .btn.purchase { color: rgb(0, 0, 0); background: rgb(250, 167, 13); } .btn.purchase:hover { color: rgb(0, 0, 0); background: rgb(253, 199, 98); } .radio-toolbar { margin: 5px; } .radio-toolbar input[type="radio"] { opacity: 0; position: fixed; width: 0; } .radio-toolbar label { display: inline-block; background-color: rgb(245, 241, 241); padding: 10px 10px; font-size: 14px; border: 1px solid #444; border-radius: 4px; } .radio-toolbar label:hover { background-color: rgb(255, 247, 217); } .radio-toolbar input[type="radio"]:focus + label { border: 1px solid #444; } .radio-toolbar input[type="radio"]:checked + 
label { background-color: rgb(255, 234, 163); border: 1px solid #444; } #instruction-text { margin-top:10px; margin-bottom:10px; border: #797474 solid; border-radius: 20px; padding: 5px; } pre { white-space: pre-line; } ================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/attributes_page.html ================================================

Instruction:
{{ instruction_text }}

    {% for attribute in product_info.Attributes %}
  • {{attribute}}

  • {% endfor %}
{{product_info.category}}
{{product_info.query}}
{{product_info.product_category}}
================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/description_page.html ================================================

Instruction:
{{ instruction_text }}

{{product_info.Description}}

================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/done_page.html ================================================

Thank you for shopping with us!

Your code:

{{ mturk_code }}
(Paste it in your MTurk interface.)

Purchased


asin
{{ asin }}

options
{{ options | tojson }}

attrs
{{ purchased_attrs }}

category
{{ category }}

query
{{ query }}

product category
{{ product_category }}

Target


asin
{{ goal.asin }}

options
{{ goal.goal_options }}

attrs
{{ goal.attributes }}

price upper
{{ goal.price_upper }}

instruction text
{{ goal.instruction_text }}

category
{{ goal.category }}

product category
{{ goal.product_category }}

query
{{ goal.query }}

Goal
{{ goal | pprint }}

Reward


Your score (min 0.0, max 1.0)
{{ reward }}

================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/features_page.html ================================================

Instruction:
{{ instruction_text }}

    {% for bulletpoint in product_info.BulletPoints %}
  • {{bulletpoint}}

  • {% endfor %}
================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/item_page.html ================================================

Instruction:
{{ instruction_text }}

{% for option_name, option_contents in product_info.options.items() %}

{{ option_name }}

{% for option_content in option_contents %} {% set current_options = options.copy() %} {% set _ = current_options.update({option_name: option_content}) %} {% set url = url_for('item_page', session_id=session_id, asin=asin, keywords=keywords, page=page, options=current_options) %} {% endfor %}
{% endfor %}

{{product_info.Title}}

Price: {{product_info.Price}}

Rating: {{product_info.Rating}}

{% if show_attrs %}
{% endif %}
================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/results_page.html ================================================
================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/review_page.html ================================================

Instruction:
{{ instruction_text }}

{% for review in product_info.Reviews %}

"{{review.title}}"

{{review.score}} {% for i in range(review.score | int) %} {% endfor %} {% for i in range(5 - review.score | int) %} {% endfor %}

{{review.body}}

{% endfor %}
================================================ FILE: envs/webshop/src/webshop/web_agent_site/templates/search_page.html ================================================

Instruction:
{{ instruction_text }}

import os
import bisect
import hashlib
import logging
import random
from os.path import dirname, abspath, join

# Paths are resolved relative to this file so the package works from any CWD.
BASE_DIR = join(dirname(abspath(__file__)), '../..')
DEBUG_PROD_SIZE = None  # set to `None` to disable

DEFAULT_ATTR_PATH = join(BASE_DIR, '../data/items_ins_v2.json')
DEFAULT_FILE_PATH = join(BASE_DIR, '../data/items_shuffle.json')
DEFAULT_REVIEW_PATH = join(BASE_DIR, '../data/reviews.json')
FEAT_CONV = join(BASE_DIR, '../data/feat_conv.pt')
FEAT_IDS = join(BASE_DIR, '../data/feat_ids.pt')
HUMAN_ATTR_PATH = join(BASE_DIR, '../data/items_human_ins.json')


def random_idx(cum_weights):
    """Generate random index by sampling uniformly from sum of all weights,
    then selecting the `min` between the position to keep the list sorted
    (via bisect) and the value of the second to last index
    """
    pos = random.uniform(0, cum_weights[-1])
    idx = bisect.bisect(cum_weights, pos)
    idx = min(idx, len(cum_weights) - 2)
    return idx


def setup_logger(session_id, user_log_dir):
    """Creates a log file and logging object for the corresponding session ID.

    Bug fix: ``logging.getLogger`` caches loggers by name, so calling this
    twice for one session previously attached a second FileHandler and
    duplicated every record. Stale handlers are now removed (and closed)
    before attaching the fresh one, making the call idempotent.

    :param session_id: logger name; also used as the log file stem.
    :param user_log_dir: a ``pathlib.Path`` directory the ``.jsonl`` file is
        created in (the `/` operator requires a Path-like object).
    """
    logger = logging.getLogger(session_id)
    # Drop handlers left over from an earlier call for the same session.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
        handler.close()
    formatter = logging.Formatter('%(message)s')
    file_handler = logging.FileHandler(
        user_log_dir / f'{session_id}.jsonl', mode='w'
    )
    file_handler.setFormatter(formatter)
    logger.setLevel(logging.INFO)
    logger.addHandler(file_handler)
    return logger


def generate_mturk_code(session_id: str) -> str:
    """Generates a redeem code corresponding to the session ID for an
    MTurk worker once the session is completed
    """
    sha = hashlib.sha1(session_id.encode())
    return sha.hexdigest()[:10].upper()
FILE: run.py ================================================ import argparse import asyncio import importlib import time import shutil from pathlib import Path from typing import Type, Dict, Any, List, Optional import inspect import logging from datetime import datetime # Suppress common warnings - must be at the very beginning import warnings import os os.environ['PYTHONWARNINGS'] = 'ignore::DeprecationWarning' # Suppress all categories of warnings warnings.simplefilter("ignore") warnings.filterwarnings("ignore") # Make sure these are applied globally import sys if not sys.warnoptions: warnings.simplefilter("ignore") # Specific warning suppressions warnings.filterwarnings("ignore", category=DeprecationWarning) warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning) # Module-specific suppressions warnings.filterwarnings("ignore", module="gym") warnings.filterwarnings("ignore", module="gym.*") warnings.filterwarnings("ignore", module="faiss") warnings.filterwarnings("ignore", module="faiss.*") warnings.filterwarnings("ignore", module="setuptools") warnings.filterwarnings("ignore", module="setuptools.*") warnings.filterwarnings("ignore", module="typer") warnings.filterwarnings("ignore", module="typer.*") warnings.filterwarnings("ignore", module="spacy") warnings.filterwarnings("ignore", module="spacy.*") warnings.filterwarnings("ignore", module="click") warnings.filterwarnings("ignore", module="click.*") from tqdm import tqdm from base.agent import Agent from base.environment import Env from utils.logger import SimpleLogger from utils.errors import StepLimitError # Allow short aliases like `-a human` and `-e alfworld` AGENT_ALIASES = { "recode": "agents.recode.agent.ReCodeAgent", } ENV_ALIASES = { "alfworld": "envs.alfworld.env.AlfworldEnv", "webshop": "envs.webshop.env.WebShopEnv", "sciworld": "envs.sciworld.env.SciWorldEnv", } def resolve_class_identifier(identifier: str, aliases: Dict[str, str], kind: str) 
-> str: """Resolve a possibly-short alias (e.g., 'human') to a full dotted class path. If `identifier` already looks like a dotted path, return it unchanged. Otherwise, look up a lowercase alias in `aliases`. """ if not identifier: raise ValueError(f"Empty {kind} identifier") if "." in identifier: return identifier key = identifier.strip().lower() if key in aliases: return aliases[key] available = ", ".join(sorted(aliases.keys())) raise ValueError(f"Unknown {kind} alias '{identifier}'. Available: {available}") def _default_run_id(agent_path: str, env_path: str) -> str: """Generate default run_id = __.""" ts = datetime.now().strftime("%Y%m%d_%H%M%S") agent_cls_name = agent_path.split(".")[-1] env_cls_name = env_path.split(".")[-1] return f"{ts}_{agent_cls_name}_{env_cls_name}" def create_instance(cls: Type, running_config: Optional[Dict[str, Any]], logger: Optional[SimpleLogger]): """Instantiate a class, injecting logger and config-defined constructor kwargs.""" sig = inspect.signature(cls) kwargs: Dict[str, Any] = {} if logger is not None and "logger" in sig.parameters: kwargs["logger"] = logger for k in running_config or {}: if k in sig.parameters and k not in kwargs: kwargs[k] = running_config[k] if "task_type" in sig.parameters and running_config and "task_types" in running_config: task_types = running_config.get("task_types", []) if isinstance(task_types, list) and task_types: kwargs["task_type"] = task_types[0].upper() elif isinstance(task_types, str): kwargs["task_type"] = task_types.upper() try: return cls(**kwargs) # type: ignore[arg-type] except TypeError: return cls() def load_class(path: str) -> Type: """Import a class given a dotted path "package.module.Class" only.""" try: module_path, class_name = path.rsplit(".", 1) except ValueError: raise ValueError(f"Invalid class path '{path}'. 
Expected format: package.module.Class") module = importlib.import_module(module_path) cls = getattr(module, class_name, None) if not isinstance(cls, type): raise AttributeError(f"'{path}' does not resolve to a class") return cls def _safe_report(obj: Any) -> Dict[str, Any]: """Call obj.report() if available and return a dict; otherwise return {}.""" try: if hasattr(obj, "report") and callable(getattr(obj, "report")): data = getattr(obj, "report")() or {} return data if isinstance(data, dict) else {} except Exception: return {} return {} def _assemble_result( agent: Agent, env: Env, instance_id: Optional[int], duration: float, error: Optional[str] = None, ) -> Dict[str, Any]: """Assemble the unified result dict from agent/env reports plus local info.""" agent_report = _safe_report(agent) # print(f"[HERE] {agent_report}") env_report = _safe_report(env) # Ensure task_type present if env exposes it if hasattr(env, "task_type") and "task_type" not in env_report: try: env_report["task_type"] = getattr(env, "task_type") except Exception: pass local_info: Dict[str, Any] = { "instance_id": instance_id, "time": duration, } if error is not None: local_info["error"] = error return {**agent_report, **env_report, **local_info} async def run_single_instance( agent: Agent, env: Env, config: Dict[str, Any], logger: SimpleLogger, instance_id: Optional[int] = None, ) -> Dict[str, Any]: """Run one episode and collect result dict (async).""" # Determine per-instance time limit (seconds). Default to 900s if unspecified. try: max_duration_cfg = config.get("max_duration", 900) time_limit_secs = float(max_duration_cfg if max_duration_cfg is not None else 900) if time_limit_secs <= 0: time_limit_secs = 900.0 except Exception: time_limit_secs = 900.0 init_info = env.reset(config, str(instance_id) if instance_id is not None else None) observations = init_info["observations"] agent.reset(config, init_info) logger.info(f"[Instance {instance_id}] Environment reset. 
Starting episode.") start_time = time.time() async def episode_runner() -> Dict[str, Any]: nonlocal observations try: while not env.is_done(): actions = await agent.act(observations) observations = await env.run(actions) success = env.is_success() duration_local = time.time() - start_time final_steps_local = env._step_count logger.info( f"{env.id}-Finished: {'SUCCESS' if success else 'FAILURE'} " f"({final_steps_local} steps, {duration_local:.4f}s)" ) return _assemble_result(agent, env, instance_id, duration_local) except StepLimitError as e: duration_local = time.time() - start_time final_steps_local = env._step_count logger.warning(f"[Instance {instance_id}] {e} ({final_steps_local} steps, {duration_local:.4f}s)") return _assemble_result(agent, env, instance_id, duration_local, error=str(e)) except Exception as e: duration_local = time.time() - start_time try: final_steps_local = env.get_step_count() except Exception: final_steps_local = getattr(env, "_step_count", 0) logger.error(f"{env.id}-ERROR: {e} ({final_steps_local} steps, {duration_local:.4f}s)") return _assemble_result(agent, env, instance_id, duration_local, error=str(e)) try: result = await asyncio.wait_for(episode_runner(), timeout=time_limit_secs) return result except asyncio.TimeoutError: duration = time.time() - start_time try: final_steps = env.get_step_count() except Exception: final_steps = getattr(env, "_step_count", 0) logger.warning( f"[Instance {instance_id}] TIMEOUT after {int(time_limit_secs)}s " f"({final_steps} steps, {duration:.4f}s)" ) res = _assemble_result( agent, env, instance_id, duration, error=f"Timeout after {int(time_limit_secs)}s" ) # Explicitly mark as failure to ensure correct final statistics res["success"] = False return res async def run_concurrent_instances( agent_cls: Type[Agent], env_cls: Type[Env], num_instances: int, max_concurrent: int = 10, config: Optional[Dict[str, Any]] = None, logger: Optional[SimpleLogger] = None, ) -> List[Dict[str, Any]]: """Run many 
environment instances concurrently with a live progress UI. If `rich` is available, use a richer UI with per-instance spinners; otherwise fallback to tqdm-based overall bar plus lightweight per-instance lines. """ config = config or {} # Determine base start id from running_config try: base_start_id = int(config.get("start_id", 0) or 0) except (TypeError, ValueError): base_start_id = 0 sem = asyncio.Semaphore(max_concurrent) # Decide whether to use any progress UI. Allow config to forcibly disable it # (e.g., for HumanAgent which reads from stdin and conflicts with live updating UIs). disable_rich_ui = False try: # Accept multiple possible keys to disable rich UI for key in ("disable_rich_ui", "no_rich", "disable_rich"): v = config.get(key) if isinstance(v, str): v_norm = v.strip().lower() if v_norm in ("1", "true", "yes", "y", "on"): # treat truthy strings as True disable_rich_ui = True break elif v: disable_rich_ui = True break except Exception: disable_rich_ui = False # Also disable UI if HumanAgent is used to avoid interfering with stdin try: if getattr(agent_cls, "__name__", "") == "HumanAgent": disable_rich_ui = True except Exception: pass use_rich = False if not disable_rich_ui: try: from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn, BarColumn, TaskProgressColumn from rich.console import Group from rich.live import Live from rich.text import Text use_rich = True except Exception: use_rich = False # Common runner utilities ------------------------------------------------- def make_instance_logger(effective_id: int): instance_logger = None if logger is not None: import logging instance_logger_name = f"instance_{effective_id}_{logger.run_id}" instance_logger_obj = logging.getLogger(instance_logger_name) instance_logger_obj.setLevel(logging.INFO) instance_logger_obj.handlers.clear() instance_log_file = Path(logger.get_log_dir()) / f"instance_{effective_id}.log" file_handler = logging.FileHandler(instance_log_file, mode="w", 
encoding="utf-8") from utils.logger import MultiLineFormatter file_handler.setFormatter(MultiLineFormatter('%(asctime)s - %(levelname)s - %(message)s')) instance_logger_obj.addHandler(file_handler) class InstanceLogger: def __init__(self, logger_obj, main_logger): self.logger = logger_obj self.main_logger = main_logger self.run_id = main_logger.run_id def info(self, message): self.logger.info(message) def warning(self, message): self.logger.warning(message) def error(self, message): self.logger.error(message) def get_log_dir(self): return self.main_logger.get_log_dir() def get_base_dir(self): return self.main_logger.get_base_dir() instance_logger = InstanceLogger(instance_logger_obj, logger) return instance_logger or logger # No-UI branch (for HumanAgent or when explicitly disabled) -------------- if disable_rich_ui: results: List[Dict[str, Any]] = [] for instance_id in range(num_instances): effective_id = base_start_id + instance_id plogger = make_instance_logger(effective_id) agent = create_instance(agent_cls, config, plogger) env = create_instance(env_cls, config, plogger) res = await run_single_instance(agent, env, config, plogger, effective_id) results.append(res) return results # Rich UI branch ---------------------------------------------------------- if use_rich: results: List[Dict[str, Any]] = [] overall_progress = Progress( TextColumn("[bold]Overall[/bold]"), BarColumn(bar_width=None), TaskProgressColumn(), TimeElapsedColumn(), refresh_per_second=8, ) instances_progress = Progress( SpinnerColumn(style="cyan"), TextColumn("[bold]{task.description}[/bold]"), TextColumn("{task.fields[status]}", style="dim"), refresh_per_second=8, ) instance_tasks: Dict[int, int] = {} finished_names: List[str] = [] async def runner(instance_id: int): async with sem: effective_id = base_start_id + instance_id plogger = make_instance_logger(effective_id) agent = create_instance(agent_cls, config, plogger) env = create_instance(env_cls, config, plogger) # Add a per-instance 
spinner task task_id = instances_progress.add_task(f"instance {effective_id}", status="running") instance_tasks[effective_id] = task_id try: res = await run_single_instance(agent, env, config, plogger, effective_id) overall_progress.update(overall_task, advance=1) # Remove finished task from running list and update Done line try: # Prefer hiding the task to avoid accumulating many visible lines instances_progress.update(task_id, status="done", visible=True) # Immediately hide the finished task for a clean UI instances_progress.update(task_id, visible=False) except Exception: pass try: # Best-effort removal (not strictly required if hidden) instances_progress.remove_task(task_id) except Exception: pass finished_names.append(f"instance {effective_id}") try: done_renderable = Text("✔ Done: ", style="green") if finished_names: done_renderable.append(", ".join(finished_names)) live.update(Group(overall_progress, instances_progress, done_renderable)) except Exception: pass return res finally: # Keep finished tasks displayed; just cleanup mapping instance_tasks.pop(effective_id, None) # Ensure any lingering task is hidden in case of earlier failure try: tid = instance_tasks.get(effective_id) if tid is not None: instances_progress.update(tid, visible=False) except Exception: pass with Live(Group(overall_progress, instances_progress, Text("✔ Done: ", style="green")), refresh_per_second=8, transient=False) as live: overall_task = overall_progress.add_task("Instances", total=num_instances) tasks = [asyncio.create_task(runner(i)) for i in range(num_instances)] raw_results = await asyncio.gather(*tasks, return_exceptions=True) for idx, res in enumerate(raw_results): if isinstance(res, Exception): if logger: logger.error(f"Instance {idx} raised exception: {res}") else: print(f"Instance {idx} raised exception: {res}") results.append({"instance_id": idx, "success": False, "error": str(res)}) else: results.append(res) # type: ignore[arg-type] return results # Fallback tqdm branch 
---------------------------------------------------- # Main overall progress bar (position 0) progress_bar = tqdm(total=num_instances, desc="Instances", leave=True) # Allocate fixed display slots for per-instance lightweight spinners slot_queue: asyncio.Queue[int] = asyncio.Queue() for i in range(max_concurrent): slot_queue.put_nowait(i) slot_bars = [ tqdm( total=1, position=1 + i, leave=True, bar_format="{desc} {postfix}", dynamic_ncols=True, ) for i in range(max_concurrent) ] for i, bar in enumerate(slot_bars): bar.set_description_str("[instance -]") bar.set_postfix_str("") active_slots: Dict[int, Dict[str, Any]] = {} stop_spinners = asyncio.Event() async def spinner_updater(): spinner_chars = ["|", "/", "-", "\\"] idx = 0 try: while not stop_spinners.is_set(): for slot, meta in list(active_slots.items()): bar = slot_bars[slot] inst_id = meta.get("id") bar.set_description_str(f"[instance {inst_id}]") bar.set_postfix_str(f"running {spinner_chars[idx % len(spinner_chars)]}") bar.refresh() idx += 1 await asyncio.sleep(0.1) finally: for slot, meta in list(active_slots.items()): bar = slot_bars[slot] inst_id = meta.get("id") bar.set_description_str(f"[instance {inst_id}]") bar.set_postfix_str("done") bar.refresh() async def runner(instance_id: int): async with sem: effective_id = base_start_id + instance_id slot = await slot_queue.get() active_slots[slot] = {"id": effective_id} plogger = make_instance_logger(effective_id) agent = create_instance(agent_cls, config, plogger) env = create_instance(env_cls, config, plogger) try: result = await run_single_instance(agent, env, config, plogger, effective_id) progress_bar.update(1) return result finally: try: bar = slot_bars[slot] bar.set_description_str(f"[instance {effective_id}]") bar.set_postfix_str("done") bar.refresh() except Exception: pass active_slots.pop(slot, None) slot_queue.put_nowait(slot) spinner_task = asyncio.create_task(spinner_updater()) tasks = [asyncio.create_task(runner(i)) for i in range(num_instances)] 
def write_summary(results: List[Dict[str, Any]], output_file: Path):
    """Write results summary to `output_file`, creating parent dirs.

    Aggregates per-run result dicts three ways: overall success rate,
    per-task_type buckets (only for results carrying a 'task_type'), and
    dynamic totals/averages over every numeric field found in the results
    (excluding 'instance_id' and 'success'). The full summary is written as
    JSON and a human-readable digest is printed to stdout.
    """
    total = len(results)
    successes = sum(1 for r in results if r.get("success"))
    # Per-task-type aggregation (only if task_type present)
    by_task: Dict[str, Dict[str, Any]] = {}
    for r in results:
        if "task_type" not in r or r.get("task_type") is None:
            continue
        task_type = str(r.get("task_type"))
        bucket = by_task.setdefault(task_type, {
            "total_instances": 0,
            "successful_instances": 0,
            "total_time": 0.0,
            "total_steps": 0,
            "total_cost": 0.0,
            "total_reward": 0.0,
        })
        bucket["total_instances"] += 1
        if r.get("success"):
            bucket["successful_instances"] += 1
        bucket["total_time"] += float(r.get("time", 0.0))
        bucket["total_steps"] += int(r.get("steps", 0) or 0)
        bucket["total_cost"] += float(r.get("cost", 0.0))
        bucket["total_reward"] += float(r.get("reward", 0.0))
    # Compute averages per task
    for t, b in by_task.items():
        # `or 1` guards against division by zero for an empty bucket.
        ti = b["total_instances"] or 1
        b["success_rate"] = b["successful_instances"] / ti
        b["avg_time_per_instance"] = b["total_time"] / ti
        b["avg_steps_per_instance"] = b["total_steps"] / ti
        b["avg_cost_per_instance"] = b["total_cost"] / ti
    # Dynamically aggregate all numeric-like metrics (totals and averages)
    # Exclude only 'success' to avoid double counting in metrics,
    # and exclude non-meaningful fields like instance_id
    numeric_keys = set()
    for r in results:
        for k, v in r.items():
            if k in ("instance_id", "success"):
                continue
            if isinstance(v, (int, float, bool)):
                numeric_keys.add(k)
    metrics_total: Dict[str, float] = {}
    metrics_avg: Dict[str, float] = {}
    for k in sorted(numeric_keys):
        s = 0.0
        for r in results:
            try:
                val = r.get(k, 0)
                # `or 0` coerces None/missing values before float().
                s += float(val or 0)
            except Exception:
                continue
        metrics_total[k] = s
        metrics_avg[k] = (s / total) if total > 0 else 0.0
    # Per-task dynamic metrics
    by_task_metrics: Dict[str, Dict[str, Dict[str, float]]] = {}
    if by_task:
        for task_type in by_task.keys():
            totals: Dict[str, float] = {}
            avgs: Dict[str, float] = {}
            bucket_results = [r for r in results if str(r.get("task_type")) == task_type]
            bucket_n = len(bucket_results) or 1
            for k in sorted(numeric_keys):
                s = 0.0
                for r in bucket_results:
                    try:
                        val = r.get(k, 0)
                        s += float(val or 0)
                    except Exception:
                        continue
                totals[k] = s
                avgs[k] = s / bucket_n
            by_task_metrics[task_type] = {"metrics_total": totals, "metrics_avg": avgs}
    summary = {
        "summary": {
            "total_instances": total,
            "successful_instances": successes,
            "success_rate": successes / total if total > 0 else 0,
            "metrics_total": metrics_total,
            "metrics_avg": metrics_avg,
        },
        "instances": results,
    }
    if by_task:
        # merge base by_task stats with dynamic metrics
        merged_by_task: Dict[str, Any] = {}
        for t, base_stats in by_task.items():
            merged = dict(base_stats)
            if t in by_task_metrics:
                merged.update(by_task_metrics[t])
            merged_by_task[t] = merged
        summary["by_task_type"] = merged_by_task
    output_file.parent.mkdir(parents=True, exist_ok=True)
    import json
    output_file.write_text(json.dumps(summary, indent=2))
    print("\n📊 Summary:")
    rate_pct = (successes / total * 100.0) if total > 0 else 0.0
    print(f" Success: {successes}/{total} ({rate_pct:.4f}%)")
    print(f" Results saved to: {output_file}")
    # Print per-task breakdown if any
    if by_task:
        print(" By task_type:")
        for t, b in by_task.items():
            rpct = b["success_rate"] * 100.0
            print(f" - {t}: {b['successful_instances']}/{b['total_instances']} ({rpct:.4f}%)")
    # Print standard metrics if present
    standard_keys_order = ["time", "steps", "cost", "reward"]
    std_present = [k for k in standard_keys_order if k in metrics_total]
    if std_present:
        print(" Metrics (totals/avg):")
        for k in std_present:
            total_v = metrics_total[k]
            avg_v = metrics_avg[k]
            try:
                print(f" - {k}: total={total_v:.4f}, avg={avg_v:.4f}")
            except Exception:
                print(f" - {k}: total={total_v}, avg={avg_v}")
    # Print any additional numeric metrics not already shown (excluding 'success')
    excluded_keys = set(std_present)
    extra_keys = [k for k in metrics_total.keys() if k not in excluded_keys]
    if extra_keys:
        print(" Extra metrics (totals/avg):")
        for k in extra_keys:
            total_v = metrics_total[k]
            avg_v = metrics_avg[k]
            try:
                print(f" - {k}: total={total_v:.4f}, avg={avg_v:.4f}")
            except Exception:
                print(f" - {k}: total={total_v}, avg={avg_v}")
Values here override CLI flags.", ) parser.add_argument( "--split", type=str, default="test", help="Dataset split to use (e.g., train/valid/test)", ) parser.add_argument( "--seed", type=int, default=42, help="Random seed forwarded to environments", ) parser.add_argument( "-p", "--profile", type=str, default=None, help="LLM profile name forwarded to the agent", ) parser.add_argument( "-l", "--log-dir", type=str, default=None, help="Custom log directory name (otherwise autogenerated)", ) parser.add_argument( "--max-depth", type=int, default=None, help="Maximum depth for agent execution", ) args = parser.parse_args() try: # Load YAML config (overrides CLI) import yaml yaml_cfg = {} if args.config: try: with open(args.config) as f: yaml_cfg = yaml.safe_load(f) or {} except FileNotFoundError: print(f"⚠️ Config file not found: {args.config}. Using CLI values only.") yaml_cfg = {} # Compose final config: CLI base, YAML overrides cli_cfg: Dict[str, Any] = { "agent": args.agent, "env": args.env, "instances": args.instances, "concurrent": args.concurrent, "output": args.output, "log_dir": args.log_dir, "split": args.split, "seed": args.seed, "profile": args.profile, "max_depth": args.max_depth, } config: Dict[str, Any] = {**cli_cfg, **yaml_cfg} agent_path: str = config.get("agent", args.agent) env_path: str = config.get("env", args.env) instances: int = int(config.get("instances", args.instances) or 1) concurrent: int = int(config.get("concurrent", args.concurrent) or 1) output_name: str = str(config.get("output", args.output)) # Resolve short aliases if provided agent_path = resolve_class_identifier(agent_path, AGENT_ALIASES, "agent") env_path = resolve_class_identifier(env_path, ENV_ALIASES, "env") agent_cls = load_class(agent_path) env_cls = load_class(env_path) # Use class names for default run_id for readability run_id = config.get("log_dir") or _default_run_id(agent_cls.__name__, env_cls.__name__) # Clear existing log directory if present existing_base_dir = 
Path("logs") / run_id if existing_base_dir.exists(): try: shutil.rmtree(existing_base_dir) except Exception as e: print(f"⚠️ Failed to clear existing log directory: {existing_base_dir} ({e})") logger = SimpleLogger(run_id=run_id) # Special handling for HumanAgent: disable Rich UI and force concurrency to 1 is_human_agent = (getattr(agent_cls, "__name__", "") == "HumanAgent") or agent_path.endswith(".HumanAgent") if is_human_agent: if concurrent != 1: logger.info(f"Human agent detected. Forcing max concurrent to 1 (was {concurrent}).") concurrent = 1 config["concurrent"] = 1 config["disable_rich_ui"] = True logger.info(f"🤖 Agent: {agent_path}") logger.info(f"🌍 Environment: {env_path}") logger.info(f"📊 Instances: {instances} (max {concurrent} concurrent)") logger.info("-" * 50) results = asyncio.run( run_concurrent_instances(agent_cls, env_cls, instances, concurrent, config, logger) ) output_file = logger.get_base_dir() / output_name write_summary(results, output_file) except KeyboardInterrupt: print("\n⏹️ Interrupted by user") except Exception as e: # import traceback # traceback.print_exc() print(f"\n❌ Error: {e}") return 1 return 0 if __name__ == "__main__": exit(main()) ================================================ FILE: utils/common.py ================================================ import json import yaml from pathlib import Path from typing import Any, List, Dict, Optional from pydantic_core import to_jsonable_python import re def read_json_file(json_file: str, encoding="utf-8") -> List[Any]: if not Path(json_file).exists(): raise FileNotFoundError(f"json_file: {json_file} not exist, return []") with open(json_file, "r", encoding=encoding) as fin: try: data = json.load(fin) except Exception: raise ValueError(f"read json file: {json_file} failed") return data def write_json_file(json_file: str, data: list, encoding: str = None, indent: int = 4): folder_path = Path(json_file).parent if not folder_path.exists(): folder_path.mkdir(parents=True, exist_ok=True) 
with open(json_file, "w", encoding=encoding) as fout: json.dump(data, fout, ensure_ascii=False, indent=indent, default=to_jsonable_python) def read_yaml_file(yaml_file: str, encoding='utf-8') -> Dict[str, Any]: if not Path(yaml_file).exists(): raise FileNotFoundError(f"yaml_file: {yaml_file} not exist, return empty dict") with open(yaml_file, "r", encoding=encoding) as f: try: data = yaml.safe_load(f) except Exception: raise ValueError(f"read yaml file: {yaml_file} failed") return data def parse_code_block(text: str, lang: str = "python") -> Optional[str]: """Extracts the first code block of a given language from a markdown-formatted text.""" pattern = rf"```{lang}\s*\n(.*?)\n```" match = re.search(pattern, text, re.DOTALL) if match: return match.group(1).strip() return None def parse_xml_tag(response: str, xml_tag: str) -> str: pattern = rf"<{xml_tag}>(.*?)" match = re.search(pattern, response, re.DOTALL) return match.group(1).strip() if match else "" ================================================ FILE: utils/errors.py ================================================ class StepLimitError(Exception): """Raised when the environment exceeds the maximum allowed step count.""" pass ================================================ FILE: utils/executor.py ================================================ from typing import List, Dict, Any, Callable import io, sys import functools import asyncio import types import re import threading import time from utils.llm import AsyncLLM from base.environment import Env def print_output(func): @functools.wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) if result is not None: print(result, file=sys.stdout, flush=True) return result return wrapper class Executor: def __init__(self, env: Env = None, if_run_print: bool = False) -> None: self.env = env self.actions: List[str] = [] self._variables: Dict[str, Any] = {} self.if_run_print = if_run_print if self.if_run_print: self.run = print_output(self.run) 
self._base_globals = { "run": self.run, "re": re, } self._loop = None self._loop_thread = None self._start_loop_thread() def register_function(self, name: str, func: Callable): self._base_globals[name] = func def register_action_function(self, name: str, func: Callable): func_with_run = lambda *args, **kwargs: self.run(func(*args, **kwargs)) self.register_function(name, func_with_run) def register_ask_llm(self, llm: AsyncLLM): def _ask_llm_sync(query: str) -> str: async def _ask_llm(query: str) -> str: response, _cost = await llm( prompt=query, ) return response return self._submit_coro(_ask_llm(query)) self.register_function("ask_llm", _ask_llm_sync) def skip(self, reason: str): return None def set_var(self, key: str, value: Any): self._variables[key] = value def get_var(self, key: str) -> Any: if key not in self._variables: return None return self._variables.get(key) def set_env(self, env: Env): self.env = env def _is_preserved_variable(self, key: str, value: Any) -> bool: if key.startswith('_') or key in self._base_globals: return False return not isinstance(value, (types.ModuleType, types.FunctionType, types.BuiltinFunctionType, types.MethodType, type)) def _infer_type_string(self, value: Any, depth: int = 0, max_depth: int = 2) -> str: if value is None: return "NoneType" if depth > max_depth: return type(value).__name__ try: if isinstance(value, (bool, int, float, str)): return type(value).__name__ if isinstance(value, list): if not value: return "list" elem_types = {self._infer_type_string(v, depth + 1, max_depth) for v in value[:5]} if len(elem_types) == 1: return f"list[{next(iter(elem_types))}]" return "list" if isinstance(value, tuple): if not value: return "tuple" elem_types = [self._infer_type_string(v, depth + 1, max_depth) for v in value[:5]] if all(t == elem_types[0] for t in elem_types): return f"tuple[{elem_types[0]}]" return f"tuple[{', '.join(elem_types)}]" if isinstance(value, set): if not value: return "set" sample = list(value)[:5] elem_types 
= {self._infer_type_string(v, depth + 1, max_depth) for v in sample} if len(elem_types) == 1: return f"set[{next(iter(elem_types))}]" return "set" if isinstance(value, dict): if not value: return "dict" items = list(value.items())[:5] key_types = {self._infer_type_string(k, depth + 1, max_depth) for k, _ in items} val_types = {self._infer_type_string(v, depth + 1, max_depth) for _, v in items} if len(key_types) == 1 and len(val_types) == 1: return f"dict[{next(iter(key_types))}, {next(iter(val_types))}]" return "dict" return type(value).__name__ except Exception: return type(value).__name__ def run(self, action: str) -> str: if self.env is None: raise RuntimeError("Environment not set. Call set_env() first.") result = self._submit_coro(self.env.run(action)) self.actions.append(action) if isinstance(result, list): result = "\n".join(result) return result def get_actions(self) -> List[str]: actions = self.actions.copy() self.actions.clear() return actions def get_variables(self) -> str: return "\n".join([f"- {key} ({self._infer_type_string(value)}): {value}" for key, value in self._variables.items()]) def reset(self): self.actions.clear() self._variables.clear() def _start_loop_thread(self): if self._loop and self._loop.is_running(): return def _loop_runner(): loop = asyncio.new_event_loop() self._loop = loop asyncio.set_event_loop(loop) loop.run_forever() t = threading.Thread(target=_loop_runner, daemon=True) t.start() while self._loop is None or not self._loop.is_running(): time.sleep(0.01) self._loop_thread = t def _submit_coro(self, coro): self._start_loop_thread() future = asyncio.run_coroutine_threadsafe(coro, self._loop) return future.result() def close(self): if self._loop and self._loop.is_running(): try: self._loop.call_soon_threadsafe(self._loop.stop) except Exception: pass if self._loop_thread: self._loop_thread.join(timeout=1) self._loop = None self._loop_thread = None def execute(self, code: str) -> Dict[str, Any]: success, stdout_lines, error_msg = 
self._run_block(code) return {"code": code, "stdout": stdout_lines, "error": error_msg, "success": success} def _run_block(self, block: str) -> tuple[bool, List[str], str]: output = [] class OutputCapture: def __init__(self): self.lines = [] def write(self, text): if text and text != '\n': self.lines.extend(line for line in text.splitlines() if line.strip()) def flush(self): pass capture = OutputCapture() old_stdout = sys.stdout sys.stdout = capture exec_globals = {**self._base_globals, **self._variables} try: exec(block, exec_globals) for key, value in exec_globals.items(): if self._is_preserved_variable(key, value): self._variables[key] = value return True, capture.lines, "" except NameError as e: match = re.search(r"name '(.+?)' is not defined", str(e)) if match and f"{match.group(1)}(" in block: return False, capture.lines, f"NeedExpansion: `{match.group(1)}` needs to be expanded." return False, capture.lines, f"NameError: {e}" except Exception as e: return False, capture.lines, f"{e.__class__.__name__}: {e}" finally: sys.stdout = old_stdout ================================================ FILE: utils/llm.py ================================================ import os import asyncio import yaml import random from pathlib import Path from typing import Optional, Dict, Any, Tuple, Union, List from openai import AsyncOpenAI, APIError, APIConnectionError, APITimeoutError, RateLimitError from pydantic import BaseModel, Field, model_validator, ConfigDict from utils.common import read_json_file DEFAULT_LLM_PROFILE_PATH = Path("configs/profiles.yaml") DEFAULT_PRICE_PATH = Path("configs/prices.json") class LLMConfig(BaseModel): model_config = ConfigDict(extra="forbid", validate_default=True) api_key: Optional[str] = Field( default=None, description="OpenAI API key (defaults to OPENAI_API_KEY environment variable)" ) base_url: Optional[str] = Field( default=None, description="Custom API base URL for OpenAI-compatible endpoints" ) model: str = Field(default="gpt-4o-mini", 
description="Model name to use") temperature: Optional[float] = Field( default=None, ge=0.0, le=2.0, description="Sampling temperature (omit by default; excluded for o-series)" ) max_tokens: Optional[int] = Field( default=None, gt=0, description="Maximum number of tokens to generate (omit by default)" ) timeout: int = Field(default=60, gt=0, description="API request timeout in seconds") max_retries: int = Field(default=3, ge=0, description="Maximum retry attempts") retry_base_delay: float = Field(default=1.0, description="Base retry delay in seconds") retry_jitter: float = Field(default=0.1, description="Retry delay jitter factor") track_costs: bool = Field(default=True, description="Enable cost tracking") @classmethod def from_profile(cls, profile: str = "default", config_path: Path = DEFAULT_LLM_PROFILE_PATH) -> "LLMConfig": try: with config_path.open("r", encoding="utf-8") as f: config = yaml.safe_load(f) profile_config = config.get("models", {}).get(profile, {}) return cls(**{ k: v for k, v in profile_config.items() if v is not None and k in cls.model_fields }) except Exception as e: return cls() @model_validator(mode="after") def resolve_api_key(self) -> "LLMConfig": if not self.api_key: self.api_key = os.environ.get("OPENAI_API_KEY") return self class CostCalculator(BaseModel): pricing: Dict[str, Dict[str, float]] = Field( default_factory=lambda: read_json_file(DEFAULT_PRICE_PATH), description="Pricing data in USD per million tokens" ) def compute_cost( self, model: str, prompt_tokens: int, completion_tokens: int ) -> Tuple[float, Dict[str, Any]]: rates = self.pricing.get(model, self.pricing["default"]) input_cost = (prompt_tokens / 1e6) * rates["input"] output_cost = (completion_tokens / 1e6) * rates["output"] total_cost = input_cost + output_cost cost_breakdown = { "model": model, "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": prompt_tokens + completion_tokens, "input_cost": input_cost, "output_cost": output_cost, 
"total_cost": total_cost, "currency": "USD" } return total_cost, cost_breakdown class AsyncLLM(BaseModel): config: LLMConfig = Field(default_factory=LLMConfig) cost_calculator: CostCalculator = Field(default_factory=CostCalculator) client: Optional[AsyncOpenAI] = Field(default=None, exclude=True) spent: float = Field(default=0.0, description="Total accumulated cost for this instance") model_config = ConfigDict(arbitrary_types_allowed=True) def __init__(self, profile_or_config: Union[str, Dict[str, Any]] = "default", **kwargs): if isinstance(profile_or_config, str): config = self._load_profile_config(profile_or_config) config.update({k: v for k, v in kwargs.items() if k in LLMConfig.model_fields}) super().__init__(config=LLMConfig(**config)) else: config_kwargs = profile_or_config if isinstance(profile_or_config, dict) else {} config_kwargs.update({k: v for k, v in kwargs.items() if k in LLMConfig.model_fields}) super().__init__(config=LLMConfig(**config_kwargs)) self._initialize_client() def _load_profile_config(self, profile: str) -> Dict[str, Any]: try: config_path = DEFAULT_LLM_PROFILE_PATH with open(config_path, 'r', encoding='utf-8') as f: config = yaml.safe_load(f) if profile in config.get("models", {}): return config["models"][profile] elif profile in config.get("llm_pool", {}): pool_config = config["llm_pool"][profile] return {k: v for k, v in pool_config.items() if k in LLMConfig.model_fields} else: return {} except Exception as e: return {} def _initialize_client(self) -> None: if not self.config.api_key: self.config.api_key = os.environ.get("OPENAI_API_KEY") if not self.config.api_key: raise ValueError("Missing required API key. 
Set OPENAI_API_KEY environment variable.") client_args = { "api_key": self.config.api_key, "timeout": self.config.timeout } if self.config.base_url: client_args["base_url"] = self.config.base_url self.client = AsyncOpenAI(**client_args) async def __call__( self, prompt: str, system_prompt: Optional[str] = None, **generation_args ) -> Tuple[str, float]: messages = self._build_messages(prompt, system_prompt) params = self._prepare_params(messages, generation_args) response = await self._retry_api_call(params) content = response.choices[0].message.content cost = 0.0 if self.config.track_costs and (usage := getattr(response, "usage", None)): cost, _ = self.cost_calculator.compute_cost( response.model, usage.prompt_tokens, usage.completion_tokens ) self.spent += cost return content, cost def _build_messages(self, prompt: str, system_prompt: Optional[str]) -> List[Dict[str, str]]: messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) return messages def _prepare_params( self, messages: list[Dict[str, str]], generation_args: Dict[str, Any] ) -> Dict[str, Any]: params: Dict[str, Any] = { "model": self.config.model, "messages": messages, } model_name = (self.config.model or "").lower() is_o_series = model_name.startswith("o") if (self.config.temperature is not None) and (not is_o_series): params["temperature"] = self.config.temperature if self.config.max_tokens is not None: params["max_tokens"] = self.config.max_tokens safe_generation_args = dict(generation_args) if generation_args else {} if is_o_series: safe_generation_args.pop("temperature", None) params.update(safe_generation_args) return params async def _retry_api_call(self, params: Dict[str, Any]) -> Any: for attempt in range(self.config.max_retries + 1): try: return await self.client.chat.completions.create(**params) except (APIError, APIConnectionError, APITimeoutError, RateLimitError) as e: if attempt == 
self.config.max_retries: raise backoff_time = self._calculate_backoff( attempt, self.config.retry_base_delay, self.config.timeout ) await asyncio.sleep(backoff_time) def _calculate_backoff(self, attempt: int, base: float, max_wait: float) -> float: delay = base * (2 ** attempt) jitter = delay * self.config.retry_jitter * random.uniform(-1, 1) return min(delay + jitter, max_wait) def create_llm_instance(model_name: str) -> AsyncLLM: return AsyncLLM(profile_or_config=model_name) async def main(): try: llm = AsyncLLM("default") prompt = "Hello, what is the capital of France?" response, cost = await llm(prompt) print("Response:", response) print("Cost:", cost) except Exception as e: print(f"An error occurred: {e}") if __name__ == "__main__": asyncio.run(main()) ================================================ FILE: utils/logger.py ================================================ import os import logging from datetime import datetime from pathlib import Path class MultiLineFormatter(logging.Formatter): def format(self, record): msg = super().format(record) lines = msg.split('\n') if len(lines) <= 1: return msg return '\n'.join(lines) class SimpleLogger: def __init__(self, run_id=None, log_level=logging.INFO): if run_id is None: run_id = datetime.now().strftime("%Y%m%d_%H%M%S") self.run_id = run_id self.base_dir = Path("logs") / run_id self.log_dir = self.base_dir / "running_logs" self.log_dir.mkdir(parents=True, exist_ok=True) sanitized_run_id = run_id.replace("/", "_").replace("\\", "_") self.logger = logging.getLogger(f"alfworld_run_{sanitized_run_id}") self.logger.setLevel(log_level) self.logger.handlers.clear() log_file = self.log_dir / "run.log" file_handler = logging.FileHandler(log_file, mode='w', encoding='utf-8') file_handler.setLevel(log_level) console_handler = logging.StreamHandler() console_handler.setLevel(log_level) formatter = MultiLineFormatter('%(asctime)s - %(levelname)s - %(message)s') file_handler.setFormatter(formatter) 
console_handler.setFormatter(formatter) self.logger.addHandler(file_handler) self.logger.addHandler(console_handler) self.info(f"Starting new run with ID: {run_id}") self.info(f"Logs will be saved to: {self.log_dir.absolute()}") def info(self, message): self.logger.info(message) def error(self, message): self.logger.error(message) def warning(self, message): self.logger.warning(message) def debug(self, message): self.logger.debug(message) def log_result(self, result): task_id = result.get("task_id", "unknown") success = result.get("both_success", result.get("is_success", False)) exec_time = result.get("execution_time", result.get("time", 0)) game_name = result.get("game_name", "") status = "SUCCESS" if success else "FAILED" self.info(f"[{status}] {task_id} - {game_name} - {exec_time:.2f}s") if "error" in result: self.error(f"Error in {task_id}: {result['error']}") def log_stats(self, stats): self.info("=" * 50) self.info("RUN STATISTICS") self.info("=" * 50) self.info(f"Total tests: {stats['total_tests']}") self.info(f"Successful: {stats['successful_tests']}") self.info(f"Success rate: {stats['success_rate']:.1%}") self.info(f"Average execution time: {stats['average_execution_time']:.2f}s") if stats.get('task_types'): self.info("\nSuccess rate by task type:") for task_type, type_stats in stats['task_types'].items(): rate = type_stats['rate'] total = type_stats['total'] success = type_stats['success'] self.info(f" {task_type}: {success}/{total} ({rate:.1%})") def get_log_dir(self): return self.log_dir def get_base_dir(self): return self.base_dir ================================================ FILE: utils/mockllm.py ================================================ import asyncio class MockLLM: def __init__(self, name="MockLLM"): self.name = name async def __call__(self, prompt): print(f"\n--- {self.name} Prompt ---") print(prompt) print(f"\n--- Please provide your response (enter an empty line to finish) ---") lines = [] while True: line = input() if line.strip() == 
"": break lines.append(line) return "\n".join(lines) async def test_mock_llm(): mock_llm = MockLLM(name="TestLLM") prompts = [ "What is the capital of France?", "Write a short poem about artificial intelligence." ] for i, prompt in enumerate(prompts, 1): print(f"\nTest {i}:") response = await mock_llm(prompt) print("\nYour response was:") print("-" * 40) print(response) print("-" * 40) print("\nTest completed successfully!") if __name__ == "__main__": asyncio.run(test_mock_llm())