Repository: lebrice/Sequoia Branch: master Commit: 7e12ff8ed67f Files: 460 Total size: 2.6 MB Directory structure: gitextract_c6gc35b2/ ├── .dockerignore ├── .gitattributes ├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── dockers/ │ ├── .gitignore │ ├── base/ │ │ ├── Dockerfile │ │ └── build.sh │ └── branch/ │ ├── Dockerfile │ └── build.sh ├── docs/ │ └── diagrams/ │ └── src/ │ ├── gym.puml │ ├── pytorch_lightning.puml │ └── seq_diagram.puml ├── examples/ │ ├── README.md │ ├── __init__.py │ ├── advanced/ │ │ ├── RL_and_SL_demo.py │ │ ├── continual_rl_demo.py │ │ ├── ewc_in_rl.py │ │ ├── hat_demo.py │ │ ├── hparam_tuning.py │ │ ├── pnn/ │ │ │ ├── __init__.py │ │ │ ├── layers.py │ │ │ ├── model_rl.py │ │ │ ├── model_sl.py │ │ │ └── pnn_method.py │ │ └── procgen_example.py │ ├── basic/ │ │ ├── __init__.py │ │ ├── base_method_demo.py │ │ ├── pl_example.py │ │ ├── pl_example_packnet.py │ │ ├── pl_example_test.py │ │ ├── quick_demo.ipynb │ │ ├── quick_demo.py │ │ ├── quick_demo_ewc.py │ │ ├── quick_demo_packnet.py │ │ └── quick_demo_test.py │ ├── clcomp21/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── a2c_example.py │ │ ├── a2c_example_test.py │ │ ├── classifier.py │ │ ├── classifier_test.py │ │ ├── conftest.py │ │ ├── dummy_method.py │ │ ├── dummy_method_test.py │ │ ├── multihead_classifier.py │ │ ├── multihead_classifier_test.py │ │ ├── regularization_example.py │ │ ├── regularization_example_test.py │ │ ├── sb3_example.py │ │ └── sb3_example_test.py │ ├── demo_utils.py │ └── prerequisites/ │ └── dataclasses_example.py ├── mypy.ini ├── pytest.ini ├── requirements.txt ├── scripts/ │ ├── eai/ │ │ ├── cancel_all_queuing.sh │ │ ├── cancel_all_running.sh │ │ ├── job.sh │ │ ├── rl_sweep.sh │ │ ├── shell_job.sh │ │ └── sl_sweep.sh │ └── slurm/ │ ├── launch_many_sweeps.sh │ ├── run.sh │ └── sweep.sh ├── sequoia/ │ ├── README.md │ ├── __init__.py │ ├── _version.py │ ├── client/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── env.proto │ │ ├── env_proxy.py │ │ ├── env_proxy_test.py │ │ ├── server.py │ │ ├── setting_proxy.py │ │ └── setting_proxy_test.py │ ├── common/ │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── batch_test.py │ │ ├── callbacks/ │ │ │ ├── __init__.py │ │ │ ├── knn_callback.py │ │ │ └── vae_callback.py │ │ ├── config/ │ │ │ ├── __init__.py │ │ │ ├── config.py │ │ │ └── wandb_config.py │ │ ├── gym_wrappers/ │ │ │ ├── __init__.py │ │ │ ├── action_limit.py │ │ │ ├── action_limit_test.py │ │ │ ├── add_done.py │ │ │ ├── add_info.py │ │ │ ├── convert_tensors.py │ │ │ ├── convert_tensors_test.py │ │ │ ├── env_dataset.py │ │ │ ├── env_dataset_test.py │ │ │ ├── episode_limit.py │ │ │ ├── episode_limit_test.py │ │ │ ├── measure_performance.py │ │ │ ├── multi_task_environment.py │ │ │ ├── multi_task_environment_test.py │ │ │ ├── observation_limit.py │ │ │ ├── observation_limit_test.py │ │ │ ├── pixel_observation.py │ │ │ ├── pixel_observation_test.py │ │ │ ├── policy_env.py │ │ │ ├── policy_env_test.py │ │ │ ├── smooth_environment.py │ │ │ ├── smooth_environment_test.py │ │ │ ├── step_callback_wrapper.py │ │ │ ├── step_callback_wrapper_test.py │ │ │ ├── transform_wrappers.py │ │ │ ├── transform_wrappers_test.py │ │ │ ├── utils.py │ │ │ └── utils_test.py │ │ ├── hparams/ │ │ │ └── __init__.py │ │ ├── layers.py │ │ ├── loss.py │ │ ├── loss_test.py │ │ ├── metrics/ │ │ │ ├── __init__.py │ │ │ ├── classification.py │ │ │ ├── classification_test.py │ │ │ ├── get_metrics.py │ │ │ ├── metrics.py │ │ │ ├── metrics_utils.py │ │ │ ├── metrics_utils_test.py │ │ │ ├── regression.py │ │ │ └── rl_metrics.py │ │ ├── replay.py │ │ ├── spaces/ │ │ │ ├── __init__.py │ │ │ ├── image.py │ │ │ ├── named_tuple.py │ │ │ ├── named_tuple_test.py │ │ │ ├── space.py │ │ │ ├── sparse.py │ │ │ ├── sparse_test.py │ │ │ ├── tensor_spaces.py │ │ │ ├── tensor_spaces_test.py │ │ │ ├── typed_dict.py │ │ │ └── typed_dict_test.py │ │ ├── task.py │ │ └── transforms/ │ │ ├── __init__.py │ │ ├── channels.py │ │ ├── compose.py │ │ ├── resize.py │ │ ├── split_batch.py │ │ ├── to_tensor.py │ │ ├── transform.py │ │ ├── transform_enum.py │ │ ├── transforms_test.py │ │ └── utils.py │ ├── common.puml │ ├── conftest.py │ ├── experiments/ │ │ ├── __init__.py │ │ ├── experiment.py │ │ ├── experiment_test.py │ │ ├── hpo_sweep.py │ │ └── hpo_sweep_test.py │ ├── main.py │ ├── methods/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── aux_tasks/ │ │ │ ├── __init__.py │ │ │ ├── auxiliary_task.py │ │ │ ├── ewc.py │ │ │ ├── reconstruction/ │ │ │ │ ├── __init__.py │ │ │ │ ├── ae.py │ │ │ │ ├── decoder_for_dataset.py │ │ │ │ ├── decoders.py │ │ │ │ └── vae.py │ │ │ └── transformation_based/ │ │ │ ├── __init__.py │ │ │ ├── bases.py │ │ │ └── rotation.py │ │ ├── avalanche_methods/ │ │ │ ├── __init__.py │ │ │ ├── agem.py │ │ │ ├── agem_test.py │ │ │ ├── ar1.py │ │ │ ├── ar1_test.py │ │ │ ├── base.py │ │ │ ├── base_test.py │ │ │ ├── conftest.py │ │ │ ├── cwr_star.py │ │ │ ├── cwr_star_test.py │ │ │ ├── ewc.py │ │ │ ├── ewc_test.py │ │ │ ├── experience.py │ │ │ ├── gdumb.py │ │ │ ├── gdumb_test.py │ │ │ ├── gem.py │ │ │ ├── gem_test.py │ │ │ ├── lwf.py │ │ │ ├── lwf_test.py │ │ │ ├── naive.py │ │ │ ├── naive_test.py │ │ │ ├── patched_models.py │ │ │ ├── plugins.py │ │ │ ├── replay.py │ │ │ ├── replay_test.py │ │ │ ├── synaptic_intelligence.py │ │ │ └── synaptic_intelligence_test.py │ │ ├── base_method.py │ │ ├── base_method_test.py │ │ ├── conftest.py │ │ ├── d3rlpy_methods/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── base_test.py │ │ ├── ewc_method.py │ │ ├── ewc_method_test.py │ │ ├── experience_replay.py │ │ ├── experience_replay_test.py │ │ ├── hat.py │ │ ├── method_test.py │ │ ├── models/ │ │ │ ├── __init__.py │ │ │ ├── base_model/ │ │ │ │ ├── __init__.py │ │ │ │ ├── base_model.py │ │ │ │ ├── model.py │ │ │ │ ├── multihead_model.py │ │ │ │ ├── multihead_model_test.py │ │ │ │ ├── self_supervised_model.py │ │ │ │ ├── self_supervised_model_test.py │ │ │ │ └── semi_supervised_model.py │ │ │ ├── baseline_model.puml │ │ │ ├── fcnet.py │ │ │ ├── forward_pass.py │ │ │ ├── output_heads/ │ │ │ │ ├── __init__.py │ │ │ │ ├── classification_head.py │ │ │ │ ├── output_head.py │ │ │ │ ├── regression_head.py │ │ │ │ └── rl/ │ │ │ │ ├── __init__.py │ │ │ │ ├── actor_critic_head.py │ │ │ │ ├── episodic_a2c.py │ │ │ │ ├── episodic_a2c_test.py │ │ │ │ ├── policy_head.py │ │ │ │ ├── policy_head_test.py │ │ │ │ └── wasted_steps_calc.py │ │ │ ├── output_heads.puml │ │ │ └── simple_convnet.py │ │ ├── models.puml │ │ ├── packnet_method.py │ │ ├── packnet_method_test.py │ │ ├── pl_bolts_methods/ │ │ │ └── __init__.py │ │ ├── pl_dqn.py │ │ ├── pnn/ │ │ │ ├── __init__.py │ │ │ ├── layers.py │ │ │ ├── model_rl.py │ │ │ ├── model_sl.py │ │ │ └── pnn_method.py │ │ ├── random_baseline.py │ │ ├── random_baseline_test.py │ │ ├── stable_baselines3_methods/ │ │ │ ├── __init__.py │ │ │ ├── a2c.py │ │ │ ├── a2c_test.py │ │ │ ├── base.py │ │ │ ├── base_test.py │ │ │ ├── ddpg.py │ │ │ ├── ddpg_test.py │ │ │ ├── dqn.py │ │ │ ├── dqn_test.py │ │ │ ├── off_policy_method.py │ │ │ ├── off_policy_method_test.py │ │ │ ├── on_policy_method.py │ │ │ ├── policy_wrapper.py │ │ │ ├── ppo.py │ │ │ ├── ppo_test.py │ │ │ ├── sac.py │ │ │ ├── sac_test.py │ │ │ ├── td3.py │ │ │ └── td3_test.py │ │ └── trainer.py │ ├── methods.puml │ ├── sequoia.puml │ ├── settings/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── assumptions/ │ │ │ ├── __init__.py │ │ │ ├── assumptions.puml │ │ │ ├── base.py │ │ │ ├── classification.py │ │ │ ├── context_discreteness.py │ │ │ ├── context_visibility.py │ │ │ ├── continual.py │ │ │ ├── discrete_results.py │ │ │ ├── iid.py │ │ │ ├── iid_results.py │ │ │ ├── incremental.py │ │ │ ├── incremental_results.py │ │ │ ├── incremental_test.py │ │ │ ├── task_incremental.py │ │ │ └── task_type.py │ │ ├── base/ │ │ │ ├── __init__.py │ │ │ ├── base.puml │ │ │ ├── bases.py │ │ │ ├── environment.py │ │ │ ├── objects.py │ │ │ ├── results.py │ │ │ ├── setting.py │ │ │ ├── setting_meta.py │ │ │ └── setting_test.py │ │ ├── offline_rl/ │ │ │ └── setting.py │ │ ├── presets/ │ │ │ ├── __init__.py │ │ │ ├── cartpole_pixels.yaml │ │ │ ├── cartpole_state.yaml │ │ │ ├── cifar10.yaml │ │ │ ├── cifar100.yaml │ │ │ ├── classic_control/ │ │ │ │ ├── cartpole.yaml │ │ │ │ └── mountaincar_continuous.yaml │ │ │ ├── fashion_mnist.yaml │ │ │ ├── mnist.yaml │ │ │ ├── monsterkong/ │ │ │ │ ├── monsterkong_3each.yaml │ │ │ │ ├── monsterkong_4each.yaml │ │ │ │ ├── monsterkong_5each.yaml │ │ │ │ ├── monsterkong_all.yaml │ │ │ │ ├── monsterkong_jumps.yaml │ │ │ │ ├── monsterkong_jumps_and_ladders.yaml │ │ │ │ ├── monsterkong_ladders.yaml │ │ │ │ └── monsterkong_mix.yaml │ │ │ ├── mujoco/ │ │ │ │ └── half_cheetah.yaml │ │ │ ├── rl_track.yaml │ │ │ └── sl_track.yaml │ │ ├── rl/ │ │ │ ├── __init__.py │ │ │ ├── continual/ │ │ │ │ ├── __init__.py │ │ │ │ ├── environment.py │ │ │ │ ├── environment_test.py │ │ │ │ ├── make_env.py │ │ │ │ ├── make_env_test.py │ │ │ │ ├── objects.py │ │ │ │ ├── results.py │ │ │ │ ├── setting.py │ │ │ │ ├── setting_test.py │ │ │ │ ├── tasks.py │ │ │ │ ├── tasks_test.py │ │ │ │ └── test_environment.py │ │ │ ├── discrete/ │ │ │ │ ├── __init__.py │ │ │ │ ├── multienv_wrappers.py │ │ │ │ ├── multienv_wrappers_test.py │ │ │ │ ├── results.py │ │ │ │ ├── setting.py │ │ │ │ ├── setting_test.py │ │ │ │ ├── tasks.py │ │ │ │ ├── tasks_test.py │ │ │ │ └── test_environment.py │ │ │ ├── environment.py │ │ │ ├── environment_test.py │ │ │ ├── envs/ │ │ │ │ ├── __init__.py │ │ │ │ ├── classic_control.py │ │ │ │ ├── monsterkong.py │ │ │ │ ├── mujoco/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── half_cheetah.py │ │ │ │ │ ├── half_cheetah_test.py │ │ │ │ │ ├── hopper.py │ │ │ │ │ ├── hopper_test.py │ │ │ │ │ ├── modified_friction.py │ │ │ │ │ ├── modified_friction_test.py │ │ │ │ │ ├── modified_gravity.py │ │ │ │ │ ├── modified_gravity_test.py │ │ │ │ │ ├── modified_mass.py │ │ │ │ │ ├── modified_mass_test.py │ │ │ │ │ ├── modified_size.py │ │ │ │ │ ├── modified_size_test.py │ │ │ │ │ ├── modified_wall.py │ │ │ │ │ ├── mujoco_model_utils.py │ │ │ │ │ ├── walker2d.py │ │ │ │ │ └── walker2d_test.py │ │ │ │ └── variant_spec.py │ │ │ ├── incremental/ │ │ │ │ ├── __init__.py │ │ │ │ ├── objects.py │ │ │ │ ├── results.py │ │ │ │ ├── setting.py │ │ │ │ ├── setting_test.py │ │ │ │ └── tasks.py │ │ │ ├── multi_task/ │ │ │ │ ├── __init__.py │ │ │ │ ├── setting.py │ │ │ │ └── setting_test.py │ │ │ ├── objects.py │ │ │ ├── setting.py │ │ │ ├── setting_test.py │ │ │ ├── task_incremental/ │ │ │ │ ├── __init__.py │ │ │ │ ├── setting.py │ │ │ │ ├── setting_test.py │ │ │ │ └── tasks.py │ │ │ ├── traditional/ │ │ │ │ ├── __init__.py │ │ │ │ ├── setting.py │ │ │ │ └── setting_test.py │ │ │ └── wrappers/ │ │ │ ├── __init__.py │ │ │ ├── measure_performance.py │ │ │ ├── measure_performance_test.py │ │ │ ├── no_typed_objects.py │ │ │ ├── task_labels.py │ │ │ └── typed_objects.py │ │ ├── settings.puml │ │ └── sl/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── continual/ │ │ │ ├── __init__.py │ │ │ ├── environment.py │ │ │ ├── environment_test.py │ │ │ ├── envs.py │ │ │ ├── objects.py │ │ │ ├── results.py │ │ │ ├── setting.py │ │ │ ├── setting_test.py │ │ │ └── wrappers.py │ │ ├── discrete/ │ │ │ ├── __init__.py │ │ │ ├── setting.py │ │ │ └── setting_test.py │ │ ├── domain_incremental/ │ │ │ ├── __init__.py │ │ │ ├── setting.py │ │ │ └── setting_test.py │ │ ├── environment.py │ │ ├── environment_test.py │ │ ├── incremental/ │ │ │ ├── __init__.py │ │ │ ├── environment.py │ │ │ ├── environment_test.py │ │ │ ├── objects.py │ │ │ ├── results.py │ │ │ ├── setting.py │ │ │ ├── setting_test.py │ │ │ └── unused_batch_transforms.py │ │ ├── multi_task/ │ │ │ ├── __init__.py │ │ │ ├── setting.py │ │ │ └── setting_test.py │ │ ├── setting.py │ │ ├── task_incremental/ │ │ │ ├── __init__.py │ │ │ ├── setting.py │ │ │ └── setting_test.py │ │ ├── traditional/ │ │ │ ├── __init__.py │ │ │ ├── results.py │ │ │ ├── setting.py │ │ │ └── setting_test.py │ │ └── wrappers/ │ │ ├── __init__.py │ │ ├── measure_performance.py │ │ └── measure_performance_test.py │ ├── settings.puml │ └── utils/ │ ├── __init__.py │ ├── categorical.py │ ├── data_utils.py │ ├── encode.py │ ├── generic_functions/ │ │ ├── __init__.py │ │ ├── _namedtuple.py │ │ ├── _namedtuple_test.py │ │ ├── concatenate.py │ │ ├── detach.py │ │ ├── move.py │ │ ├── replace.py │ │ ├── replace_test.py │ │ ├── singledispatchmethod.py │ │ ├── slicing.py │ │ ├── slicing_test.py │ │ ├── stack.py │ │ └── to_from_tensor.py │ ├── logging_utils.py │ ├── module_dict.py │ ├── parseable.py │ ├── plotting.py │ ├── pretrained_utils.py │ ├── readme.py │ ├── serialization.py │ └── utils.py ├── setup.cfg ├── setup.py └── versioneer.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ data lightning_logs checkpoints results ================================================ FILE: .gitattributes ================================================ sequoia/_version.py export-subst ================================================ FILE: .gitignore ================================================ **/__pycache__/ .vscode # mypy .mypy_cache/ .dmypy.json dmypy.json examples/results/* results/* !results/**/*.csv data/* */data/* !data/**/*.py scripts/*.png wandb .idea .ipynb_checkpoints checkpoints lightning_logs .pylintrc **.png *.gz *.pt build dist *.egg-info sequoia/results mjkey.txt ================================================ FILE: .gitmodules ================================================ [submodule "sequoia/methods/cn_dpm"] path = sequoia/methods/cn_dpm url = https://github.com/ryanlindeborg/CN-DPM.git [submodule "examples/clcomp21/Real_DEEL"] path = examples/clcomp21/Real_DEEL url = https://github.com/mostafaelaraby/Real-DEEL-Dark-Experience.git [submodule "sequoia/methods/continual_world"] path = sequoia/methods/continual_world url = https://www.github.com/lebrice/continual_world.git ================================================ FILE: .travis.yml ================================================ language: python python: - "3.7" install: - pip install gym[atari] - pip install -r requirements.txt script: - pytest after_sucess: coveralls ================================================ FILE: LICENSE ================================================ GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . ================================================ FILE: MANIFEST.in ================================================ include versioneer.py include sequoia/_version.py ================================================ FILE: README.md ================================================ # Sequoia - The Research Tree A Playground for research at the intersection of Continual, Reinforcement, and Self-Supervised Learning. - 5 minute intro: https://www.youtube.com/watch?v=0u48vr96zRQ - Paper link: https://arxiv.org/abs/2108.01005 - [Continual Supervised Learning Study](https://wandb.ai/sequoia/csl_study) (~6K runs) - [Continual Reinforcement Learning Study](https://wandb.ai/sequoia/crl_study) (~2300 runs) ## Note: This project is not being actively developed at the moment. If you encounter any difficulties, please create an issue and I'll help you out. If you have any questions or comments, please make an issue! ## Motivation: Most applied ML research generally either proposes new Settings (research problems), new Methods (solutions to such problems), or both. - When proposing new Settings, researchers almost always have to reimplement or heavily modify existing solutions before they can be applied onto their new problem. - Likewise, when creating new Methods, it's often necessary to first re-create the experimental setting of other baseline papers, or even the baseline methods themselves, as experimental conditions may be *slightly* different between papers! The goal of this repo is to: - Organize various research Settings into an inheritance hierarchy (a tree!), with more *general*, challenging settings with few assumptions at the top, and more constrained problems at the bottom. - Provide a mechanism for easily reusing existing solutions (Methods) onto new Settings through **Polymorphism**! - Allow researchers to easily create new, general Methods and quickly gather results on a multitude of Settings, ranging from Supervised to Reinforcement Learning! ## Installation Requires python >= 3.7 ### Basic installation: ```console $ git clone https://www.github.com/lebrice/Sequoia.git $ pip install -e Sequoia ``` ### Optional Addons You can also install optional "addons" for Sequoia, each of which either adds new Methods, new environments/datasets, or both. using either the usual `extras_require` feature of setuptools, or by pip-installing other repositories which register Methods for Sequoia using an `entry_point` in their `setup.py` file. ```console pip install -e Sequoia[all|] ``` Here are some of the optional addons: - `avalanche`: Continual Supervised Learning methods, provided by the [Avalanche](https://github.com/ContinualAI/avalanche) library: ```console $ pip install -e Sequoia[avalanche] ``` - `CN-DPM`: Continual Neural Dirichlet Process Mixture model: ```console $ cd Sequoia $ git submodule init # to setup the submodules $ pip install -e sequoia/methods/cn_dpm ``` - `orion`: Hyper-parameter optimization using [Orion](https://github.com/epistimio/orion) ```console $ pip install -e Sequoia[orion] ``` - `metaworld`: Continual / Multi-Task Reinforcement Learning environments, thanks to the [metaworld](https://github.com/rlworkgroup/metaworld) package. The usual setup for mujoco needs to be done, Sequoia unfortunately can't do it for you ;( ```console $ pip install -e Sequoia[metaworld] ``` - `monsterkong`: Continual Reinforcement Learning environment from [the Meta-MonsterKong repo](https://github.com/lebrice/MetaMonsterkong). ```console $ pip install -e Sequoia[monsterkong] ``` - `continual_world`: The Continual World benchmark for Continual Reinforcement learning. Adds 6 different Continual RL Methods to Sequoia. ```console $ cd Sequoia $ git submodule init # to setup the submodules $ pip install -e sequoia/methods/continual_world ``` See the `setup.py` file for all the optional extras. ### Additional Installation Steps for Mac Install the latest XQuartz app from here: https://www.xquartz.org/releases/index.html Then run the following commands on the terminal: ```console mkdir /tmp/.X11-unix sudo chmod 1777 /tmp/.X11-unix sudo chown root /tmp/.X11-unix/ ``` ## Documentation overview: - ### **[Getting Started / Examples (take a look at this first)](examples/)** - ### Runing Experiments (below) - ### [Settings overview](sequoia/settings/) - ### [Methods overview](sequoia/methods/) ### Current Settings & Assumptions: | Setting | RL vs SL | clear task boundaries? | Task boundaries given? | Task labels at training time? | task labels at test time | Stationary context? | Fixed action space | | -------------------------------------------------------------------------- | ------------------------------------------------------------------------ | ---------------------- | ---------------------- | ----------------------------- | ------------------------ | ------------------- | ------------------ | | [Continual RL](sequoia/settings/rl/continual/setting.py) | RL | no | no | no | no | no | no(?) | | [Discrete Task-Agnostic RL](sequoia/settings/rl/discrete/setting.py) | RL | **yes** | **yes** | no | no | no | no(?) | | [Incremental RL](sequoia/settings/rl/incremental/setting.py) | RL | **yes** | **yes** | **yes** | no | no | no(?) | | [Task-Incremental RL](sequoia/settings/rl/task_incremental/setting.py) | RL | **yes** | **yes** | **yes** | **yes** | no | no(?) | | [Traditional RL](sequoia/settings/rl/task_incremental/setting.py) | RL | **yes** | **yes** | **yes** | no | **yes** | no(?) | | [Multi-Task RL](sequoia/settings/rl/task_incremental/setting.py) | RL | **yes** | **yes** | **yes** | **yes** | **yes** | no(?) | | [Continual SL](sequoia/settings/sl/continual/setting.py) | SL | no | no | no | no | no | no | | [Discrete Task-Agnostic SL](sequoia/settings/sl/discrete/setting.py) | SL | **yes** | no | no | no | no | no | | [(Class) Incremental SL](sequoia/settings/sl/incremental/setting.py) | SL | **yes** | **yes** | no | no | no | no | | [Domain-Incremental SL](sequoia/settings/sl/domain_incremental/setting.py) | SL | **yes** | **yes** | **yes** | no | no | **yes** | | [Task-Incremental SL](sequoia/settings/sl/task_incremental/setting.py) | SL | **yes** | **yes** | **yes** | **yes** | no | no | | [Traditional SL](sequoia/settings/sl/traditional/setting.py) | SL | **yes** | **yes** | **yes** | no | **yes** | no | | [Multi-Task SL](sequoia/settings/sl/multi_task/setting.py) | SL | **yes** | **yes** | **yes** | **yes** | **yes** | no | #### Notes - **Active / Passive**: Active settings are Settings where the next observation depends on the current action, i.e. where actions influence future observations, e.g. Reinforcement Learning. Passive settings are Settings where the current actions don't influence the next observations (e.g. Supervised Learning.) - **Bold entries** in the table mark constant attributes which cannot be changed from their default value. - \*: The environment is changing constantly over time in `ContinualRLSetting`, so there aren't really "tasks" to speak of. ## Running experiments --> **(Reminder) First, take a look at the [Examples](/examples)** <-- #### Directly in code: ```python from sequoia.settings import TaskIncrementalSLSetting from sequoia.methods import BaseMethod # Create the setting setting = TaskIncrementalSLSetting(dataset="mnist") # Create the method method = BaseMethod(max_epochs=1) # Apply the setting to the method to generate results. results = setting.apply(method) print(results.summary()) ``` ### Command-line: ```console $ sequoia --help usage: sequoia [-h] [--version] {run,sweep,info} ... Sequoia - The Research Tree Used to run experiments, which consist in applying a Method to a Setting. optional arguments: -h, --help show this help message and exit --version Displays the installed version of Sequoia and exits. command: Command to execute {run,sweep,info} run Run an experiment on a given setting. sweep Run a hyper-parameter optimization sweep. info Displays some information about a Setting or Method. ``` For example: ```console $ sequoia run [--debug] (setting arguments) (method arguments) $ sequoia sweep [--debug] (setting arguments) (method arguments) $ sequoia info [setting or method] ``` For a detailed description of all the arguments, use the `--help` command for any of the actions: ```console $ sequoia --help $ sequoia run --help $ sequoia run --help $ sequoia run --help $ sequoia sweep --help $ sequoia sweep --help $ sequoia sweep --help ``` For example: ```console $ sequoia run --debug task_incremental_sl --dataset mnist random_baseline ``` For example: - Run the BaseMethod on task-incremental MNIST, with one epoch per task, and without wandb: ```console $ sequoia run task_incremental_sl --dataset mnist base --max_epochs 1 ``` - Run the PPO Method from stable-baselines3 on an incremental RL setting, with the default dataset (CartPole) and 5 tasks: ```console $ sequoia --setting incremental_rl --nb_tasks 5 --method sb3.ppo --steps_per_task 10_000 ``` More questions? Please let us know by creating an issue or posting in the discussions! ================================================ FILE: dockers/.gitignore ================================================ # Hiding the 'eai' dockerfile eai ================================================ FILE: dockers/base/Dockerfile ================================================ # syntax=docker/dockerfile:1 FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime USER root EXPOSE 2222 EXPOSE 6000 EXPOSE 8088 ENV LANG=en_US.UTF-8 RUN apt update && \ apt install -y \ git wget zsh unzip rsync build-essential \ ca-certificates supervisor openssh-server ssh \ curl wget vim procps htop locales nano man net-tools iputils-ping \ libosmesa6-dev libgl1-mesa-glx libgl1-mesa-dev libglu1-mesa-dev libglfw3 \ libglfw3-dev freeglut3 xvfb ffmpeg curl patchelf cmake zlib1g zlib1g-dev \ swig libopenmpi-dev aptitude screen xz-utils locate && \ sed -i "s/# en_US.UTF-8/en_US.UTF-8/" /etc/locale.gen && locale-gen && \ useradd -m -u 13011 -s /bin/zsh toolkit && passwd -d toolkit && \ useradd -m -u 13011 -s /bin/zsh --non-unique console && passwd -d console && \ useradd -m -u 13011 -s /bin/zsh --non-unique _toolchain && passwd -d _toolchain && \ useradd -m -u 13011 -s /bin/bash --non-unique coder && passwd -d coder && \ chown -R toolkit:toolkit /run /etc/shadow /etc/profile && \ apt autoremove --purge && apt-get clean && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ echo ssh >> /etc/securetty && \ rm -f /etc/legal /etc/motd # RUN conda install -c conda-forge opencv RUN conda install matplotlib numpy scipy hdf5 h5py cython # RUN pip install \ # # Needed to build atari_py: (WHY don't they put it in a build_requires?) # lockfile # fasteners \ # pybullet \ # wandb \ # tqdm \ # # tensorflow \ # bs4 \ # pandas notebook plotly tqdm pyamg lxml numba pyyaml torchmeta # Removing this `torchtext` package, seems to be causing an import issue in pytorch! RUN pip uninstall -y torchtext RUN chown -R toolkit:root /workspace RUN chmod -R 777 /workspace # this doesn't do anything RUN adduser toolkit sudo RUN chown -R toolkit:root /mnt/ # RUN mkdir -p /mnt/home RUN chmod 777 /opt/conda RUN chmod 777 /mnt RUN chmod -R 777 /workspace SHELL [ "conda", "run", "-n", "base", "/bin/bash", "-c"] ## Unused zshell and oh-my-zsh stuff: # RUN sh -c "$(wget -O- https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" # RUN sed -i 's/robbyrussell/clean/' ~/.zshrc # RUN sed -i 's/plugins=(git)/plugins=(git debian history-substring-search)/' ~/.zshrc # MuJoCo-related stuff: # RUN curl -o ~/mujoco200_linux.zip -L -C - https://www.roboti.us/download/mujoco200_linux.zip # RUN curl -o ~/mjpro150_linux.zip -L -C - https://www.roboti.us/download/mjpro150_linux.zip # RUN cd ~ && unzip mujoco200_linux.zip && rm mujoco200_linux.zip # RUN cd ~ && unzip mjpro150_linux.zip && rm mjpro150_linux.zip # RUN mkdir ~/.mujoco # RUN mv ~/mujoco200_linux ~/.mujoco/mujoco200 # RUN mv ~/mjpro150 ~/.mujoco # RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:~/.mujoco/mujoco200/bin" >> ~/.bashrc # RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:~/.mujoco/mjpro150/bin" >> ~/.bashrc # COPY mjkey.txt /home/toolkit/.mujoco/ # ENV LD_LIBRARY_PATH /home/toolkit/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH} # ENV LD_LIBRARY_PATH /home/toolkit/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH} # RUN mkdir /workspace/tools # RUN cd /workspace/tools && git clone https://github.com/openai/mujoco-py.git && pip install -e mujoco-py # For Wandb (TODO: Doesn't appear to work, using env variable with WANDB_API_KEY # instead.) # COPY .netrc /home/toolkit/.netrc # COPY .netrc /root/.netrc # COPY .netrc /tmp/.netrc VOLUME /mnt/data VOLUME /mnt/results # USER toolkit ENV DATA_DIR=/mnt/data ENV RESULTS_DIR=/mnt/results ENV WANDB_DIR=/mnt/results # VOLUME /mnt/home # WORKDIR /mnt/home ENV PATH /home/toolkit/.local/bin:${PATH} # RUN cd /workspace/tools && git clone https://github.com/openai/gym.git && cd gym && pip install -e '.[all]' # RUN cd /workspace/tools && git clone https://github.com/openai/baselines.git && cd baselines && pip install -e . RUN cd /workspace/ && git clone https://github.com/lebrice/Sequoia.git RUN pip install -e /workspace/Sequoia[no_mujoco] ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "base", "/bin/bash", "-c"] ================================================ FILE: dockers/base/build.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures set -o nounset # Exposes unset variables if git diff-index --quiet HEAD --; then # No changes echo "All good, no uncommitted changes." else # Changes echo "Can't build dockers when there are uncommited changes!" exit 1 fi echo "Building the 'base' dockerfile" docker build . --file dockers/base/Dockerfile --tag sequoia:base REGISTRY=${REGISTRY:-`docker info | sed '/Username:/!d;s/.* //'`} echo "Using registry $REGISTRY" docker tag sequoia:base $REGISTRY/sequoia:base docker push $REGISTRY/sequoia:base ================================================ FILE: dockers/branch/Dockerfile ================================================ # syntax=docker/dockerfile:1 FROM lebrice/sequoia:base USER root SHELL [ "conda", "run", "-n", "base", "/bin/bash", "-c"] ARG BRANCH=master RUN conda install -y cudatoolkit RUN cd /workspace/Sequoia && git fetch -p && git checkout ${BRANCH} && pip install -e .[no_mujoco] ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "base", "/bin/bash", "-c"] ================================================ FILE: dockers/branch/build.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures set -o nounset # Exposes unset variables export CURRENT_BRANCH="`git branch --show-current`" export BRANCH=${BRANCH:-$CURRENT_BRANCH} echo "Using branch $BRANCH" export REGISTRY=${REGISTRY:-`docker info | sed '/Username:/!d;s/.* //'`} echo "Using registry $REGISTRY" if git diff-index --quiet HEAD --; then # No changes echo "all good." else # Changes echo "Can't build dockers when you have uncommited changes!" exit 1 fi git push echo "Building the container for branch $BRANCH (no cache)" docker build . --file dockers/branch/Dockerfile \ --no-cache \ --build-arg BRANCH=$BRANCH \ --tag sequoia:$BRANCH docker tag sequoia:$BRANCH $REGISTRY/sequoia:$BRANCH docker push $REGISTRY/sequoia:$BRANCH ================================================ FILE: docs/diagrams/src/gym.puml ================================================ @startuml gym package gym { package spaces as gym.spaces { abstract class Space { + contains(T sample) -> bool + sample() -> T } class Box extends Space { + low: np.ndarray + high: np.ndarray + shape: Tuple[int, ...] + dtype: np.dtype + contains(np.ndarray sample) -> bool + sample() -> np.ndarray } class Discrete extends Space { + n: int + contains(int sample) -> bool + sample() -> int } class Tuple extends Space { + spaces: Tuple[Space] + contains(Tuple sample) -> bool + sample() -> Tuple } ' Tuple spaces contain other spaces. Tuple *-- Space class Dict extends Space { + spaces: dict[str, Space] + contains(dict sample) -> bool + sample() -> dict } ' Same for Dicts. Dict *-- Space } abstract class gym.Env { + observation_space: Space + action_space: Space + step(Actions) -> Tuple[Obs, Rew, bool, dict] + reset() -> Obs } gym.Env .. Space abstract class Wrapper extends gym.Env{ + env: gym.Env } } @enduml ================================================ FILE: docs/diagrams/src/pytorch_lightning.puml ================================================ @startuml pytorch_lightning package pytorch_lightning { abstract class LightningDataModule { {abstract} + prepare_data() {abstract} + setup() {abstract} + train_dataloader(): torch.DataLoader {abstract} + val_dataloader(): torch.DataLoader {abstract} + test_dataloader(): torch.DataLoader } abstract class LightningModule { {abstract} + train_step(batch) + val_step() + test_step() } } @enduml ================================================ FILE: docs/diagrams/src/seq_diagram.puml ================================================ @startuml ContinualRLSetting header Page Header footer Page %page% of %lastpage% title Overall Evaluation loop - Sequoia note over User, Setting Even though this diagram is somewhat large, keep in mind that there are but a few key methods: 1. Method.configure() 2. Method.fit() 3. Method.get_actions() 4. Method.on_task_switch() end note actor User participant Setting << (A,#2121FF) Setting >> collections TrainEnv collections ValidEnv collections TestEnv ' autoactivate on participant Method << (C,#ADD1B2) Method >> participant Model << (C,#ADD1B2) nn.Module >> ' activate Setting ' autoactivate on User -> Setting: Create the Setting Setting -> TrainEnv: Create temp env return observation / action / reward spaces User <-- Setting User -> Method: Create the Method User <-- Method User -> Setting: setting.apply(method) Setting -> Method: **method.configure(setting)** Method -> Method: create model, optimizer, etc. ' deactivate Method Method -> Model: Create ' activate Model Setting <-- Method autoactivate off == training == group train_loop [for each task `i`] alt task_labels_at_train_time? else True Setting -> Method: **on_task_switch(i)** Method -> Method: consolidate knowledge, \n switch output heads, etc. Setting <-- Method else False Setting -> Method: **on_task_switch(None)** Method -> Method: consolidate knowledge etc. Setting <-- Method end Setting -> TrainEnv: Create train env for task i Setting -> ValidEnv: Create valid env for task i ' activate ValidEnv Setting -> Method: **Method.fit(train_env, valid_env)** ' loop ' alt loop group loop note right The Method is free to do whatever it wants with the Train and Valid envs of the current task. end note Method -> Model: train() return ' group training Model <--> TrainEnv: train with the env ... Method -> Model: eval() return Model <--> ValidEnv: Evaluate performance ... ' autoactivate on ' Model -> TrainEnv: reset ' return Observations ' Model -> TrainEnv: step(actions) ' return Observations, Rewards, done, info end end == testing == note over Setting, Method We currently only perform the test loop after training is complete on all tasks, however, in the future we will run this test loop after the end of training on each task. See issue#46 on GitHub for more info. end note group test_loop Setting --> Setting: Concatenate datasets for all tasks, \n create test wrappers, etc. Setting --> TestEnv: Create test environment (all tasks) autoactivate on Setting -> TestEnv: reset return observations ' loop alt else normal step Setting -> Method: **get_actions(observations)** Method -> Model: predict(x) return y_pred return actions Setting -> TestEnv: step(actions) return observations, rewards, done, info else end of episode reached Setting -> TestEnv: reset return observations else task boundary is reached ' TestEnv --> Method: **on_task_switch(i)** alt known_task_boundaries? else False: do nothing note over Method When known_task_boundaries=False, the Method doesn't get informed of task boundaries (it might have to perform some kind of change-point detection, for instance). end note else True note over TestEnv Minor note: here it's the TestEnv that calls the Method when a task boundary is reached. end note alt task_labels_at_test_time? else true ' note right of Setting: If task labels are given TestEnv -> Method: **on_task_switch(i)** autoactivate off Method -> Method autoactivate on return else false TestEnv -> Method: **on_task_switch(None)** autoactivate off Method -> Method autoactivate on return end end end autoactivate off note over TestEnv The test environment uses a `Monitor` wrapper, and gather statistics of interest like the mean reward, accuracy, etc. end note TestEnv -> Setting: report performance of the Method end Setting -> Setting: Weigh performance of each task \n depending on the Setting User <-- Setting: Results ' return Results @enduml ================================================ FILE: examples/README.md ================================================ # Examples Here's a brief description of the examples in this folder: ## Prerequisites: - [Intro to dataclasses & simple-parsing](prerequisites/dataclasses_example.py) - [Basics of openai gym](https://github.com/openai/gym#basics) ## Basic examples: - [pl_example.py](basic/pl_example.py): **Recommended entry-point for ML Practicioners**. Shows an example method and model using [PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning). This is the best way to get started if you don't mind some level of abstraction in your code (a good thing in general!) - [quick_demo.ipynb](basic/quick_demo.ipynb): **Recommended entry-point for new users**. Simple demo showing how to create a `Method` from scratch that targets a Supervised CL `Setting`, as well as how to improve this simple Method using a simple regularization loss. - [quick_demo.py](basic/quick_demo.py): First part of the above notebook: shows how to create a Method from scratch that targets a Supervised CL Setting. - [quick_demo_ewc.py](basic/quick_demo_ewc.py): Second part of the above notebook: shows how to improve upon an existing Method by adding a CL regularization loss. - [baseline_demo.py](basic/baseline_demo.py): Shows how the BaseMethod can be applied to get results in both RL and SL Settings. ## CLVision Workshop Submission Examples: Examples in this folder are aimed at solving the supervised learning track of the competition. Each example builds on top of the previous, in a manner that improves the overall performance you can expect on any given CL setting. As such, it is recommended that you take a look at the examples in the following order: 0. [DummyMethod](clcomp21/dummy_method.py) Non-parametric method that simply returns a random prediction for each observation. 1. [Simple Classifier](clcomp21/classifier.py): Standard neural net classifier without any CL-related mechanism. Works in the SL track, but has very poor performance. 2. [Multi-Head / Task Inference Classifier](clcomp21/multihead_classifier.py): Performs multi-head prediction, and a simple form of task inference. Gets better results that the example. 3. [CL Regularized Classifier](clcomp21/regularization_example.py): Adds a simple CL regularization loss to the multihead classifier above. ## Advanced examples: - [RL_and_SL_demo.py](advanced/RL_and_SL_demo.py): Example that shows how the BaseMethod can easily be extended by adding AuxiliaryTasks to it, allows you to get results in both RL and SL. - [continual_rl_demo.py](advanced/ewc_in_rl.py): Demonstrates how to create Reinforcement Learning (RL) Settings, as well as how methods from [stable-baselines3](https://github.com/DLR-RM/stable-baselines3) can be applied to these settings. - [Extending Stable-Baselines3 (RL Settings only)](advanced/ewc_in_rl.py): (Not recommended for new users!) Very specific example which shows how, if you really wanted to, you could extend one or more of the Methods from SB3 with some kind of regularization loss hooking into the internal optimization loop of SB3. ================================================ FILE: examples/__init__.py ================================================ ================================================ FILE: examples/advanced/RL_and_SL_demo.py ================================================ """ Demo where we add the same regularization loss from the other examples, but this time as an `AuxiliaryTask` on top of the BaseMethod. This makes it easy to create CL methods that apply to both RL and SL Settings! """ import copy import random import sys from argparse import Namespace from dataclasses import dataclass from typing import ClassVar, List import torch from simple_parsing import ArgumentParser, field from torch import Tensor # This "hack" is required so we can run `python examples/custom_baseline_demo.py` sys.path.extend([".", ".."]) from sequoia.common.config import Config from sequoia.common.loss import Loss from sequoia.methods import BaseMethod from sequoia.methods.aux_tasks import AuxiliaryTask from sequoia.methods.models import BaseModel, ForwardPass from sequoia.methods.trainer import TrainerConfig from sequoia.settings import Environment, RLSetting, Setting from sequoia.utils.utils import camel_case, dict_intersection from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class SimpleRegularizationAuxTask(AuxiliaryTask): """Same regularization loss as in the previous examples, this time implemented as an `AuxiliaryTask`, which gets added to the BaseModel, making it applicable to both RL and SL. This adds a CL regularizaiton loss to the BaseModel. The most important methods of `AuxiliaryTask` is `get_loss`, which should return a `Loss` for the given forward pass and resulting rewards/labels. Take a look at the `AuxiliaryTask` class for more info. """ name: ClassVar[str] = "simple_regularization" @dataclass class Options(AuxiliaryTask.Options): """Hyper-parameters / configuration options of this auxiliary task.""" # Coefficient used to scale this regularization loss before it gets # added to the 'base' loss of the model. coefficient: float = 0.01 # Wether to use the absolute difference of the weights or the difference # in the `regularize` method below. use_abs_diff: bool = False # The norm term for the 'distance' between the current and old weights. distance_norm: int = 2 def __init__( self, *args, name: str = None, options: "SimpleRegularizationAuxTask.Options" = None, **kwargs, ): super().__init__(*args, options=options, name=name, **kwargs) self.options: SimpleRegularizationAuxTask.Options self.previous_task: int = None # TODO: Figure out a clean way to persist this dict into the state_dict. self.previous_model_weights: Dict[str, Tensor] = {} self.n_switches: int = 0 def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss: """Get a `Loss` for the given forward pass and resulting rewards/labels. Take a look at the `AuxiliaryTask` class for more info, NOTE: This is the same simplified version of EWC used throughout the other examples: the loss is the P-norm between the current weights and the weights as they were on the begining of the task. Also note, this particular example doesn't actually use the provided arguments. """ if self.previous_task is None: # We're in the first task: do nothing. return Loss(name=self.name) old_weights: Dict[str, Tensor] = self.previous_model_weights new_weights: Dict[str, Tensor] = dict(self.model.named_parameters()) loss = 0.0 for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights): loss += torch.dist(new_w, old_w.type_as(new_w), p=self.options.distance_norm) ewc_loss = Loss(name=self.name, loss=loss) return ewc_loss def on_task_switch(self, task_id: int) -> None: """Executed when the task switches (to either a new or known task).""" if not self.enabled: return if self.previous_task is None and self.n_switches == 0: logger.debug(f"Starting the first task, no update.") pass elif task_id is None or task_id != self.previous_task: logger.debug( f"Switching tasks: {self.previous_task} -> {task_id}: " f"Updating the 'anchor' weights." ) self.previous_task = task_id self.previous_model_weights.clear() self.previous_model_weights.update( copy.deepcopy({k: v.detach() for k, v in self.model.named_parameters()}) ) self.n_switches += 1 class CustomizedBaselineModel(BaseModel): @dataclass class HParams(BaseModel.HParams): """Hyper-parameters of our customized baseline model.""" # Hyper-parameters of our simple new auxiliary task. simple_reg: SimpleRegularizationAuxTask.Options = field( default_factory=SimpleRegularizationAuxTask.Options ) def __init__( self, setting: Setting, hparams: "CustomizedBaselineModel.HParams", config: Config, ): super().__init__(setting=setting, hparams=hparams, config=config) self.hp: CustomizedBaselineModel.HParams # Here we add our new auxiliary task: self.add_auxiliary_task(SimpleRegularizationAuxTask(options=self.hp.simple_reg)) # Or, add replay buffers of some sort: self.replay_buffer: List = [] # (...) @dataclass class CustomMethod(BaseMethod, target_setting=Setting): """Example methods which adds regularization to the baseline in RL and SL. This extends the `BaseMethod` by adding the simple regularization auxiliary task defined above to the `BaseModel`. NOTE: Since this class inherits from `BaseMethod`, which targets the `Setting` setting, i.e. the "root" node, it is applicable to all settings, both in RL and SL. However, you could customize the `target_setting` argument above to limit this to any particular subtree (only SL, only RL, only when task labels are present, etc). """ # Hyper-parameters of the customized Baseline Model used by this method. hparams: CustomizedBaselineModel.HParams = field( default_factory=CustomizedBaselineModel.HParams ) def __init__( self, hparams: CustomizedBaselineModel.HParams = None, config: Config = None, trainer_options: TrainerConfig = None, **kwargs, ): super().__init__( hparams=hparams, config=config, trainer_options=trainer_options, **kwargs, ) def create_model(self, setting: Setting) -> CustomizedBaselineModel: """Creates the Model to be used for the given `Setting`.""" return CustomizedBaselineModel(setting=setting, hparams=self.hparams, config=self.config) def configure(self, setting: Setting): """Configure this Method before being trained / tested on this Setting.""" super().configure(setting) # For example, change the value of the coefficient of our # regularization loss when in RL vs SL: if isinstance(setting, RLSetting): self.hparams.simple_reg.coefficient = 0.01 else: self.hparams.simple_reg.coefficient = 1.0 def fit(self, train_env: Environment, valid_env: Environment): """Called by the Setting to let the Method train on a given task. You can do whatever you want with the train and valid environments. As it is currently, in most `Settings`, the valid environment will contain data from only the current task. (See issue at https://github.com/lebrice/Sequoia/issues/46 for more context). """ return super().fit(train_env=train_env, valid_env=valid_env) @classmethod def add_argparse_args(cls, parser: ArgumentParser): """Adds command-line arguments for this Method to an argument parser. NOTE: This doesn't do anything differently than the base implementation, but it's included here just for illustration purposes. """ # 'dest' is where the arguments will be stored on the namespace. dest = camel_case(cls.__qualname__) # Add all command-line arguments. This adds arguments for all fields of # this dataclass. parser.add_arguments(cls, dest=dest) # You could add arguments here if you wanted to: # parser.add_argument("--foo", default=1.23, help="example argument") @classmethod def from_argparse_args(cls, args: Namespace): """Create an instance of this class from the parsed arguments.""" # Retrieve the parsed arguments: dest = camel_case(cls.__qualname__) method: CustomMethod = getattr(args, dest) # You could retrieve other arguments like so: # foo: int = args.foo return method def demo_manual(): """Apply the custom method to a Setting, creating both manually in code.""" # Create any Setting from the tree: from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSLSetting # setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=5) # SL setting = TaskIncrementalRLSetting( # RL dataset="cartpole", train_task_schedule={ 0: {"gravity": 10, "length": 0.5}, 5000: {"gravity": 10, "length": 1.0}, }, train_max_steps=10_000, ) ## Create the BaseMethod: config = Config(debug=True) trainer_options = TrainerConfig(max_epochs=1) hparams = BaseModel.HParams() base_method = BaseMethod(hparams=hparams, config=config, trainer_options=trainer_options) ## Get the results of the baseline method: base_results = setting.apply(base_method, config=config) ## Create the CustomMethod: config = Config(debug=True) trainer_options = TrainerConfig(max_epochs=1) hparams = CustomizedBaselineModel.HParams() new_method = CustomMethod(hparams=hparams, config=config, trainer_options=trainer_options) ## Get the results for the 'improved' method: new_results = setting.apply(new_method, config=config) print(f"\n\nComparison: BaseMethod vs CustomMethod") print("\n BaseMethod results: ") print(base_results.summary()) print("\n CustomMethod results: ") print(new_results.summary()) def demo_command_line(): """Run the same demo as above, but customizing the Setting and Method from the command-line. NOTE: Remember to uncomment the function call below to use this instead of demo_simple! """ ## Create the `Setting` and the `Config` from the command-line, like in ## the other examples. parser = ArgumentParser(description=__doc__) ## Add command-line arguments for any Setting in the tree: from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSLSetting # parser.add_arguments(TaskIncrementalSLSetting, dest="setting") parser.add_arguments(TaskIncrementalRLSetting, dest="setting") parser.add_arguments(Config, dest="config") # Add the command-line arguments for our CustomMethod (including the # arguments for our simple regularization aux task). CustomMethod.add_argparse_args(parser, dest="method") args = parser.parse_args() setting: ClassIncrementalSetting = args.setting config: Config = args.config # Create the BaseMethod: base_method = BaseMethod.from_argparse_args(args, dest="method") # Get the results of the BaseMethod: base_results = setting.apply(base_method, config=config) ## Create the CustomMethod: new_method = CustomMethod.from_argparse_args(args, dest="method") # Get the results for the CustomMethod: new_results = setting.apply(new_method, config=config) print(f"\n\nComparison: BaseMethod vs CustomMethod:") print(base_results.summary()) print(new_results.summary()) if __name__ == "__main__": demo_manual() # demo_command_line() ================================================ FILE: examples/advanced/continual_rl_demo.py ================================================ import sys # This "hack" is required so we can run `python examples/continual_rl_demo.py` sys.path.extend([".", ".."]) from sequoia.methods.stable_baselines3_methods import A2CMethod, DQNMethod from sequoia.settings import ( ContinualRLSetting, IncrementalRLSetting, RLSetting, TaskIncrementalRLSetting, ) if __name__ == "__main__": task_schedule = { 0: {"gravity": 10, "length": 0.2}, 1000: {"gravity": 100, "length": 1.2}, 2000: {"gravity": 10, "length": 0.2}, } setting = ContinualRLSetting( # setting = IncrementalRLSetting( # setting = TaskIncrementalRLSetting( # setting = RLSetting( dataset="CartPole-v1", train_max_steps=2000, train_task_schedule=task_schedule, ) # Create the method to use here: # NOTE: The DQN method doesn't seem to work nearly as well as A2C. # method = DQNMethod(train_steps_per_task=1_000) method = A2CMethod(train_steps_per_task=1_000) # You could change the hyper-parameters of the method too: # method.hparams.buffer_size = 100 results = setting.apply(method) print(results.summary()) ================================================ FILE: examples/advanced/ewc_in_rl.py ================================================ """ Example of how to add a simplified regularization method to algos from stable-baseline-3. """ from collections import deque from copy import deepcopy from dataclasses import dataclass from typing import ClassVar, Dict, List, Optional, Type, TypeVar, Union import gym import torch from nngeometry.generator.jacobian import Jacobian from nngeometry.layercollection import LayerCollection from nngeometry.object.pspace import PMatAbstract, PMatDiag, PMatKFAC, PVector from simple_parsing import choice from stable_baselines3.common.base_class import BaseAlgorithm from stable_baselines3.common.policies import BasePolicy from torch import Tensor from torch.utils.data import DataLoader, TensorDataset from sequoia.methods import register_method from sequoia.methods.stable_baselines3_methods import StableBaselines3Method from sequoia.methods.stable_baselines3_methods.policy_wrapper import PolicyWrapper from sequoia.settings import TaskIncrementalRLSetting from sequoia.settings.base import Actions, Environment, Method, Observations from sequoia.utils.utils import dict_intersection from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) Policy = TypeVar("Policy", bound=BasePolicy) class NormRegularizer(PolicyWrapper[Policy]): """A Wrapper class that adds a `on_task_switch` and a `ewc_loss` method to an nn.Module (in this particular case, a Policy from SB3.) By subclassing PolicyWrapper, this is able to leverage some 'hooks' into the optimizer of the policy. """ def __init__(self: Policy, *args, reg_coefficient: float = 1.0, ewc_p_norm: int = 2, **kwargs): super().__init__(*args, **kwargs) self.reg_coefficient = reg_coefficient self.ewc_p_norm = ewc_p_norm self.previous_model_weights: Dict[str, Tensor] = {} self._previous_task: Optional[int] = None self._n_switches: int = 0 def on_task_switch(self: Policy, task_id: Optional[int], *args, **kwargs) -> None: """Executed when the task switches (to either a known or unknown task).""" logger.info(f"On task switch called: task_id={task_id}") if self._previous_task is None and self._n_switches == 0 and not task_id: logger.info("Starting the first task, no EWC update.") elif task_id is None or task_id != self._previous_task: # NOTE: We also switch between unknown tasks. logger.info( f"Switching tasks: {self._previous_task} -> {task_id}: " f"Updating the EWC 'anchor' weights." ) self._previous_task = task_id self.previous_model_weights.clear() self.previous_model_weights.update( deepcopy({k: v.detach() for k, v in self.named_parameters()}) ) self._n_switches += 1 def get_loss(self: Policy) -> Union[float, Tensor]: """This will get called before the call to `policy.optimizer.step()` from within the `train` method of the algos from stable-baselines3. You can use this to return some kind of loss tensor to use. """ return self.reg_coefficient * self.ewc_loss() def after_zero_grad(self: Policy): """Called after `self.policy.optimizer.zero_grad()` in the training loop of the SB3 algos. """ # Backpropagate the loss here, by default, so that any grad clipping # also affects the grads of the loss, for instance. wrapper_loss = self.get_loss() if isinstance(wrapper_loss, Tensor) and wrapper_loss != 0.0 and wrapper_loss.requires_grad: logger.info(f"{type(self).__name__} loss: {wrapper_loss.item()}") wrapper_loss.backward(retain_graph=True) def before_optimizer_step(self: Policy): """Called before `self.policy.optimizer.step()` in the training loop of the SB3 algos. """ def ewc_loss(self: Policy) -> Union[float, Tensor]: """Gets an 'ewc-like' regularization loss. NOTE: This is a simplified version of EWC where the loss is the P-norm between the current weights and the weights as they were on the begining of the task. """ if self._previous_task is None: # We're in the first task: do nothing. return 0.0 old_weights: Dict[str, Tensor] = self.previous_model_weights new_weights: Dict[str, Tensor] = dict(self.named_parameters()) loss = 0.0 for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights): loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm) return loss class EWCPolicy(NormRegularizer): """A Wrapper class that adds a `on_task_switch` and a `ewc_loss` method to an nn.Module (in this particular case, a Policy from SB3) and implements the EWC method. """ def __init__( self: Policy, *args, reg_coefficient: float = 1.0, ewc_p_norm: int = 2, fim_representation: PMatAbstract = PMatDiag, **kwargs, ): super().__init__(*args, reg_coefficient, ewc_p_norm, **kwargs) self.FIMs: List[PMatAbstract] = None self.previous_model_weights: PVector = None self.FIM_representation = fim_representation def consolidate(self, new_fims: List[PMatAbstract], task: int) -> None: """ Consolidates the previous FIMs and the new onces. See online EWC in https://arxiv.org/pdf/1805.06370.pdf. """ if self.FIMs is None: self.FIMs = new_fims return assert len(new_fims) == len(self.FIMs) for i, (fim_previous, fim_new) in enumerate(zip(self.FIMs, new_fims)): if fim_previous is None: self.FIMs[i] = fim_new else: # consolidate the FIMs self.FIMs[i] = EWCPolicy._consolidate_fims(fim_previous, fim_new, task) @staticmethod def _consolidate_fims( fim_previous: PMatAbstract, fim_new: PMatAbstract, task: int ) -> PMatAbstract: # consolidate the fim_new into fim_previous in place if isinstance(fim_new, PMatDiag): fim_previous.data = ((deepcopy(fim_new.data)) + fim_previous.data * (task)) / (task + 1) elif isinstance(fim_new.data, dict): for (n, p), (n_, p_) in zip(fim_previous.data.items(), fim_new.data.items()): for item, item_ in zip(p, p_): item.data = ((item.data * (task)) + deepcopy(item_.data)) / (task + 1) return fim_previous def on_task_switch( self: Policy, task_id: Optional[int], dataloader: DataLoader, method: str = "a2c" ) -> None: """Executed when the task switches (to either a known or unknown task).""" logger.info(f"On task switch called: task_id={task_id}") if self._previous_task is None and self._n_switches == 0 and not task_id: self._previous_task = task_id logger.info("Starting the first task, no EWC update.") self._n_switches += 1 elif task_id is None or self._previous_task is None or task_id > self._previous_task: # we dont want to go here at test tiem # NOTE: We also switch between unknown tasks. logger.info( f"Switching tasks: {self._previous_task} -> {task_id}: " f"Updating the EWC 'anchor' weights." ) self._previous_task = task_id self.previous_model_weights = PVector.from_model(self).clone().detach() # TODO: keepng to FIMs might be not the optimal way of doing this new_fims = [] if method == "dqn": function = self.q_net n_output = self.action_space.n else: function = self n_output = 1 # TODO: Import this FIM function, from wherever it was defined. new_fim = FIM( model=self, loader=dataloader, representation=self.FIM_representation, n_output=n_output, variant=method, function=function, device=self.device.type, ) new_fims.append(new_fim) if method == "a2c": # apply EWC also to the value net new_fim_critic = FIM( model=self, loader=dataloader, representation=self.FIM_representation, n_output=1, variant="regression", function=lambda *x: self(x[0])[1], device=self.device.type, ) new_fims.append(new_fim_critic) self.consolidate(new_fims, task=self._previous_task) self._n_switches += 1 def ewc_loss(self: Policy) -> Union[float, Tensor]: """Gets an 'ewc-like' regularization loss.""" regularizer = 0.0 if self._previous_task is None or self.reg_coefficient == 0 or self.FIMs is None: # We're in the first task: do nothing. return regularizer v_current = PVector.from_model(self) for fim in self.FIMs: regularizer += fim.vTMv(v_current - self.previous_model_weights) return regularizer from sequoia.methods.stable_baselines3_methods import ( A2CModel, DDPGModel, DQNModel, PPOModel, SACModel, TD3Model, ) @register_method @dataclass class ExampleRegularizationMethod(StableBaselines3Method): Model: ClassVar[Type[BaseAlgorithm]] # You could use any of these 'backbones' from SB3: Model = A2CModel # Works great! (fastest) # Model = PPOModel # Works great! (somewhat fast) # Model = SACModel # Works (seems to be quite a bit slower). # These don't yet work, they have the same error, which seems to be # related to the action space being Discrete: # stable_baselines3/td3/td3.py", line 143, in train # noise = replay_data.actions.clone().data.normal_(0, self.target_policy_noise) # RuntimeError: "normal_kernel_cuda" not implemented for 'Long' # Model = TD3Model # TODO # Model = DDPGModel # TODO # Model = DQNModel # Doesn't work: predictions have more than one value?! # Coefficient for the EWC-like loss. reg_coefficient: float = 1.0 # norm of the 'distance' used in the ewc-like loss above. ewc_p_norm: int = 2 def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm: # Create the model, as usual: model = super().create_model(train_env, valid_env) # 'Wrap' the algorithm's policy with the EWC wrapper. model = NormRegularizer.wrap_algorithm( model, reg_coefficient=self.reg_coefficient, ewc_p_norm=self.ewc_p_norm, ) return model def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ if self.model: self.model.policy.on_task_switch(task_id) @register_method @dataclass class EWCExampleMethod(StableBaselines3Method): Model: ClassVar[Type[BaseAlgorithm]] # Model = A2CModel # Works great! (fastest) Model = DQNModel # Works great! (fastest) # Coefficient for the EWC-like loss. reg_coefficient: float = 1.0 # Number of observations to use for FIM calculation total_steps_fim: int = 1000 # Fisher information type (diagonal or block diagobnal) fim_representation: PMatAbstract = choice( {"diagonal": PMatDiag, "block_diagonal": PMatKFAC}, default=PMatKFAC ) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm: # Create the model, as usual: model = super().create_model(train_env, valid_env) # 'Wrap' the algorithm's policy with the EWC wrapper. model = EWCPolicy.wrap_algorithm( model, reg_coefficient=self.reg_coefficient, fim_representation=self.fim_representation, ) return model def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. """ if self.model: # create onbservation collection to use for FIM calculation observation_collection = [] while len(observation_collection) < self.total_steps_fim: state = self.model.env.reset() for _ in range(1000): action = self.get_actions(Observations(state), self.model.env.action_space) state, _, done, _ = self.model.env.step(action) observation_collection.append(torch.tensor(state).to(self.model.device)) if done: break dataloader = DataLoader( TensorDataset(torch.cat(observation_collection)), batch_size=100, shuffle=False ) if "a2c" in str(self.model.__class__): rl_method = "a2c" elif "dqn" in str(self.model.__class__): rl_method = "dqn" else: raise NotImplementedError self.model.policy.on_task_switch(task_id, dataloader, method=rl_method) if __name__ == "__main__": setting = TaskIncrementalRLSetting( dataset="cartpole", nb_tasks=2, train_task_schedule={ 0: {"gravity": 10, "length": 0.3}, 1000: {"gravity": 10, "length": 0.5}, # second task is 'easier' than the first one. }, train_max_steps=2000, ) method = EWCExampleMethod(reg_coefficient=0.0) results_without_reg = setting.apply(method) method = EWCExampleMethod(reg_coefficient=100) results_with_reg = setting.apply(method) print("-" * 40) print("WITHOUT EWC ") print(results_without_reg.summary()) print(f"With EWC (coefficient={method.reg_coefficient}):") print(results_with_reg.summary()) ================================================ FILE: examples/advanced/hat_demo.py ================================================ import sys from argparse import Namespace from dataclasses import dataclass from typing import Dict, NamedTuple, Optional, Tuple import gym import numpy as np import torch import tqdm from gym import Space, spaces from numpy import inf from simple_parsing import ArgumentParser from torch import Tensor from sequoia.common import Config from sequoia.common.spaces import Image from sequoia.methods import register_method from sequoia.settings import Environment, Method from sequoia.settings.sl import TaskIncrementalSLSetting from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.settings.sl.incremental import Actions, Observations, Rewards class Masks(NamedTuple): """Named tuple for the masked tensors created in the HATNet.""" gc1: Tensor gc2: Tensor gc3: Tensor gfc1: Tensor gfc2: Tensor class HatNet(torch.nn.Module): """ @inproceedings{serra2018overcoming, title={Overcoming Catastrophic Forgetting with Hard Attention to the Task}, author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros}, booktitle={International Conference on Machine Learning}, pages={4548--4557}, year={2018} } The model is where the model weights are initialized. Just like a classic PyTorch, here the different layers and components of the model are defined """ def __init__(self, image_space: Image, n_classes_per_task: Dict[int, int], s_hat: int = 50): super().__init__() ncha = image_space.channels size = image_space.width self.n_classes_per_task = n_classes_per_task self.s_hat = s_hat self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8) s = compute_conv_output_size(size, size // 8) s //= 2 self.c2 = torch.nn.Conv2d(64, 128, kernel_size=size // 10) s = compute_conv_output_size(s, size // 10) s //= 2 self.c3 = torch.nn.Conv2d(128, 256, kernel_size=2) s = compute_conv_output_size(s, 2) s //= 2 self.smid = s self.maxpool = torch.nn.MaxPool2d(2) self.relu = torch.nn.ReLU() self.drop1 = torch.nn.Dropout(0.2) self.drop2 = torch.nn.Dropout(0.5) self.fc1 = torch.nn.Linear(256 * self.smid * self.smid, 2048) self.fc2 = torch.nn.Linear(2048, 2048) self.output_layers = torch.nn.ModuleList() n_tasks = len(self.n_classes_per_task) # TODO: (@lebrice) Here I'm 'fixing' this, by making it so each output head has # as many outputs as there are classes in total. It's not super efficient, but # it should work. total_classes = sum(self.n_classes_per_task.values()) for task_index, n_classes_in_task in self.n_classes_per_task.items(): self.output_layers.append(torch.nn.Linear(2048, total_classes)) self.gate = torch.nn.Sigmoid() # All embedding stuff should start with 'e' self.ec1 = torch.nn.Embedding(n_tasks, 64) self.ec2 = torch.nn.Embedding(n_tasks, 128) self.ec3 = torch.nn.Embedding(n_tasks, 256) self.efc1 = torch.nn.Embedding(n_tasks, 2048) self.efc2 = torch.nn.Embedding(n_tasks, 2048) self.flatten = torch.nn.Flatten() self.loss = torch.nn.CrossEntropyLoss() self.current_task: Optional[int] = 0 def forward(self, observations: TaskIncrementalSLSetting.Observations) -> Tuple[Tensor, Masks]: observations.as_list_of_tuples() x = observations.x t = observations.task_labels # BUG: This won't work if task_labels is None (which is the case at # test-time in the ClassIncrementalSetting) masks = self.mask(t, s_hat=self.s_hat) gc1, gc2, gc3, gfc1, gfc2 = masks # Gated h = self.maxpool(self.drop1(self.relu(self.c1(x)))) h = h * gc1.unsqueeze(2).unsqueeze(3) h = self.maxpool(self.drop1(self.relu(self.c2(h)))) h = h * gc2.unsqueeze(2).unsqueeze(3) h = self.maxpool(self.drop2(self.relu(self.c3(h)))) h = h * gc3.unsqueeze(2).unsqueeze(3) h = self.flatten(h) h = self.drop2(self.relu(self.fc1(h))) h = h * gfc1.expand_as(h) h = self.drop2(self.relu(self.fc2(h))) h = h * gfc2.expand_as(h) # Each batch can have elements of more than one Task (in test) # In Task Incremental Learning, each task have it own classification head. y: Optional[Tensor] = None task_masks = {} for task_id in set(t.tolist()): task_mask = t == task_id task_masks[task_id] = task_mask y_pred_t = self.output_layers[task_id](h.clone()) if y is None: y = y_pred_t else: y[task_mask] = y_pred_t[task_mask] assert y is not None return y, masks def mask(self, t: Tensor, s_hat: float) -> Masks: gc1 = self.gate(s_hat * self.ec1(t)) gc2 = self.gate(s_hat * self.ec2(t)) gc3 = self.gate(s_hat * self.ec3(t)) gfc1 = self.gate(s_hat * self.efc1(t)) gfc2 = self.gate(s_hat * self.efc2(t)) return Masks(gc1, gc2, gc3, gfc1, gfc2) def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment ) -> Tuple[Tensor, Dict]: """Shared step used for both training and validation. Parameters ---------- batch : Tuple[Observations, Optional[Rewards]] Batch containing Observations, and optional Rewards. When the Rewards are None, it means that we'll need to provide the Environment with actions before we can get the Rewards (e.g. image labels) back. This happens for example when being applied in a Setting which cares about sample efficiency or training performance, for example. environment : Environment The environment we're currently interacting with. Used to provide the rewards when they aren't already part of the batch (as mentioned above). Returns ------- Tuple[Tensor, Dict] The Loss tensor, and a dict of metrics to be logged. """ # Since we're training on a Passive environment, we will get both observations # and rewards, unless we're being evaluated based on our training performance, # in which case we will need to send actions to the environments before we can # get the corresponding rewards (image labels) back. observations: Observations = batch[0] rewards: Optional[Rewards] = batch[1] # Get the predictions: logits, _ = self(observations) y_pred = logits.argmax(-1) if rewards is None: # If the rewards in the batch were None, it means we're expected to give # actions before we can get rewards back from the environment. # This happens when the Setting is monitoring our training performance. rewards = environment.send(Actions(y_pred)) assert rewards is not None image_labels = rewards.y loss = self.loss(logits, image_labels) accuracy = (y_pred == image_labels).sum().float() / len(image_labels) metrics_dict = {"accuracy": accuracy} return loss, metrics_dict def compute_conv_output_size( Lin: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1 ) -> int: return int(np.floor((Lin + 2 * padding - dilation * (kernel_size - 1) - 1) / float(stride) + 1)) @register_method class HatDemoMethod(Method, target_setting=TaskIncrementalSLSetting): """ Here we implement the method according to the characteristics and methodology of the current proposal. It should be as much as possible agnostic to the model and setting we are going to use. The method proposed can be specific to a setting to make comparisons easier. Here what we control is the model's training process, given a setting that delivers data in a certain way. """ @dataclass class HParams: """Hyper-parameters of the Settings.""" # Learning rate of the optimizer. learning_rate: float = 0.001 # Batch size batch_size: int = 128 # weight/importance of the task embedding to the gate function s_hat: float = 50.0 # Maximum number of training epochs per task max_epochs_per_task: int = 2 def __init__(self, hparams: HParams = None): self.hparams: HatDemoMethod.HParams = hparams or self.HParams() # We will create those when `configure` will be called, before training. self.model: HatNet self.optimizer: torch.optim.Optimizer def configure(self, setting: TaskIncrementalSLSetting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ setting.batch_size = self.hparams.batch_size assert ( setting.increment == setting.test_increment ), "Assuming same number of classes per task for training and testing." n_classes_per_task = { i: setting.num_classes_in_task(i, train=True) for i in range(setting.nb_tasks) } image_space: Image = setting.observation_space["x"] self.model = HatNet( image_space=image_space, n_classes_per_task=n_classes_per_task, s_hat=self.hparams.s_hat, ) self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.hparams.learning_rate, ) def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): """ Train loop Different Settings can return elements from tasks in an other way, be it class incremental, task incremental, etc. Batch can have information about en environment, rewards, input, task labels, etc. And we call the forward training function of our method, independent of the settings """ # configure() will have been called by the setting before we get here, best_val_loss = inf best_epoch = 0 for epoch in range(self.hparams.max_epochs_per_task): self.model.train() print(f"Starting epoch {epoch}") # Training loop: with tqdm.tqdm(train_env) as train_pbar: postfix = {} train_pbar.set_description(f"Training Epoch {epoch}") for i, batch in enumerate(train_pbar): loss, metrics_dict = self.model.shared_step( batch, environment=train_env, ) self.optimizer.zero_grad() loss.backward() self.optimizer.step() postfix.update(metrics_dict) train_pbar.set_postfix(postfix) # Validation loop: self.model.eval() torch.set_grad_enabled(False) with tqdm.tqdm(valid_env) as val_pbar: postfix = {} val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 for i, batch in enumerate(val_pbar): batch_val_loss, metrics_dict = self.model.shared_step( batch, environment=valid_env, ) epoch_val_loss += batch_val_loss postfix.update(metrics_dict, val_loss=epoch_val_loss) val_pbar.set_postfix(postfix) torch.set_grad_enabled(True) if epoch_val_loss < best_val_loss: best_val_loss = epoch_val_loss best_epoch = i def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: """Get a batch of predictions (aka actions) for these observations.""" with torch.no_grad(): logits, _ = self.model(observations) # Get the predicted classes y_pred = logits.argmax(dim=-1) return self.target_setting.Actions(y_pred) def on_task_switch(self, task_id: Optional[int]): # This method gets called if task boundaries are known in the current # setting. Furthermore, if task labels are available, task_id will be # the index of the new task. If not, task_id will be None. # TODO: Does this method actually work when task_id is None? self.model.current_task = task_id @classmethod def add_argparse_args(cls, parser: ArgumentParser) -> None: parser.add_arguments(cls.HParams, dest="hparams") # You can also add arguments as usual: # parser.add_argument("--foo", default=123) @classmethod def from_argparse_args(cls, args: Namespace) -> "HatDemoMethod": hparams: HatDemoMethod.HParams = args.hparams # foo: int = args.foo method = cls(hparams=hparams) return method if __name__ == "__main__": # Example: Evaluate a Method on a single CL setting: parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) """ We must define 3 main components: 1.- Setting: It is the continual learning scenario that we are working, SL or RL, TI or CI Each settings has it own parameters that can be customized. 2.- Model: Is the parameters and layers of the model, just like in PyTorch. We can use a predefined model or create your own 3.- Method: It is how we are going to use what the settings give us to train our model. Same as before, we can define our own or use pre-defined Methods. """ ## Add arguments for the Method, the Setting, and the Config. ## (Config contains options like the log_dir, the data_dir, etc.) HatDemoMethod.add_argparse_args(parser, dest="method") parser.add_arguments(TaskIncrementalSLSetting, dest="setting") parser.add_arguments(Config, "config") args = parser.parse_args() ## Create the Method from the args, and extract the Setting, and the Config: method: HatDemoMethod = HatDemoMethod.from_argparse_args(args, dest="method") setting: TaskIncrementalSLSetting = args.setting config: Config = args.config ## Apply the method to the setting, optionally passing in a Config, ## producing Results. results = setting.apply(method, config=config) print(results.summary()) print(f"objective: {results.objective}") ================================================ FILE: examples/advanced/hparam_tuning.py ================================================ """Runs a hyper-parameter tuning sweep, using Orion for HPO and wandb for visualization. # PREREQUISITES: 1. (Optional): If you want to run the sweep on the monsterkong env: At the time of writing, the monsterkong repo is private. Once the challenge is out, it will most probably be made public. In the meantime, you'll need to ask @mattriemer for access to the MonsterKong_examples repo. ``` pip install -e .[rl] ``` 2. Install the repo, along with the optional dependencies for Hyper-Parameter Optimization (HPO): ```console pip install -e .[hpo] ``` NOTE: You can also fuse the two steps above with `pip install -e .[rl,hpo]` 3. (Optional) Setup a database to hold the hyper-parameter configurations, following the [Orion database configuration documentation](https://orion.readthedocs.io/en/stable/install/database.html) The quickest way to get this setup is to run the `orion db setup` wizard, entering "pickleddb" as the database type: ```console $ orion db setup Enter the database type: (default: mongodb) pickleddb Enter the database name: (default: test) Enter the database host: (default: localhost) Default configuration file will be saved at: /home//.config/orion.core/orion_config.yaml ``` """ import wandb from sequoia.common import Config from sequoia.methods.base_method import BaseMethod from sequoia.settings import Results, Setting, TraditionalSLSetting from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) if __name__ == "__main__": from simple_parsing import ArgumentParser ## Create the Setting: from sequoia.settings import RLSetting setting = RLSetting(dataset="monsterkong") # from sequoia.settings import TaskIncrementalSLSetting # setting = TaskIncrementalSLSetting(dataset="cifar10") ## Create the BaseMethod: # Option 1: Create the method manually: # method = BaseMethod() # Option 2: From the command-line: method, unused_args = BaseMethod.from_known_args() # allow unused args. # parser = ArgumentParser(description=__doc__) # BaseMethod.add_argparse_args(parser, dest="method") # args, unused_args = parser.parse_known_args() # method: BaseMethod = BaseMethod.from_argparse_args(args, dest="method") # Search space for the Hyper-Parameter optimization algorithm. # NOTE: This is just a copy of the spaces that are auto-generated from the fields of # the `BaseModel.HParams` class. You can change those as you wish though. search_space = { "learning_rate": "loguniform(1e-06, 1e-02, default_value=0.001)", "weight_decay": "loguniform(1e-12, 1e-03, default_value=1e-06)", "optimizer": "choices(['sgd', 'adam', 'rmsprop'], default_value='adam')", "encoder": "choices({'resnet18': 0.5, 'simple_convnet': 0.5}, default_value='resnet18')", "output_head": { "activation": "choices(['relu', 'tanh', 'elu', 'gelu', 'relu6'], default_value='tanh')", "dropout_prob": "uniform(0, 0.8, default_value=0.2)", "gamma": "uniform(0.9, 0.999, default_value=0.99)", "normalize_advantages": "choices([True, False])", "actor_loss_coef": "uniform(0.1, 1, default_value=0.5)", "critic_loss_coef": "uniform(0.1, 1, default_value=0.5)", "entropy_loss_coef": "uniform(0, 1, discrete=True, default_value=0)", }, } best_hparams, best_results = method.hparam_sweep( setting, search_space=search_space, experiment_id="123" ) print(f"Best hparams: {best_hparams}, best perf: {best_results}") # results = setting.apply(method, config=Config(debug=True)) ================================================ FILE: examples/advanced/pnn/__init__.py ================================================ ================================================ FILE: examples/advanced/pnn/layers.py ================================================ import torch.nn as nn import torch.nn.functional as F from torchvision import transforms """ Based on https://github.com/TomVeniat/ProgressiveNeuralNetworks.pytorch """ class PNNConvLayer(nn.Module): def __init__(self, col, depth, n_in, n_out, kernel_size=3): super(PNNConvLayer, self).__init__() self.col = col self.layer = nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1) self.u = nn.ModuleList() if depth > 0: self.u.extend( [nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1) for _ in range(col)] ) def forward(self, inputs): if not isinstance(inputs, list): inputs = [inputs] cur_column_out = self.layer(inputs[-1]) prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)] return F.relu(cur_column_out + sum(prev_columns_out)) class PNNLinearBlock(nn.Module): def __init__(self, col: int, depth: int, n_in: int, n_out: int): super(PNNLinearBlock, self).__init__() self.layer = nn.Linear(n_in, n_out) self.u = nn.ModuleList() if depth > 0: self.u.extend([nn.Linear(n_in, n_out) for _ in range(col)]) def forward(self, inputs): if not isinstance(inputs, list): inputs = [inputs] cur_column_out = self.layer(inputs[-1]) prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)] return F.relu(cur_column_out + sum(prev_columns_out)) ================================================ FILE: examples/advanced/pnn/model_rl.py ================================================ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torchvision import transforms from .layers import PNNConvLayer, PNNLinearBlock class PnnA2CAgent(nn.Module): """ @article{rusu2016progressive, title={Progressive neural networks}, author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia}, journal={arXiv preprint arXiv:1606.04671}, year={2016} } """ def __init__(self, arch="mlp", hidden_size=256): super(PnnA2CAgent, self).__init__() self.columns_actor = nn.ModuleList([]) self.columns_critic = nn.ModuleList([]) self.columns_conv = nn.ModuleList([]) self.arch = arch self.hidden_size = hidden_size # Original size 3 x 400 x 600 self.transformation = transforms.Compose( [ transforms.ToPILImage(), transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), ] ) def forward(self, observations): assert ( self.columns_actor ), "PNN should at least have one column (missing call to `new_task` ?)" t = observations.task_labels if self.arch == "mlp": x = torch.from_numpy(observations.x).unsqueeze(0).float() inputs_critic = [c[1](c[0](x)) for c in self.columns_critic] inputs_actor = [c[1](c[0](x)) for c in self.columns_actor] outputs_critic = [] outputs_actor = [] for i, column in enumerate(self.columns_critic): outputs_critic.append(column[2](inputs_critic[: i + 1])) outputs_actor.append(self.columns_actor[i][2](inputs_actor[: i + 1])) ind_depth = 3 else: x = self.transfor_img(observations.x).unsqueeze(0).float() inputs = [c[1](c[0](x)) for c in self.columns_conv] outputs = [] for i, column in enumerate(self.columns_conv): outputs.append(column[3](column[2](inputs[: i + 1]))) inputs = outputs outputs = [] for i, column in enumerate(self.columns_conv): outputs.append(column[5](column[4](inputs[: i + 1]))) inputs_critic = [c[6](outputs[i]).view(1, -1) for i, c in enumerate(self.columns_conv)] inputs_actor = inputs_critic[:] outputs_critic = [] outputs_actor = [] for i, column in enumerate(self.columns_critic): outputs_critic.append(column[0](inputs_critic[: i + 1])) outputs_actor.append(self.columns_actor[i][0](inputs_actor[: i + 1])) ind_depth = 1 critic = [] for i, column in enumerate(self.columns_critic): critic.append(column[ind_depth](outputs_critic[i])) actor = [] for i, column in enumerate(self.columns_actor): actor.append(F.softmax(column[ind_depth](outputs_actor[i]), dim=1)) return critic[t], actor[t] def new_task(self, device, num_inputs, num_actions=5): task_id = len(self.columns_actor) if self.arch == "conv": sizes = [num_inputs, 32, 64, self.hidden_size] modules_conv = nn.Sequential() modules_conv.add_module("Conv1", PNNConvLayer(task_id, 0, sizes[0], sizes[1])) modules_conv.add_module("MaxPool1", nn.MaxPool2d(3)) modules_conv.add_module("Conv2", PNNConvLayer(task_id, 1, sizes[1], sizes[2])) modules_conv.add_module("MaxPool2", nn.MaxPool2d(3)) modules_conv.add_module("Conv3", PNNConvLayer(task_id, 2, sizes[2], sizes[3])) modules_conv.add_module("MaxPool3", nn.MaxPool2d(3)) modules_conv.add_module("globavgpool2d", nn.AdaptiveAvgPool2d((1, 1))) self.columns_conv.append(modules_conv) modules_actor = nn.Sequential() modules_critic = nn.Sequential() if self.arch == "mlp": modules_actor.add_module("linAc1", nn.Linear(num_inputs, self.hidden_size)) modules_actor.add_module("relAc", nn.ReLU(inplace=True)) modules_actor.add_module( "linAc2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size) ) modules_actor.add_module("linAc3", nn.Linear(self.hidden_size, num_actions)) if self.arch == "mlp": modules_critic.add_module("linCr1", nn.Linear(num_inputs, self.hidden_size)) modules_critic.add_module("relCr", nn.ReLU(inplace=True)) modules_critic.add_module( "linCr2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size) ) modules_critic.add_module("linCr3", nn.Linear(self.hidden_size, 1)) self.columns_actor.append(modules_actor) self.columns_critic.append(modules_critic) print("Add column of the new task") def unfreeze_columns(self): for i, c in enumerate(self.columns_actor): for params in c.parameters(): params.requires_grad = True for params in self.columns_critic[i].parameters(): params.requires_grad = True for i, c in enumerate(self.columns_conv): for params in c.parameters(): params.requires_grad = True def freeze_columns(self, skip=None): if skip == None: skip = [] self.unfreeze_columns() for i, c in enumerate(self.columns_actor): if i not in skip: for params in c.parameters(): params.requires_grad = False for params in self.columns_critic[i].parameters(): params.requires_grad = False for i, c in enumerate(self.columns_conv): if i not in skip: for params in c.parameters(): params.requires_grad = False print("Freeze columns from previous tasks") def parameters(self, task_id): param = [] for p in self.columns_critic[task_id].parameters(): param.append(p) for p in self.columns_actor[task_id].parameters(): param.append(p) if len(self.columns_conv) > 0: for p in self.columns_conv[task_id].parameters(): param.append(p) return param def transfor_img(self, img): return self.transformation(img) # return lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255. ================================================ FILE: examples/advanced/pnn/model_sl.py ================================================ from typing import Dict, List, Optional, Tuple import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from torch import Tensor from sequoia.settings import Actions, PassiveEnvironment from sequoia.settings.sl.incremental import Observations, Rewards from .layers import PNNConvLayer, PNNLinearBlock class PnnClassifier(nn.Module): """ @article{rusu2016progressive, title={Progressive neural networks}, author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia}, journal={arXiv preprint arXiv:1606.04671}, year={2016} } """ def __init__(self, n_layers): super().__init__() self.n_layers = n_layers self.columns = nn.ModuleList([]) self.loss = torch.nn.CrossEntropyLoss() self.device = None self.n_tasks = 0 self.n_classes_per_task: List[int] = [] def forward(self, observations): assert self.columns, "PNN should at least have one column (missing call to `new_task` ?)" x = observations.x x = torch.flatten(x, start_dim=1) labels = observations.task_labels # TODO: Debug this: inputs = [ c[0](x) + n_classes_in_task for n_classes_in_task, c in zip(self.n_classes_per_task, self.columns) ] for l in range(1, self.n_layers): outputs = [] for i, column in enumerate(self.columns): outputs.append(column[l](inputs[: i + 1])) inputs = outputs y: Optional[Tensor] = None task_masks = {} for task_id in set(labels.tolist()): task_mask = labels == task_id task_masks[task_id] = task_mask if y is None: y = inputs[task_id] else: y[task_mask] = inputs[task_id][task_mask] assert y is not None, "Can't get prediction in model PNN" return y # def new_task(self, device, num_inputs, num_actions = 5): def new_task(self, device, sizes: List[int]): assert len(sizes) == self.n_layers + 1, ( f"Should have the out size for each layer + input size (got {len(sizes)} " f"sizes but {self.n_layers} layers)." ) self.n_tasks += 1 # TODO: Fix this to use the actual number of classes per task. self.n_classes_per_task.append(2) task_id = len(self.columns) modules = [] for i in range(0, self.n_layers): modules.append(PNNLinearBlock(col=task_id, depth=i, n_in=sizes[i], n_out=sizes[i + 1])) new_column = nn.ModuleList(modules).to(device) self.columns.append(new_column) self.device = device print("Add column of the new task") def freeze_columns(self, skip=None): if skip == None: skip = [] for i, c in enumerate(self.columns): for params in c.parameters(): params.requires_grad = True for i, c in enumerate(self.columns): if i not in skip: for params in c.parameters(): params.requires_grad = False print("Freeze columns from previous tasks") def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], environment: PassiveEnvironment, ): """Shared step used for both training and validation. Parameters ---------- batch : Tuple[Observations, Optional[Rewards]] Batch containing Observations, and optional Rewards. When the Rewards are None, it means that we'll need to provide the Environment with actions before we can get the Rewards (e.g. image labels) back. This happens for example when being applied in a Setting which cares about sample efficiency or training performance, for example. environment : Environment The environment we're currently interacting with. Used to provide the rewards when they aren't already part of the batch (as mentioned above). Returns ------- Tuple[Tensor, Dict] The Loss tensor, and a dict of metrics to be logged. """ # Since we're training on a Passive environment, we will get both observations # and rewards, unless we're being evaluated based on our training performance, # in which case we will need to send actions to the environments before we can # get the corresponding rewards (image labels). observations: Observations = batch[0].to(self.device) rewards: Optional[Rewards] = batch[1] # Get the predictions: logits = self(observations) y_pred = logits.argmax(-1) # TODO: PNN is coded for the DomainIncrementalSetting, where the action space # is the same for each task. # Get the rewards, if necessary: if rewards is None: rewards = environment.send(Actions(y_pred)) image_labels = rewards.y.to(self.device) # print(logits.size()) loss = self.loss(logits, image_labels) accuracy = (y_pred == image_labels).sum().float() / len(image_labels) metrics_dict = {"accuracy": accuracy} return loss, metrics_dict def parameters(self, task_id): return self.columns[task_id].parameters() ================================================ FILE: examples/advanced/pnn/pnn_method.py ================================================ import sys from argparse import Namespace from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple, Union import gym import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import tqdm from gym import spaces from gym.spaces import Box from numpy import inf from scipy.signal import lfilter from simple_parsing import ArgumentParser from torchvision import transforms from examples.advanced.pnn.model_rl import PnnA2CAgent from examples.advanced.pnn.model_sl import PnnClassifier from sequoia import Environment from sequoia.common import Config from sequoia.common.spaces import Image from sequoia.common.transforms.utils import is_image from sequoia.settings import Actions, Method, Observations, Rewards, Setting from sequoia.settings.assumptions import IncrementalAssumption from sequoia.settings.rl import ActiveEnvironment, RLSetting, TaskIncrementalRLSetting from sequoia.settings.sl import ( DomainIncrementalSLSetting, PassiveEnvironment, SLSetting, TaskIncrementalSLSetting, ) class PnnMethod(Method, target_setting=Setting): """ Here we implement the PNN Method according to the characteristics and methodology of the current proposal. It should be as much as possible agnostic to the model and setting we are going to use. The method proposed can be specific to a setting to make comparisons easier. Here what we control is the model's training process, given a setting that delivers data in a certain way. """ @dataclass class HParams: """Hyper-parameters of the Pnn method.""" # Learning rate of the optimizer. Defauts to 0.0001 when in SL. learning_rate: float = 2e-4 num_steps: int = 200 # (only applicable in RL settings.) # Discount factor (Only used in RL settings). gamma: float = 0.99 # Number of hidden units (only used in RL settings.) hidden_size: int = 256 # Batch size in SL, and number of parallel environments in RL. # Defaults to None in RL, and 32 when in SL. batch_size: Optional[int] = None # Maximum number of training epochs per task. (only used in SL Settings) max_epochs_per_task: int = 2 def __init__(self, hparams: HParams = None): # We will create those when `configure` will be called, before training. self.config: Optional[Config] = None self.task_id: Optional[int] = 0 self.hparams: Optional[PnnMethod.HParams] = hparams self.model: Union[PnnA2CAgent, PnnClassifier] self.optimizer: torch.optim.Optimizer def configure(self, setting: Setting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ input_space: Box = setting.observation_space["x"] task_label_space = setting.observation_space["task_labels"] # For now all Settings have `Discrete` (i.e. classification) action spaces. action_space: spaces.Discrete = setting.action_space self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.num_actions = action_space.n self.num_inputs = np.prod(input_space.shape) self.added_tasks = [] if isinstance(setting, RLSetting): # If we're applied to an RL setting: # Used these as the default hparams in RL: self.hparams = self.hparams or self.HParams( learning_rate=2e-4, num_steps=200, gamma=0.99, hidden_size=256, batch_size=None, ) assert self.hparams self.train_steps_per_task = setting.steps_per_task # We want a batch_size of None, i.e. only one observation at a time. setting.batch_size = None self.num_steps = self.hparams.num_steps # Otherwise, we can train basically as long as we want on each task. self.loss_function = { "gamma": self.hparams.gamma, } x_space = setting.observation_space.x if is_image(setting.observation_space.x): # Observing pixel input. self.arch = "conv" else: # Observing state input (e.g. the 4 floats in cartpole rather than images) self.arch = "mlp" self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size) else: # If we're applied to a Supervised Learning setting: # Used these as the default hparams in SL: self.hparams = self.hparams or self.HParams( learning_rate=0.0001, batch_size=32, ) if self.hparams.batch_size is None: self.hparams.batch_size = 32 # Set the batch size on the setting. setting.batch_size = self.hparams.batch_size # For now all Settings on the supervised side of the tree have images as # inputs, so the observation spaces are of type `Image` (same as Box, but with # additional `h`, `w`, `c` and `b` attributes). assert isinstance(input_space, Image) assert ( setting.increment == setting.test_increment ), "Assuming same number of classes per task for training and testing." # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output # head has as many outputs as there are classes in total, which might make # no sense, but currently works. # It would be better to refactor this so that each output head can have only # as many outputs as is required, and then reshape / offset the predictions. n_outputs = setting.increment n_outputs = setting.action_space.n self.layer_size = [self.num_inputs, 256, n_outputs] self.model = PnnClassifier( n_layers=len(self.layer_size) - 1, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting.""" # This method gets called if task boundaries are known in the current # setting. Furthermore, if task labels are available, task_id will be # the index of the new task. If not, task_id will be None. # For example, you could do something like this: # self.model.current_task = task_id # This freezes all columns except the one for the next task.. but there might # not yet be a column for the new task! self.model.freeze_columns(skip=[task_id]) if task_id not in self.added_tasks: if isinstance(self.model, PnnA2CAgent): self.model.new_task( device=self.device, num_inputs=self.num_inputs, num_actions=self.num_actions, ) else: self.model.new_task(device=self.device, sizes=self.layer_size) self.added_tasks.append(task_id) self.task_id = task_id def set_optimizer(self): self.optimizer = torch.optim.Adam( self.model.parameters(self.task_id), lr=self.hparams.learning_rate, ) def get_actions(self, observations: Observations, action_space: spaces.Space) -> Actions: """Get a batch of predictions (aka actions) for the given observations.""" observations = observations.to(self.device) with torch.no_grad(): if isinstance(self.model, PnnA2CAgent): predictions = self.model(observations) _, logit = predictions # get the predicted action: action = torch.argmax(logit).item() else: logits = self.model(observations) # Get the predicted classes y_pred = logits.argmax(dim=-1) action = y_pred assert action in action_space, (action, action_space) return action def fit(self, train_env: Environment, valid_env: Environment): """Train and validate this method using the "environments" for the current task. NOTE: `train_env` and `valid_env` are both `gym.Env`s as well as `DataLoader`s. This means that if you want to write a "regular" SL training loop, you totally can, and if you want to write you RL-style training loop, you can also do that. """ if isinstance(train_env.unwrapped, PassiveEnvironment): self.fit_sl(train_env, valid_env) else: self.fit_rl(train_env, valid_env) def fit_rl(self, train_env: gym.Env, valid_env: gym.Env): """Training loop for Reinforcement Learning (a.k.a. "active") environment.""" """ base on https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f """ if self.model is None: self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size) assert isinstance(self.model, PnnA2CAgent) self.set_optimizer() assert self.hparams # self.model.float() all_lengths = [] average_lengths = [] all_rewards = [] entropy_term = 0 for episode in range(self.train_steps_per_task): values = [] rewards = [] log_probs = [] state = train_env.reset() for steps in range(self.num_steps): value, policy_dist = self.model(state) value = value.item() dist = policy_dist.detach().numpy() action = np.random.choice(self.num_actions, p=np.squeeze(dist)) log_prob = torch.log(policy_dist.squeeze(0)[action]) entropy = -np.sum(np.mean(dist) * np.log(dist)) new_state, reward, done, _ = train_env.step(action) rewards.append(reward.y) values.append(value) log_probs.append(log_prob) entropy_term += entropy state = new_state if done or steps == self.num_steps - 1: Qval, _ = self.model(state) Qval = Qval.item() all_rewards.append(np.sum(rewards)) all_lengths.append(steps) average_lengths.append(np.mean(all_lengths[-10:])) if episode % 10 == 0: print( f"episode: {episode}, " f"reward: {np.sum(rewards)}, " f"total length: {steps}, " f"average length: {average_lengths[-1]}" ) break Qvals = np.zeros_like(values) for t in reversed(range(len(rewards))): Qval = rewards[t] + self.hparams.gamma * Qval Qvals[t] = Qval # update actor critic values_tensor = torch.as_tensor(values, dtype=torch.float) Qvals = torch.as_tensor(Qvals, dtype=torch.float) log_probs_tensor = torch.stack(log_probs) advantage = Qvals - values_tensor actor_loss = (-log_probs_tensor * advantage).mean() critic_loss = 0.5 * advantage.pow(2).mean() ac_loss = actor_loss + critic_loss + 0.001 * entropy_term self.optimizer.zero_grad() ac_loss.backward() self.optimizer.step() def fit_sl(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): """Train on a Supervised Learning (a.k.a. "passive") environment.""" observations: TaskIncrementalSLSetting.Observations = train_env.reset() cuda_observations = observations.to(self.device) assert isinstance(self.model, PnnClassifier) assert self.hparams self.set_optimizer() best_val_loss = inf best_epoch = 0 for epoch in range(self.hparams.max_epochs_per_task): self.model.train() print(f"Starting epoch {epoch}") # Training loop: with torch.set_grad_enabled(True), tqdm.tqdm(train_env) as train_pbar: postfix: Dict[str, Any] = {} train_pbar.set_description(f"Training Epoch {epoch}") for i, batch in enumerate(train_pbar): loss, metrics_dict = self.model.shared_step( batch, environment=train_env, ) self.optimizer.zero_grad() loss.backward() self.optimizer.step() postfix.update(metrics_dict) train_pbar.set_postfix(postfix) # Validation loop: self.model.eval() with torch.set_grad_enabled(False), tqdm.tqdm(valid_env) as val_pbar: postfix = {} val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 for i, batch in enumerate(val_pbar): batch_val_loss, metrics_dict = self.model.shared_step( batch, environment=valid_env, ) epoch_val_loss += batch_val_loss postfix.update(metrics_dict, val_loss=epoch_val_loss) val_pbar.set_postfix(postfix) @classmethod def add_argparse_args(cls, parser: ArgumentParser) -> None: parser.add_arguments(cls.HParams, dest="hparams", default=None) @classmethod def from_argparse_args(cls, args: Namespace) -> "PnnMethod": hparams: PnnMethod.HParams = args.hparams method = cls(hparams=hparams) return method def main_rl(): """Applies the PnnMethod in a RL Setting.""" parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) Config.add_argparse_args(parser, dest="config") PnnMethod.add_argparse_args(parser, dest="method") setting = TaskIncrementalRLSetting( dataset="cartpole", nb_tasks=2, train_task_schedule={ 0: {"gravity": 10, "length": 0.3}, 1000: {"gravity": 10, "length": 0.5}, }, ) args = parser.parse_args() config: Config = Config.from_argparse_args(args, dest="config") method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method") method.config = config # 2. Creating the Method # method = ImproveMethod() # 3. Applying the method to the setting: results = setting.apply(method, config=config) print(results.summary()) print(f"objective: {results.objective}") return results def main_sl(): """Applies the PnnMethod in a SL Setting.""" parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) # Add arguments for the Setting # TODO: PNN is coded for the DomainIncrementalSetting, where the action space # is the same for each task. # parser.add_arguments(DomainIncrementalSetting, dest="setting") parser.add_arguments(TaskIncrementalSLSetting, dest="setting") # TaskIncrementalSLSetting.add_argparse_args(parser, dest="setting") Config.add_argparse_args(parser, dest="config") # Add arguments for the Method: PnnMethod.add_argparse_args(parser, dest="method") args = parser.parse_args() # setting: TaskIncrementalSLSetting = args.setting setting: TaskIncrementalSLSetting = TaskIncrementalSLSetting.from_argparse_args( # setting: DomainIncrementalSetting = DomainIncrementalSetting.from_argparse_args( args, dest="setting", ) config: Config = Config.from_argparse_args(args, dest="config") method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method") method.config = config results = setting.apply(method, config=config) print(results.summary()) return results if __name__ == "__main__": # Run RL Setting main_sl() # Run SL Setting # main_rl() ================================================ FILE: examples/advanced/procgen_example.py ================================================ """ Example of how to create an incremental RL Setting with custom environments for each task. In this example, we create environments using [the `procgen` package](https://github.com/openai/procgen). """ import dataclasses from dataclasses import dataclass, replace from typing import Dict, List, NamedTuple, Optional, Type, TypeVar import gym import numpy as np from sequoia.settings.rl import ( IncrementalRLSetting, MultiTaskRLSetting, TaskIncrementalRLSetting, TraditionalRLSetting, ) @dataclass class ProcGenConfig: """Options for creating an environment from ProcGen. The fields on this dataclass match the arguments that can be passed to `gym.make`, based on the README of the procgen repo. """ # Name of environment, or comma-separate list of environment names to instantiate as each env # in the VecEnv. env_name: str = "coinrun-v0" # The number of unique levels that can be generated. Set to 0 to use unlimited levels. num_levels: int = 0 # The lowest seed that will be used to generated levels. 'start_level' and 'num_levels' fully # specify the set of possible levels. start_level: int = 0 # Paint player velocity info in the top left corner. Only supported by certain games. paint_vel_info: bool = False # Use randomly generated assets in place of human designed assets. use_generated_assets: bool = False # Set to True to use the debug build if building from source. debug: bool = False # Useful flag that's passed through to procgen envs. Use however you want during debugging. debug_mode: int = 0 # Determines whether observations are centered on the agent or display the full level. # Override at your own risk. center_agent: bool = True # When you reach the end of a level, the episode is ended and a new level is selected. # If use_sequential_levels is set to True, reaching the end of a level does not end the episode, # and the seed for the new level is derived from the current level seed. # If you combine this with start_level= and num_levels=1, you can have a single # linear series of levels similar to a gym-retro or ALE game. use_sequential_levels: bool = False # What variant of the levels to use, the options are "easy", "hard", "extreme", "memory", # "exploration". All games support "easy" and "hard", while other options are game-specific. # The default is "hard". Switching to "easy" will reduce the number of timesteps required to # solve each game and is useful for testing or when working with limited compute resources. distribution_mode: str = "hard" # Normally games use human designed backgrounds, if this flag is set to False, games will use # pure black backgrounds. use_backgrounds: bool = True # Some games select assets from multiple themes, if this flag is set to True, those games will # only use a single theme. restrict_themes: bool = False # If set to True, games will use monochromatic rectangles instead of human designed assets. # Best used with restrict_themes=True. use_monochrome_assets: bool = False def make_env(self) -> gym.Env: """Creates the environment using these options.""" env_id = f"procgen:procgen-{self.env_name}" # Create the env by passing the arguments to gym.make, same as what is done in the README of # the procgen repo. procgen_env = gym.make( id=env_id, num_levels=self.num_levels, start_level=self.start_level, paint_vel_info=self.paint_vel_info, use_generated_assets=self.use_generated_assets, debug=self.debug, center_agent=self.center_agent, use_sequential_levels=self.use_sequential_levels, distribution_mode=self.distribution_mode, use_backgrounds=self.use_backgrounds, restrict_themes=self.restrict_themes, use_monochrome_assets=self.use_monochrome_assets, ) # NOTE: The environments that are created with `gym.make("procgen:procgen-...")` are # instances of the `gym3.interop:ToGymEnv` class, which has a slightly different API than # the `gym.Env` class: # (Taken From gym3/interop.py:) # > - The `render()` method does nothing in "human" mode, in "rgb_array" mode the info dict # is checked for a key named "rgb" and info["rgb"][0] is returned if present # > - `seed()` and `close() are ignored since gym3 environments do not require these methods # # Therefore, for now, since in Sequoia we assume that the envs fit the gym.Env API, we have to # "patch" these different methods up a bit. Here I suggest we do this using a wrapper # (defined below) wrapped_env = SequoiaProcGenAdapterWrapper(env=procgen_env) return wrapped_env class SequoiaProcGenAdapterWrapper(gym.Wrapper): """A wrapper around an environment from ProcGen to patch up the methods/properties that differ from the gym API: - The `seed` method doesn't ahve the right number of arguments. - The `done` value is of type `np.bool_` instead of a plain bool. - `render` returns None. """ def __init__(self, env): super().__init__(env=env) def step(self, action): obs, rewards, done, info = self.env.step(action) if isinstance(done, np.bool_): done = bool(done) return obs, rewards, done, info def seed(self, seed: Optional[int] = None) -> List[int]: # The procgen env apparently doesn't have (or need?) a `seed` method, but they don't # implement it corrently, by not accepting a `seed` argument! return [] def render(self, mode: str = "rgb_array"): # note: rendering doesn't seem to be working: `self.env.render("rgb_array")` returns None. array: Optional[np.ndarray] = self.env.render("rgb_array") return array # Type variable for a type of setting that supports passing envs for each task (all settings below # `InrementalRLSetting`). SettingType = TypeVar("SettingType", bound=IncrementalRLSetting) available_envs = [ "bigfish", "bossfight", "caveflyer", "chaser", "climber", "coinrun", "dodgeball", "fruitbot", "heist", "jumper", "leaper", "maze", "miner", "ninja", "plunder", "starpilot", ] def make_procgen_setting( env_name: str, nb_tasks: int, num_levels_per_task: int = 1, overlapping_levels_between_tasks: int = 0, common_options: ProcGenConfig = None, setting_type: Type[SettingType] = TaskIncrementalRLSetting, ) -> SettingType: """Creates an RL Setting that uses environments from procgen. Parameters ---------- env_name : str Name of the environment from procgen to use. Should include the version tag. For example: "coinrun-v0". nb_tasks : int Number of tasks in the setting. num_levels_per_task : int, optional Number of generated levels per task, by default 1 overlapping_levels_between_tasks : int, optional Number of levels in common between neighbouring tasks. Needs to be less than `num_levels_per_task`. Defaults to 0, in which case all tasks distinct levels. common_options : ProcGenConfig, optional Set of options common to the envs of all the tasks. This can be used to set the starting level, for example. Defaults to None, in which case the default options from `ProcGenConfig` are used. setting_type : Type[SettingType], optional The type of setting to create, by default TaskIncrementalRLSetting. For example, say `nb_tasks`=5, `num_levels_per_task`=2, `overlapping_levels_between_tasks`=1: task #1: levels: [0, 1] task #2: levels: [1, 2] task #3: levels: [2, 3] task #4: levels: [3, 4] task #5: levels: [4, 5] For example, say `nb_tasks`=5, `num_levels_per_task`=5, `overlapping_levels_between_tasks`=2: task #1: levels: [0, 1, 2, 3, 4] task #2: levels: [3, 4, 5, 6, 7] task #3: levels: [6, 7, 8, 9, 10] task #4: levels: [9, 10, 11, 12, 13] task #5: levels: [12, 13, 14, 15, 16] NOTE: (lebrice): Maybe this (and other benchmark-creating functions) could be classmethods on the settings, instead of passing the setting_type as a parameter! Returns ------- SettingType A Setting of type `setting_type` (`TaskIncrementalRLSetting`) by default, where each task uses environments from ProcGen. """ assert overlapping_levels_between_tasks < num_levels_per_task # Create the options common to every task. if common_options is None: common_options = ProcGenConfig(env_name=env_name) else: common_options = dataclasses.replace(common_options, env_name=env_name) # Get the starting levels for each task, as shown in the docstring above. offset = num_levels_per_task - overlapping_levels_between_tasks first_task_start_level = common_options.start_level last_task_start_level = common_options.start_level + offset * nb_tasks start_levels: List[int] = list(range(first_task_start_level, last_task_start_level, offset)) # Create the configurations that will be used to create the train/valid/test environments for # each task by starting from the common options, and overwriting the values of `start_level`. train_env_configs: List[ProcGenConfig] = [ replace(common_options, start_level=start_levels[task_id], num_levels=num_levels_per_task) for task_id in range(nb_tasks) ] # NOTE: For now the validation and testing environment are the same as those for training. # This could easily be different though! # For example: # - the test environments could have a background while the train/valid envs don't! # --> This could be super interesting to researchers in Out-of-Distribution RL! valid_env_configs: List[ProcGenConfig] = train_env_configs.copy() test_env_configs: List[ProcGenConfig] = train_env_configs.copy() # Here we pass a list of functions to be called to create each env. This can be a bit better # than passing the envs themselves, as it saves some memory, and also because we'll be able to # close the envs after each task (since we can always re-create them). setting = setting_type( dataset=None, train_envs=[config.make_env for config in train_env_configs], val_envs=[config.make_env for config in valid_env_configs], test_envs=[config.make_env for config in test_env_configs], ) return setting from sequoia.common.config import Config from sequoia.methods.random_baseline import RandomBaselineMethod def main_simple(): # Simple example: Create a Task-Incremental RL setting using procgen envs. setting = make_procgen_setting(env_name="coinrun-v0", nb_tasks=5) method = RandomBaselineMethod() # NOTE: The `render` option isn't yet working (see above) results = setting.apply(method, config=Config(debug=True, render=False)) print(results.summary()) def main_using_other_setting(): # Example where we change what kind of setting we want to create. class Key(NamedTuple): stationary_context: bool task_labels_at_test_time: bool # This is here just to give an idea of the differences between these settings. available_settings: Dict[Key, Type[IncrementalRLSetting]] = { Key(task_labels_at_test_time=False, stationary_context=False): IncrementalRLSetting, Key(task_labels_at_test_time=True, stationary_context=False): TaskIncrementalRLSetting, Key(task_labels_at_test_time=False, stationary_context=True): TraditionalRLSetting, Key(task_labels_at_test_time=True, stationary_context=True): MultiTaskRLSetting, } # You can choose whichever setting you want, but for example: setting_type = available_settings[Key(task_labels_at_test_time=True, stationary_context=False)] # Create the Method. method = RandomBaselineMethod() setting = make_procgen_setting(env_name="coinrun-v0", nb_tasks=5, setting_type=setting_type) results = setting.apply(method, config=Config(debug=True, render=False)) print(results.summary()) if __name__ == "__main__": main_simple() ================================================ FILE: examples/basic/__init__.py ================================================ ================================================ FILE: examples/basic/base_method_demo.py ================================================ """ Example showing how the BaseMethod can be applied to get results in both RL and SL settings. """ from simple_parsing import ArgumentParser from sequoia.common import Config from sequoia.methods import BaseMethod from sequoia.settings import Setting, TaskIncrementalRLSetting, TaskIncrementalSLSetting def baseline_demo_simple(): config = Config() method = BaseMethod(config=config, max_epochs=1) ## Create *any* Setting from the tree, for example: # Supervised Learning Setting: setting = TaskIncrementalSLSetting( dataset="cifar10", nb_tasks=2, ) ## Reinforcement Learning Setting: # setting = TaskIncrementalRLSetting( # dataset="cartpole", # train_max_steps=4000, # nb_tasks=2, # ) results = setting.apply(method, config=config) print(results.summary()) return results def baseline_demo_command_line(): parser = ArgumentParser(__doc__, add_dest_to_option_strings=False) # Supervised Learning Setting: parser.add_arguments(TaskIncrementalSLSetting, dest="setting") # Reinforcement Learning Setting: # parser.add_arguments(TaskIncrementalRLSetting, dest="setting") parser.add_arguments(Config, dest="config") BaseMethod.add_argparse_args(parser, dest="method") args = parser.parse_args() setting: Setting = args.setting config: Config = args.config method: BaseMethod = BaseMethod.from_argparse_args(args, dest="method") results = setting.apply(method, config=config) print(results.summary()) return results if __name__ == "__main__": ### Option 1: Create the BaseMethod and Settings manually. baseline_demo_simple() ### Option 2: Create the BaseMethod and Settings from the command-line. # baseline_demo_command_line() ================================================ FILE: examples/basic/pl_example.py ================================================ """A simple example for creating a Method using PyTorch-Lightning. Run this as: ```console $> python examples/basic/pl_examples.py ``` """ from dataclasses import asdict, dataclass from typing import Optional, Tuple import torch from gym import spaces from pytorch_lightning import LightningModule, Trainer from torch import Tensor, nn from torch.optim import Adam from sequoia.common.config import Config from sequoia.common.spaces import Image from sequoia.methods import Method from sequoia.settings.assumptions.task_type import ClassificationActions from sequoia.settings.sl.continual import ( Actions, ContinualSLSetting, Observations, ObservationSpace, Rewards, ) class Model(LightningModule): """Example Pytorch Lightning model used for continual image classification. Used by the `ExampleMethod` below. """ @dataclass class HParams: """Hyper-parameters of our model. NOTE: dataclasses are totally optional. This is just much nicer than dicts or ugly namespaces. """ # Learning rate. learning_rate: float = 1e-3 # Maximum number of training epochs per task. max_epochs_per_task: int = 1 def __init__( self, input_space: ObservationSpace, output_space: spaces.Discrete, hparams: HParams = None, ): super().__init__() hparams = hparams or self.HParams() # NOTE: `input_space` is a subclass of `gym.spaces.Dict`. It contains (at least) # the `x` key, but can also contain other things, for example the task labels. # Doing things this way makes sure that this Model can also be applied to any # more specific Setting in the future (any setting with more information given)! image_space: Image = input_space.x # NOTE: `Image` is just a subclass of `gym.spaces.Box` with a few extra properties self.input_dims = image_space.shape # NOTE: Can't set the `hparams` attribute in PL, so use hp instead: self.hp = hparams self.save_hyperparameters({"hparams": asdict(hparams)}) in_channels: int = image_space.channels num_classes: int = output_space.n # Imitates the SimpleConvNet from sequoia.common.models.simple_convnet self.features = nn.Sequential( nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=1, bias=False), nn.BatchNorm2d(6), nn.ReLU(inplace=True), nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False), nn.BatchNorm2d(16), nn.ReLU(inplace=True), nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), nn.BatchNorm2d(16), nn.AdaptiveAvgPool2d(output_size=(8, 8)), # [16, 8, 8] # [32, 6, 6] nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False), nn.BatchNorm2d(32), nn.ReLU(inplace=True), # [32, 4, 4] nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False), nn.BatchNorm2d(32), nn.Flatten(), ) # Quick tip: In this case we have a fixed hidden size (thanks to the Adaptive # pooling layer above), but you could also use the cool new `nn.LazyLinear` when # you don't know the hidden size in advance! self.fc = nn.Sequential( nn.Flatten(), # nn.LazyLinear(out_features=120), nn.Linear(512, 120), nn.ReLU(), nn.Linear(120, 84), nn.ReLU(), nn.Linear(84, num_classes), ) self.loss = nn.CrossEntropyLoss() self.trainer: Trainer def forward(self, observations: ContinualSLSetting.Observations) -> Tensor: """Returns the logits for the given observation. Parameters ---------- observations : ContinualSLSetting.Observations dataclass with (at least) the following attributes: - "x" (Tensor): the samples (images) - "task_labels" (Optional[Tensor]): Task labels, when applicable. Returns ------- Tensor Classification logits for each class. """ x: Tensor = observations.x # Task labels for each sample. We don't use them in this example. t: Optional[Tensor] = observations.task_labels h_x = self.features(x) logits = self.fc(h_x) return logits def training_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int ) -> Tensor: return self.shared_step(batch=batch, batch_idx=batch_idx, stage="train") def validation_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int ) -> Tensor: return self.shared_step(batch=batch, batch_idx=batch_idx, stage="val") def test_step(self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int) -> Tensor: return self.shared_step(batch=batch, batch_idx=batch_idx, stage="test") def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, stage: str, ) -> Tensor: observations, rewards = batch logits = self(observations) y_pred = logits.argmax(-1) actions = ClassificationActions(y_pred=y_pred, logits=logits) if rewards is None: environment: ContinualSLSetting.Environment # The rewards (image labels) might not be given at the same time as the # observations (images), for example during testing, or if we're being # evaluated based on our online performance during training! # # When that is the case, we need to send the "action" (predictions) to the # environment using `send()` to get the rewards. actions = y_pred # Get the current environment / dataloader from the Trainer. environment = self.trainer.request_dataloader(self, stage) rewards = environment.send(actions) y: Tensor = rewards.y accuracy = (y_pred == y).int().sum() / len(y) self.log(f"{stage}/accuracy", accuracy, prog_bar=True) loss = self.loss(logits, y) return loss def configure_optimizers(self): return Adam(self.parameters(), lr=self.hp.learning_rate) class ExampleMethod(Method, target_setting=ContinualSLSetting): """Example method for solving Continual SL Settings with PyTorch-Lightning This ExampleMethod declares that it can be applied to any `Setting` that inherits from this `ContinualSLSetting`. NOTE: Settings in Sequoia are a subclass of `LightningDataModule`, which create the training/validation/testing `Environment`s that methods will interact with. Each setting defines an `apply` method, which serves as a "main loop", and describes when and on what data to train the Method, and how it will be evaluated, according to the usual methodology for that setting in the litterature. Importantly, settings do NOT describe **how** the method is to be trained, that is entirely up to the Method! """ def __init__(self, hparams: Model.HParams = None): super().__init__() self.hparams = hparams or Model.HParams() self.current_task: Optional[int] = None # NOTE: These get assigned in `configure` below: self.model: Model self.trainer: Trainer def configure(self, setting: ContinualSLSetting): """Called by the Setting so the method can configure itself before training. This could be used to, for example, create a model, since the observation space (which describes the types and shapes of the data) and the `nb_tasks` can be read from the Setting. Parameters ---------- setting : ContinualSLSetting The research setting that this `Method` will be applied to. """ if not setting.known_task_boundaries_at_train_time: # If we're being applied on a Setting where we don't have access to task # boundaries, then there is only one training environment that transitions # between all tasks and then closes itself. # We therefore limit the number of epochs per task to 1 in that case. self.hparams.max_epochs_per_task = 1 self.model = Model( input_space=setting.observation_space, output_space=setting.action_space, hparams=self.hparams, ) def fit( self, train_env: ContinualSLSetting.Environment, valid_env: ContinualSLSetting.Environment, ): """Called by the Setting to allow the method to train. The passed environments inherit from `DataLoader` as well as from `gym.Env`. They produce `Observations` (which have an `x` Tensor field, for instance), and return `Rewards` when they receive `Actions`. This interface is the same between RL and SL, making it easy to create methods that can adapt to both domains. Parameters ---------- train_env : ContinualSLSetting.Environment The Training environment. In the case of a `ContinualSLSetting`, this environment will smoothly transition between the different tasks. NOTE: Regardless of what exact type of `Setting` this method is being applied to, this environment will always be a subclass of `ContinualSLSetting.Environment`, and the `Observations`, `Actions`, `Rewards` produced by this environment will also always follow this hierarchy. This is important to note, since it makes it possible to create a Method that also works in other settings which add extra information in the observations (e.g. task labels)! valid_env : ContinualSLSetting.Environment The Validation environment. """ # NOTE: Currently have to 'reset' the Trainer for each call to `fit`. self.trainer = Trainer( gpus=torch.cuda.device_count(), max_epochs=self.hparams.max_epochs_per_task, ) self.trainer.fit(self.model, train_dataloader=train_env, val_dataloaders=valid_env) def test(self, test_env: ContinualSLSetting.Environment): """Called to let the Method handle the test loop by itself. The `test_env` will only give back rewards (y) once an action (y_pred) is sent to it via its `send` method. This test environment keeps track of some metrics of interest for its `Setting` (accuracy in this case) and reports them back to the `Setting` once the test environment has been exhausted. NOTE: The test environment will close itself when done, signifying the end of the test period. At that point, `test_env.is_closed()` will return `True`. """ # BUG: There is currently a bug with the test loop with Trainer: on_task_switch # doesn't get called properly. raise NotImplementedError # Use ckpt_path=None to use the current weights, rather than the "best" ones. self.trainer.test(self.model, ckpt_path=None, test_dataloaders=test_env) def get_actions(self, observations: Observations, action_space: spaces.MultiDiscrete): """Called by the Setting to query for individual predictions. You currently have to implement this, but if `test` is implemented, it will be used instead. Sorry if this isn't super clear. """ self.model.eval() with torch.no_grad(): logits = self.model(observations.to(self.model.device)) y_pred = logits.argmax(-1) return Actions(y_pred=y_pred) def on_task_switch(self, task_id: Optional[int]) -> None: """Can be called by the Setting when a task boundary is reached. This will be called if `setting.known_task_boundaries_at_[train/test]_time` is True, depending on if this is called during training or during testing. If `setting.task_labels_at_[train/test]_time` is True, then `task_id` will be the identifyer (index) of the next task. If the value is False, then `task_id` will be None. """ if task_id != self.current_task: phase = "training" if self.training else "testing" print(f"Switching tasks during {phase}: {self.current_task} -> {task_id}") self.current_task = task_id def main(): """Runs the example: applies the method on a Continual Supervised Learning Setting.""" # You could use any of the settings in SL, since this example methods targets the # most general Continual SL Setting in Sequoia: `ContinualSLSetting`: # from sequoia.settings.sl import ClassIncrementalSetting # Create the Setting: # NOTE: Since our model above uses an adaptive pooling layer, it should work on any # dataset! setting = ContinualSLSetting(dataset="mnist", monitor_training_performance=True) # Create the Method: method = ExampleMethod() # Create a config for the experiment (just so we can set a few options for this # example) config = Config(debug=True, log_dir="results/pl_example") # Launch the experiment: trains and tests the method according to the chosen # setting and returns a Results object. results = setting.apply(method, config=config) # Print the results, and show some plots! print(results.summary()) for figure_name, figure in results.make_plots().items(): print("Figure:", figure_name) figure.show() # figure.waitforbuttonpress(10) if __name__ == "__main__": main() ================================================ FILE: examples/basic/pl_example_packnet.py ================================================ from dataclasses import dataclass from typing import Optional import torch from simple_parsing import mutable_field from examples.basic.pl_example import ExampleMethod, Model from sequoia.common import Config from sequoia.methods import BaseModel from sequoia.methods.packnet_method import PackNet from sequoia.methods.trainer import Trainer, TrainerConfig from sequoia.settings.sl import ContinualSLSetting, TaskIncrementalSLSetting class ExamplePackNetMethod(ExampleMethod, target_setting=TaskIncrementalSLSetting): def __init__(self, hparams: Model.HParams = None, packnet_hparams: PackNet.HParams = None): super().__init__(hparams=hparams) self.packnet_hparams = packnet_hparams or PackNet.HParams() # TODO: Modify `hparams.max_epochs_per_task` to at least be enough so that # PackNet will work. min_epochs = self.packnet_hparams.train_epochs + self.packnet_hparams.fine_tune_epochs if self.hparams.max_epochs_per_task < min_epochs: self.hparams.max_epochs_per_task = min_epochs self.p_net: PackNet def configure(self, setting: TaskIncrementalSLSetting): super().configure(setting) # TODO: Why does PackNet need access to the number of tasks again? self.p_net = PackNet( n_tasks=setting.nb_tasks, hparams=self.packnet_hparams, ) # TODO: This could be set as default values in the PackNet constructor. self.p_net.current_task = -1 self.p_net.config_instructions() def fit( self, train_env: TaskIncrementalSLSetting.Environment, valid_env: TaskIncrementalSLSetting.Environment, ): # NOTE: PackNet is not compatible with EarlyStopping, thus we set max_epochs==min_epochs self.trainer = Trainer( gpus=torch.cuda.device_count(), min_epochs=self.p_net.total_epochs(), max_epochs=self.p_net.total_epochs(), callbacks=[self.p_net], ) self.trainer.fit(self.model, train_dataloader=train_env, val_dataloaders=valid_env) def on_task_switch(self, task_id: Optional[int]): """Called when switching between tasks. Args: task_id (int, optional): the id of the new task. When None, we are basically being informed that there is a task boundary, but without knowing what task we're switching to. """ super().on_task_switch(task_id=task_id) if task_id is not None and len(self.p_net.masks) > task_id: self.p_net.load_final_state(model=self.model) self.p_net.apply_eval_mask(task_idx=task_id, model=self.model) self.p_net.current_task = task_id def main(): """Runs the example: applies the method on a Continual Supervised Learning Setting.""" # You could use any of the settings in SL, since this example methods targets the # most general Continual SL Setting in Sequoia: `ContinualSLSetting`: # from sequoia.settings.sl import ClassIncrementalSetting # Create the Setting: # NOTE: Since our model above uses an adaptive pooling layer, it should work on any # dataset! setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method: method = ExamplePackNetMethod() # Create a config for the experiment (just so we can set a few options for this # example) config = Config(debug=False, log_dir="results/pl_example_packnet") # Launch the experiment: trains and tests the method according to the chosen # setting and returns a Results object. results = setting.apply(method, config=config) # Print the results, and show some plots! print(results.summary()) for figure_name, figure in results.make_plots().items(): print("Figure:", figure_name) figure.show() # figure.waitforbuttonpress(10) if __name__ == "__main__": main() ================================================ FILE: examples/basic/pl_example_test.py ================================================ """ Unit-tests for the PyTorch-Lightning Example. Can be run like so: ```console $ pytest examples/basic/pl_example_test.py ``` """ from typing import Type import pytest from examples.basic.pl_example import ExampleMethod, Model from sequoia.common.config import Config from sequoia.common.metrics import ClassificationMetrics from sequoia.methods import Method from sequoia.methods.method_test import MethodTests, config, session_config # type: ignore from sequoia.settings import Results from sequoia.settings.sl import ContinualSLSetting, IncrementalSLSetting class TestPLExample(MethodTests): """Tests for this PL Example. This `MethodTests` base class generates a `test_debug` test for us. """ Method: Type[Method] = ExampleMethod @pytest.fixture() def method(self, config: Config): """Required fixture, which creates a Method that can be used for quick tests.""" return ExampleMethod(hparams=Model.HParams(max_epochs_per_task=1)) def validate_results( self, setting: ContinualSLSetting, method: ExampleMethod, results: Results ): """This gets called by `test_debug` to check that the results make sense for the given setting and method. """ # NOTE: This particular example isn't that great: We just check that the average # final test accuracy and the average online accuracy are both non-zero. # It would be best to do some kind of branching depending on what type of # Setting was used, since each setting can produce different types of results. print(results.summary()) average_metrics: ClassificationMetrics online_metrics: ClassificationMetrics assert setting.monitor_training_performance todo = 0.0 if isinstance(setting, IncrementalSLSetting): # The results in this case include the entire nb_tasks x nb_tasks transfer # matrix. assert isinstance(results, IncrementalSLSetting.Results) average_metrics = results.average_final_performance online_metrics = results.average_online_performance if setting.stationary_context: # Example: Should expect better performance if the data is i.i.d! assert average_metrics.accuracy > todo else: assert average_metrics.accuracy > todo if setting.monitor_training_performance: assert online_metrics.accuracy > todo else: # In this case, there aren't clear 'tasks' to speak of, so the results are # just aggregated metrics for each test batch: assert isinstance(results, ContinualSLSetting.Results) average_metrics = results.average_metrics online_metrics = results.online_performance_metrics assert average_metrics.accuracy > todo assert online_metrics.accuracy > todo ================================================ FILE: examples/basic/quick_demo.ipynb ================================================ { "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python38364bitpy38conda80a8f432976e4e99926307fddceb6e0b", "display_name": "Python 3.8.3 64-bit ('py38': conda)", "language": "python" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "# Quick Demo (Notebook version)\n", "\n", "(I hate notebooks.)\n", "\n", "In this demo, we will create a simple method and apply it to various Continual Learning settings.\n", "\n", "For the purposes of this demo, we will restrict ourselves to classification problems on the mnist and fashion-mnist datasets." ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Imports:\n", "import sys\n", "from dataclasses import dataclass\n", "from typing import Dict, Optional, Tuple, Type\n", "\n", "import gym\n", "import torch\n", "from gym import spaces\n", "from torch import Tensor, nn\n", "from simple_parsing import ArgumentParser\n", "\n", "sys.path.extend([\".\", \"..\"])\n", "from sequoia.settings import Method, Setting\n", "from sequoia.settings.sl.class_incremental import ClassIncrementalSetting, DomainIncrementalSetting\n", "from sequoia.settings.sl.class_incremental.objects import (\n", " Actions,\n", " Environment,\n", " Observations,\n", " PassiveEnvironment,\n", " Results,\n", " Rewards,\n", ")" ] }, { "source": [ "# Basic Model:" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "\n", "class MyModel(nn.Module):\n", " \"\"\" Simple classification model without any CL-related mechanism.\n", "\n", " To keep things simple, this demo model is designed for supervised\n", " (classification) settings where observations have shape [3, 28, 28] (ie the\n", " MNIST variants: Mnist, FashionMnist, RotatedMnist, EMnist, etc.)\n", " \"\"\"\n", " def __init__(self,\n", " observation_space: gym.Space,\n", " action_space: gym.Space,\n", " reward_space: gym.Space):\n", " super().__init__()\n", " image_shape = observation_space["x"].shape\n", " assert image_shape == (3, 28, 28)\n", " assert isinstance(action_space, spaces.Discrete)\n", " assert action_space == reward_space\n", " n_classes = action_space.n\n", " image_channels = image_shape[0]\n", "\n", " self.encoder = nn.Sequential(\n", " nn.Conv2d(image_channels, 6, 5),\n", " nn.ReLU(),\n", " nn.MaxPool2d(2),\n", " nn.Conv2d(6, 16, 5),\n", " nn.ReLU(),\n", " nn.MaxPool2d(2),\n", " )\n", " self.classifier = nn.Sequential(\n", " nn.Flatten(),\n", " nn.Linear(256, 120),\n", " nn.ReLU(),\n", " nn.Linear(120, 84),\n", " nn.ReLU(),\n", " nn.Linear(84, n_classes),\n", " )\n", " self.loss = nn.CrossEntropyLoss()\n", "\n", " def forward(self, observations: Observations) -> Tensor:\n", " # NOTE: here we don't make use of the task labels.\n", " x = observations.x\n", " task_labels = observations.task_labels\n", " features = self.encoder(x)\n", " logits = self.classifier(features)\n", " return logits\n", "\n", " def shared_step(\n", " self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment\n", " ) -> Tuple[Tensor, Dict]:\n", " \"\"\"Shared step used for both training and validation.\n", " \n", " Parameters\n", " ----------\n", " batch : Tuple[Observations, Optional[Rewards]]\n", " Batch containing Observations, and optional Rewards. When the Rewards are\n", " None, it means that we'll need to provide the Environment with actions\n", " before we can get the Rewards (e.g. image labels) back.\n", " \n", " This happens for example when being applied in a Setting which cares about\n", " sample efficiency or training performance, for example.\n", " \n", " environment : Environment\n", " The environment we're currently interacting with. Used to provide the\n", " rewards when they aren't already part of the batch (as mentioned above).\n", "\n", " Returns\n", " -------\n", " Tuple[Tensor, Dict]\n", " The Loss tensor, and a dict of metrics to be logged.\n", " \"\"\"\n", " # Since we're training on a Passive environment, we will get both observations\n", " # and rewards, unless we're being evaluated based on our training performance,\n", " # in which case we will need to send actions to the environments before we can\n", " # get the corresponding rewards (image labels).\n", " observations: Observations = batch[0]\n", " rewards: Optional[Rewards] = batch[1]\n", " # Get the predictions:\n", " logits = self(observations)\n", " y_pred = logits.argmax(-1)\n", "\n", " if rewards is None:\n", " # If the rewards in the batch is None, it means we're expected to give\n", " # actions before we can get rewards back from the environment.\n", " rewards = environment.send(Actions(y_pred))\n", "\n", " assert rewards is not None\n", " image_labels = rewards.y\n", "\n", " loss = self.loss(logits, image_labels)\n", "\n", " accuracy = (y_pred == image_labels).sum().float() / len(image_labels)\n", " metrics_dict = {\"accuracy\": accuracy.item()}\n", " return loss, metrics_dict\n" ] }, { "source": [ "## Creating our Method\n", "\n", "Here by subclassing 'MethodABC' and passing in a target_setting, we indicate that we are creating a new method, and that it will work on any Setting that is an instance of ClassIncrementalSetting or one of its subclasses. " ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "\n", "class DemoMethod(Method, target_setting=ClassIncrementalSetting):\n", " \"\"\" Minimal example of a Method targetting the Class-Incremental CL setting.\n", " \n", " For a quick intro to dataclasses, see examples/dataclasses_example.py \n", " \"\"\"\n", "\n", " @dataclass\n", " class HParams:\n", " \"\"\" Hyper-parameters of the demo model. \"\"\"\n", " # Learning rate of the optimizer.\n", " learning_rate: float = 0.001\n", " \n", " def __init__(self, hparams: HParams):\n", " self.hparams: DemoMethod.HParams = hparams\n", " self.max_epochs: int = 1\n", " self.early_stop_patience: int = 2\n", "\n", " # We will create those when `configure` will be called, before training.\n", " self.model: MyModel\n", " self.optimizer: torch.optim.Optimizer\n", "\n", " def configure(self, setting: ClassIncrementalSetting):\n", " \"\"\" Called before the method is applied on a setting (before training). \n", "\n", " You can use this to instantiate your model, for instance, since this is\n", " where you get access to the observation & action spaces.\n", " \"\"\"\n", " self.model = MyModel(\n", " observation_space=setting.observation_space,\n", " action_space=setting.action_space,\n", " reward_space=setting.reward_space,\n", " )\n", " self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate)\n", "\n", " def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):\n", " # configure() will have been called by the setting before we get here.\n", " import tqdm\n", " from numpy import inf\n", " best_val_loss = inf\n", " best_epoch = 0\n", " for epoch in range(self.max_epochs):\n", " self.model.train()\n", " # Training loop:\n", " with tqdm.tqdm(train_env) as train_pbar:\n", " train_pbar.set_description(f\"Training Epoch {epoch}\")\n", " for i, batch in enumerate(train_pbar):\n", " loss, metrics_dict = self.model.shared_step(batch, environment=train_env)\n", " self.optimizer.zero_grad()\n", " loss.backward()\n", " self.optimizer.step()\n", " train_pbar.set_postfix(**metrics_dict)\n", "\n", " # Validation loop:\n", " self.model.eval()\n", " torch.set_grad_enabled(False)\n", " with tqdm.tqdm(valid_env) as val_pbar:\n", " val_pbar.set_description(f\"Validation Epoch {epoch}\")\n", " epoch_val_loss = 0.\n", "\n", " for i, batch in enumerate(val_pbar):\n", " batch_val_loss, metrics_dict = self.model.shared_step(batch, environment=valid_env)\n", " epoch_val_loss += batch_val_loss\n", " val_pbar.set_postfix(**metrics_dict, val_loss=epoch_val_loss)\n", " torch.set_grad_enabled(True)\n", "\n", " if epoch_val_loss < best_val_loss:\n", " best_val_loss = valid_env\n", " best_epoch = epoch\n", " if epoch - best_epoch > self.early_stop_patience:\n", " print(f\"Early stopping at epoch {i}.\")\n", " break\n", "\n", " def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:\n", " \"\"\" Get a batch of predictions (aka actions) for these observations. \"\"\" \n", " with torch.no_grad():\n", " logits = self.model(observations)\n", " # Get the predicted classes\n", " y_pred = logits.argmax(dim=-1)\n", " return self.target_setting.Actions(y_pred)\n", " \n", " @classmethod\n", " def add_argparse_args(cls, parser: ArgumentParser, dest: str = \"\"):\n", " \"\"\"Adds command-line arguments for this Method to an argument parser.\"\"\"\n", " parser.add_arguments(cls.HParams, \"hparams\")\n", "\n", " @classmethod\n", " def from_argparse_args(cls, args, dest: str = \"\"):\n", " \"\"\"Creates an instance of this Method from the parsed arguments.\"\"\"\n", " hparams: cls.HParams = args.hparams\n", " return cls(hparams=hparams)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2021-02-25:17:29:01,958 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 0.\n", "2021-02-25:17:29:01,959 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n", "2021-02-25:17:29:02,13 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n", "2021-02-25:17:29:02,14 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n", "Training Epoch 0: 100%|██████████| 300/300 [00:04<00:00, 64.17it/s, accuracy=1]\n", "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 155.53it/s, accuracy=1, val_loss=tensor(3.1905)]\n", "2021-02-25:17:29:07,205 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 0.\n", "2021-02-25:17:29:07,246 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n", "2021-02-25:17:29:07,246 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n", "2021-02-25:17:29:07,274 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n", "Test: 0%| | 0/312 [00:00}" ] }, "metadata": {}, "execution_count": 6 }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-02-25T17:29:31.358397\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdO0lEQVR4nO3de7wVdd328c+1YXPLVu+QQCXQSINQCXe6RTuY3CpEaHqTGeKBDj7QCStPBSqmhlooeaRb8cmbNExNyVBRKNuJ8oiAhoqSCUaCmghBHrah6Pf5YwZcbPZhbWDWYu+53q/Xejnzm9+a9Z3lZq41v5k1SxGBmZnlV0W5CzAzs/JyEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CCx3JE2RNL7cdZhtLxwEtt2T9EbB4z1JbxXMn1SiGqZIWi+pWylez6yUHAS23YuInTY8gBeALxS0Tc369SXtCBwH/As4OevXq/fa7Uv5epZPDgJrtST1l/SIpLWSXpZ0raQO6TJJukLSSkmvSXpKUt8G1rGzpFpJV0tSIy91HLAWuAj4Sr3nd5b0v5JekrRG0l0Fy46VtDB9/aWSBqftyyQdWdDvAkm/Sqd7SgpJp0p6Afhj2v4bSf+Q9C9JsyXtV/D8jpImSvp7uvzhtO1eSafVq/dJSUNb8DZbDjgIrDV7Fzgd6AJ8EjgC+Ha6bBDwWaA38AHgy8DqwidL+iDwADAnIr4bjd9v5SvAr4FbgT6SDixYdjNQBewH7Apcka67P3ATcDbQKa1lWQu27TBgH+Bz6fx9QK/0NR4HCo+ELgcOBD4FdAZ+ALwH/JKCIxhJ+wPdgXtbUAeSvidpkaSnJX1/w7rSEH5K0t2S/jNt/3QaNgsk9UrbOkmaJalV7G8k3Zh+gFhU0NZZ0u8lPZf+d5e0XemHiCXpdh+Qtn9M0mNp2yfTtvaS/iCpqjxb1oSI8MOPVvMg2Zke2ciy7wO/TacPB/4KHAJU1Os3BbgRWASc3czr7UmyU61O52cCV6XT3dJluzTwvOuBK4rZBuAC4FfpdE8ggL2aqKlT2ucDJB/m3gL2b6DfDsAaoFc6fznw8xa+333T96kKaA/8AfgoMB84LO3zdeDH6fQ0oAfwGWBiwesOKPffTgu2+bPAAcCigrYJwJh0egzw03R6CElIK/1bezRt/1n6HvQA7kzbTgO+Wu7ta+jRKhLarCGSeku6Jx0yeQ24hOTogIj4I3AtMAlYKWnyhk+tqaOAjsB1zbzMKcDiiFiYzk8FTpRUCewB/DMi1jTwvD2ApVu4aQDLN0xIaifpJ+nw0mu8f2TRJX3s0NBrRcS/gduAk9NP48NJjmBaYh+SnVtdRKwHHgS+SHKkNTvt83uS4TOAd0hCowp4R9LewB4R8acWvm7ZRMRs4J/1mo8lOcIi/e9/F7TfFIm5QKf0goL670Mn4AskR4nbHQeBtWb/A/yF5BPvfwLnkHwyAyAiro6IA4F9SXZcZxc89wbgfmBGejK4MSOAvdKw+QfJJ70uJJ8ElwOd03/k9S0H9m5knW+S7CA22L2BPoXDVCeS7HCOJDkK6Jm2C1gF/LuJ1/olcBLJsFldRDzSSL/GLAIOlfTBdEhjCEnIPZ3WBHB82gZwKcnObixJEF8MnNfC19we7RYRL6fT/wB2S6e7UxDawIq0bRLJ3+MvST6gjAMuiYj3SlNuyzgIrDXbGXgNeENSH+BbGxZIOkjSwekn9zdJdpb1/xGOBp4F7pbUsf7K07HdvYH+QHX66AvcAoxIdwz3AT+XtIukSkmfTZ/+C+Brko6QVCGpe1ojwELghLR/DfClIrZzHck5jiqSHQsA6Y7lRuBnkj6UHj18UtJ/pMsfSbd7Ii0/GiAiFgM/BWaRBOdCknMzXwe+LemxtL630/4LI+KQiPgvYC/gZZKh9Nsk/UrSbg28TKsSyThPk/fvj4gXImJARHwSqCMZIlos6eb0vehdilqLVu6xKT/8aMmDgvF1krHcvwBvAA+RXNXzcLrsCODJdNkqkiGdndJlU4Dx6XQFySfYWcAO9V7rOtLx3Xrt/Ul2zJ3Txy+BV0jG46cV9Bua1vA6sAT4XNq+F/BoWtu9wNVsfo6gfcF6dgJ+l67n7yRHKQF8NF3eEbgSeJHkEtfZQMeC559HM+cdWvD+XwJ8u15bb2BevTal72nn9L3/MMkJ8IvL/TdU5Hb2ZNNzBM8C3dLpbsCz6fT1wPCG+hW03UZyov/i9D34MDC13NtY+FBaqJm1UZJGAKMi4jNb+PxdI2KlpD1Jdu6HAB3StgqSYP1TRNxY8JyvkJxEv1LSb4HvkuxcvxgRp2/dFmVPUk/gnojom85fBqyOiJ9IGgN0jogfSDqK5MhyCHAwcHVE9C9Yz2HAf0fE6ZKuIDmZviztt91cxusvq5i1Yem4/reBn2/Fau5ML7V9B/hORKxNLyn9Trp8GvC/9V7zqySX8EJyXmUGyfDRiVtRR0lI+jUwAOgiaQXwI+AnwO2STiU5Kvty2n0GSQgsIRkC+lrBekRyNDYsbZpMcnTUnoJhzO1BZkcEkm4EjgZWbkjVessFXEXyJtaRXFb1eCbFmOWQpM+R7KT/ABwXyVU/ZpvJ8mTxFGBwE8s/TzJu1gsYRXIFiJltIxExMyJ2jIhjHQLWlMyCIBq+FrdQY9ffmplZCZXzHEFj19++XL+jpFEkRw3suOOOB/bp06d+FzMza8Jjjz22KiK6NrSsVZwsjojJJCdaqKmpiQULFpS5IjNrLXqOadGtlbZry35y1BY/V9LfG1tWziB4kfe/jQjJFy5eLFMtZm1aW9kZbs2O0BpXzm8WTwdGpHfvOwT4V7z/FW4zMyuRzI4IGrkWtxIgIq6jietvzcysdDILgogY3szyAL7TVB8zM8uebzpnVoSrrrqKvn37st9++3HllVcCcPbZZ9OnTx/69evH0KFDWbt2LQBz5syhX79+1NTU8NxzzwGwdu1aBg0axHvvbZc3n7SccxCYNWPRokXccMMNzJs3jyeeeIJ77rmHJUuWMHDgQBYtWsSTTz5J7969ufTSSwGYOHEiM2bM4Morr+S665KfOxg/fjznnHMOFRX+J2fbH/9VmjVj8eLFHHzwwVRVVdG+fXsOO+wwpk2bxqBBg2jfPhldPeSQQ1ixYgUAlZWV1NXVUVdXR2VlJUuXLmX58uUMGDCgjFth1rhW8T0Cs3Lq27cv5557LqtXr6Zjx47MmDGDmpqaTfrceOONDBuW3Fts7NixjBgxgo4dO3LzzTdz1llnMX78+HKUblYUB4FZM/bZZx9++MMfMmjQIHbccUeqq6tp167dxuUXX3wx7du356STTgKgurqauXPnAjB79my6detGRDBs2DAqKyuZOHEiu+3W6n+fxdoQDw2ZFeHUU0/lscceY/bs2eyyyy707p38wNSUKVO45557mDp1KskNdd8XEYwfP55x48Zx4YUXMmHCBEaOHMnVV19djk0wa5SPCMyKsHLlSnbddVdeeOEFpk2bxty5c7n//vuZMGECDz74IFVVVZs956abbmLIkCF07tyZuro6KioqqKiooK6urgxbYNY4B4FZEY477jhWr15NZWUlkyZNolOnTowePZp169YxcOBAIDlhvOEqobq6OqZMmcKsWbMAOOOMMxgyZAgdOnTglltuKdt2mDXEQWBWhIceemiztiVLljTav6qqitra2o3zhx56KE899VQmtZltLZ8jMDPLOQeBmVnOOQjMzHLO5wgsF9rK/fjB9+S3bc9HBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OcyzQIJA2W9KykJZLGNLB8T0m1kv4s6UlJQ7Ksx8zMNpdZEEhqB0wCPg/sCwyXtG+9bucBt0fEJ4ATgJ9nVY+ZmTUsyyOC/sCSiHg+It4GbgWOrdcngP9Mpz8AvJRhPWZm1oAsg6A7sLxgfkXaVugC4GRJK4AZwGkNrUjSKEkLJC149dVXs6jVzCy3yn2yeDgwJSJ6AEOAmyVtVlNETI6Imoio6dq1a8mLNDNry7IMgheBPQrme6RthU4FbgeIiEeAHYAuGdZkZmb1ZBkE84Fekj4iqQPJyeDp9fq8ABwBIGkfkiDw2I+ZWQllFgQRsR4YDcwEFpNcHfS0pIskHZN2OxMYKekJ4NfAVyMisqrJzMw21z7LlUfEDJKTwIVt5xdMPwN8OssazMysaeU+WWxmZmXmIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8u5TINA0mBJz0paImlMI32+LOkZSU9LuiXLeszMbHPts1qxpHbAJGAgsAKYL2l6RDxT0KcXMBb4dESskbRrVvWYmVnDsjwi6A8siYjnI+Jt4Fbg2Hp9RgKTImINQESszLAe20o9e/bk4x//ONXV1dTU1ABwwQUX0L17d6qrq6murmbGjBkAzJkzh379+lFTU8Nzzz0HwNq1axk0aBDvvfde2bbBzDaX2REB0B1YXjC/Aji4Xp/eAJLmAO2ACyLi/vorkjQKGAWw5557ZlKsFae2tpYuXbps0nb66adz1llnbdI2ceJEZsyYwbJly7juuuuYOHEi48eP55xzzqGiwqemzLYn5f4X2R7oBQwAhgM3SOpUv1NETI6Imoio6dq1a2krtC1SWVlJXV0ddXV1VFZWsnTpUpYvX86AAQPKXZqZ1dNsEEj6gqQtCYwXgT0K5nukbYVWANMj4p2I+BvwV5Jg2C41NDSywcSJE5HEqlWrALjzzjvZb7/9OPTQQ1m9ejUAS5cuZdiwYSWve1uRxKBBgzjwwAOZPHnyxvZrr72Wfv368fWvf501a9YAMHbsWEaMGMGll17K6NGjOffccxk/fny5SjezJhSzgx8GPCdpgqQ+LVj3fKCXpI9I6gCcAEyv1+cukqMBJHUhGSp6vgWvUXK1tbUsXLiQBQsWbGxbvnw5s2bN2mTY6pprrmH+/Pl84xvf4JZbkouhzjvvvFa9M3z44Yd5/PHHue+++5g0aRKzZ8/mW9/6FkuXLmXhwoV069aNM888E4Dq6mrmzp1LbW0tzz//PN26dSMiGDZsGCeffDKvvPJKmbfGzDZoNggi4mTgE8BSYIqkRySNkrRzM89bD4wGZgKLgdsj4mlJF0k6Ju02E1gt6RmgFjg7IlZvxfaUxemnn86ECROQtLGtoqKCdevWbRwaeeihh9h9993p1Wu7PeBpVvfu3QHYddddGTp0KPPmzWO33XajXbt2VFRUMHLkSObNm7fJcyKC8ePHM27cOC688EImTJjAyJEjufrqq8uxCWbWgKKGfCLiNeAOkit/ugFDgcclndbM82ZERO+I2DsiLk7bzo+I6el0RMQZEbFvRHw8Im7dqq3JWENDI7/73e/o3r07+++//yZ9x44dy5FHHsndd9/N8OHD+fGPf8y4cePKUfY28eabb/L6669vnJ41axZ9+/bl5Zdf3tjnt7/9LX379t3keTfddBNDhgyhc+fO1NXVUVFRQUVFBXV1dSWt38wa1+xVQ+mn968BHwVuAvpHxEpJVcAzwDXZlrj9ePjhh+nevTsrV65k4MCB9OnTh0suuYRZs2Zt1nfgwIEMHDgQeH9n+Ne//pXLL7+cXXbZhauuuoqqqqpSb8IWe+WVVxg6dCgA69ev58QTT2Tw4MGccsopLFy4EEn07NmT66+/fuNz6urqmDJlysb354wzzmDIkCF06NBh43CZmZVfMZePHgdcERGzCxsjok7SqdmUtX2qPzTy4IMP8re//W3j0cCKFSs44IADmDdvHrvvvjvw/s5w5syZHH300UybNo077riDqVOnMnLkyLJtS0vttddePPHEE5u133zzzY0+p6qqitra2o3zhx56KE899VQm9ZnZlitmaOgCYOPAr6SOknoCRMQD2ZS1/WloaOSggw5i5cqVLFu2jGXLltGjRw8ef/zxjSEAcNlll/Hd736XyspK3nrrLSR5aMTMtivFHBH8BvhUwfy7adtBmVS0nWpsaKQpL730EvPmzeNHP/oRAKeddhoHHXQQnTp14q677sq6ZDOzohQTBO3TW0QAEBFvp5eD5kpjQyOFli1btsn8hz70Ie69996N88cffzzHH398FuWZmW2xYoLgVUnHbLjSR9KxwKpsy7Is9Bxzb/OdWoFlPzmq3CWYtSnFBME3gamSrgVEcv+gEZlWZWZmJdNsEETEUuAQSTul829kXpWZmZVMUXcflXQUsB+ww4Zvz0bERRnWlYm2MjQCHh4xs22nmJvOXUdyv6HTSIaGjgc+nHFdZmZWIsV8j+BTETECWBMRFwKfJP0dATMza/2KCYJ/p/+tk/Qh4B2S+w2ZmVkbUMw5grvTH4u5DHgcCOCGLIsyM7PSaTII0h+keSAi1gJ3SroH2CEi/lWK4szMLHtNDg1FxHvApIL5dQ4BM7O2pZhzBA9IOk6Fv7piZmZtRjFB8A2Sm8ytk/SapNclvZZxXWZmViLFfLO4yZ+kNDOz1q2YXyj7bEPt9X+oxszMWqdiLh89u2B6B6A/8BhweCYVmZlZSRUzNPSFwnlJewBXZlWQmZmVVjEni+tbAeyzrQsxM7PyKOYcwTUk3yaGJDiqSb5hbGZmbUAx5wgWFEyvB34dEXMyqsfMzEqsmCC4A/h3RLwLIKmdpKqIqMu2NDMzK4WivlkMdCyY7wj8IZtyzMys1IoJgh0Kf54yna7KriQzMyulYoLgTUkHbJiRdCDwVnYlmZlZKRVzjuD7wG8kvUTyU5W7k/x0pZmZtQHFfKFsvqQ+wMfSpmcj4p1syzIzs1Ip5sfrvwPsGBGLImIRsJOkb2dfmpmZlUIx5whGpr9QBkBErAFGZlaRmZmVVDFB0K7wR2kktQM6ZFeSmZmVUjEni+8HbpN0fTr/DeC+7EoyM7NSKiYIfgiMAr6Zzj9JcuWQmZm1Ac0ODaU/YP8osIzktwgOBxYXs3JJgyU9K2mJpDFN9DtOUkiqKa5sMzPbVho9IpDUGxiePlYBtwFExH8Vs+L0XMIkYCDJravnS5oeEc/U67cz8D2SsDEzsxJr6ojgLySf/o+OiM9ExDXAuy1Yd39gSUQ8HxFvA7cCxzbQ78fAT4F/t2DdZma2jTQVBF8EXgZqJd0g6QiSbxYXqzuwvGB+Rdq2UXrrij0i4t6mViRplKQFkha8+uqrLSjBzMya02gQRMRdEXEC0AeoJbnVxK6S/kfSoK19YUkVwM+AM5vrGxGTI6ImImq6du26tS9tZmYFijlZ/GZE3JL+dnEP4M8kVxI150Vgj4L5HmnbBjsDfYE/SVoGHAJM9wljM7PSatFvFkfEmvTT+RFFdJ8P9JL0EUkdgBOA6QXr+ldEdImInhHRE5gLHBMRCxpenZmZZWFLfry+KBGxHhgNzCS53PT2iHha0kWSjsnqdc3MrGWK+ULZFouIGcCMem3nN9J3QJa1mJlZwzI7IjAzs9bBQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpafIekZSU9KekDSh7Osx8zMNpdZEEhqB0wCPg/sCwyXtG+9bn8GaiKiH3AHMCGreszMrGFZHhH0B5ZExPMR8TZwK3BsYYeIqI2IunR2LtAjw3rMzKwBWQZBd2B5wfyKtK0xpwL3NbRA0ihJCyQtePXVV7dhiWZmtl2cLJZ0MlADXNbQ8oiYHBE1EVHTtWvX0hZnZtbGtc9w3S8CexTM90jbNiHpSOBc4LCIWJdhPWZm1oAsjwjmA70kfURSB+AEYHphB0mfAK4HjomIlRnWYmZmjcgsCCJiPTAamAksBm6PiKclXSTpmLTbZcBOwG8kLZQ0vZHVmZlZRrIcGiIiZgAz6rWdXzB9ZJavb2ZmzdsuThabmVn5OAjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpb/h6Tb0uWPSuqZZT1mZra5zIJAUjtgEvB5YF9guKR963U7FVgTER8FrgB+mlU9ZmbWsCyPCPoDSyLi+Yh4G7gVOLZen2OBX6bTdwBHSFKGNZmZWT2KiGxWLH0JGBwR/yedPwU4OCJGF/RZlPZZkc4vTfusqreuUcCodPZjwLOZFL3tdAFWNdurbfK251eet781bPuHI6JrQwval7qSLRERk4HJ5a6jWJIWRERNuesoB297Prcd8r39rX3bsxwaehHYo2C+R9rWYB9J7YEPAKszrMnMzOrJMgjmA70kfURSB+AEYHq9PtOBr6TTXwL+GFmNVZmZWYMyGxqKiPWSRgMzgXbAjRHxtKSLgAURMR34BXCzpCXAP0nCoi1oNcNYGfC251eet79Vb3tmJ4vNzKx18DeLzcxyzkFgZpZzDoJtqLlbarRlkm6UtDL9bkiuSNpDUq2kZyQ9Lel75a6pVCTtIGmepCfSbb+w3DWVg6R2kv4s6Z5y17IlHATbSJG31GjLpgCDy11EmawHzoyIfYFDgO/k6P/9OuDwiNgfqAYGSzqkvCWVxfeAxeUuYks5CLadYm6p0WZFxGySK79yJyJejojH0+nXSXYI3ctbVWlE4o10tjJ95OoKFEk9gKOA/1vuWraUg2Db6Q4sL5hfQU52Bva+9A66nwAeLXMpJZMOiywEVgK/j4jcbHvqSuAHwHtlrmOLOQjMthFJOwF3At+PiNfKXU+pRMS7EVFNcveA/pL6lrmkkpF0NLAyIh4rdy1bw0Gw7RRzSw1royRVkoTA1IiYVu56yiEi1gK15Otc0aeBYyQtIxkOPlzSr8pbUss5CLadYm6pYW1Qeuv0XwCLI+Jn5a6nlCR1ldQpne4IDAT+UtaiSigixkZEj4joSfJv/o8RcXKZy2oxB8E2EhHrgQ231FgM3B4RT5e3qtKR9GvgEeBjklZIOrXcNZXQp4FTSD4NLkwfQ8pdVIl0A2olPUnyYej3EdEqL6HMM99iwsws53xEYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOdcqfrzerJwkfRB4IJ3dHXgXeDWd75/eW6qp538VqImI0ZkVabYVHARmzYiI1SR31kTSBcAbEXF5OWsy25Y8NGS2BSSNlDQ/vQ//nZKq0vbjJS1K22c38LyjJD0iqUvpqzZrmIPAbMtMi4iD0vvwLwY2fJP6fOBzafsxhU+QNBQYAwyJiFUlrdasCR4aMtsyfSWNBzoBO5HcWgRgDjBF0u1A4c3nDgdqgEF5ujOptQ4+IjDbMlOA0RHxceBCYAeAiPgmcB7JnWgfS080AywFdgZ6l75Us6Y5CMy2zM7Ay+ntp0/a0Chp74h4NCLOJ7myaMOtyf8OHAfcJGm/kldr1gQHgdmWGUfyK2Rz2PS2y5dJekrSIuD/AU9sWBARfyEJjd9I2ruUxZo1xXcfNTPLOR8RmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZz/x/jOYg2+yx1FwAAAABJRU5ErkJggg==\n" }, "metadata": { "needs_background": "light" } } ], "source": [ "results.make_plots()" ] }, { "source": [ "As you can see, our model's performance quickly deteriorates as new tasks are learned, a process refered to as \"Catastrophic Forgetting\".\n", "Next, we'll try to do something about it.\n" ], "cell_type": "markdown", "metadata": {} }, { "source": [ "## Adding a CL Mechanism\n", "\n", "First, by taking a look at the logs above, you will notice that we are told that our Method doesn't have an `on_task_switch` method.\n", "\n", "A Setting would call this `on_task_switch` method during training or evaluation if we are allowed to know when task boundaries occur in that setting. Additionally, if it's allowed in that Setting, we might also receive the index of the new task we are switching to.\n", "\n", "Using this information, here we will add an EWC-like penalty to our model, which will prevent its weights from changing too much between tasks. We'll use the `on_task_switch` method to update the 'anchor' weights everytime a task boundary is encountered.\n" ], "cell_type": "markdown", "metadata": {} }, { "source": [], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "from copy import deepcopy\n", "from sequoia.utils import dict_intersection\n", "\n", "class MyImprovedModel(MyModel):\n", " \"\"\" Adds an ewc-like penalty to the demo model. \"\"\"\n", " def __init__(self,\n", " observation_space: gym.Space,\n", " action_space: gym.Space,\n", " reward_space: gym.Space,\n", " ewc_coefficient: float = 1.0,\n", " ewc_p_norm: int = 2,\n", " ):\n", " super().__init__(\n", " observation_space,\n", " action_space,\n", " reward_space,\n", " )\n", " self.ewc_coefficient = ewc_coefficient\n", " self.ewc_p_norm = ewc_p_norm\n", "\n", " self.previous_model_weights: Dict[str, Tensor] = {}\n", "\n", " self._previous_task: Optional[int] = None\n", " self._n_switches: int = 0\n", "\n", " def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):\n", " base_loss, metrics = super().shared_step(batch, *args, **kwargs)\n", " ewc_loss = self.ewc_coefficient * self.ewc_loss()\n", " metrics[\"ewc_loss\"] = ewc_loss\n", " return base_loss + ewc_loss, metrics\n", "\n", " def on_task_switch(self, task_id: Optional[int])-> None:\n", " \"\"\" Executed when the task switches (to either a known or unknown task).\n", " \"\"\"\n", " if self._previous_task is None and self._n_switches == 0:\n", " print(\"Starting the first task, no EWC update.\")\n", " elif task_id is None or task_id != self._previous_task:\n", " # NOTE: We also switch between unknown tasks.\n", " print(f\"Switching tasks: {self._previous_task} -> {task_id}: \")\n", " print(f\"Updating the EWC 'anchor' weights.\")\n", " self._previous_task = task_id\n", " self.previous_model_weights.clear()\n", " self.previous_model_weights.update(deepcopy({\n", " k: v.detach() for k, v in self.named_parameters()\n", " }))\n", " self._n_switches += 1\n", "\n", " def ewc_loss(self) -> Tensor:\n", " \"\"\"Gets an 'ewc-like' regularization loss.\n", "\n", " NOTE: This is a simplified version of EWC where the loss is the P-norm\n", " between the current weights and the weights as they were on the begining\n", " of the task.\n", " \"\"\"\n", " if self._previous_task is None:\n", " # We're in the first task: do nothing.\n", " return 0.\n", "\n", " old_weights: Dict[str, Tensor] = self.previous_model_weights\n", " new_weights: Dict[str, Tensor] = dict(self.named_parameters())\n", "\n", " loss = 0.\n", " for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):\n", " loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)\n", " return loss\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "\n", "class ImprovedDemoMethod(DemoMethod):\n", " \"\"\" Improved version of the demo method, that adds an ewc-like regularizer.\n", " \"\"\"\n", " # Name of this method: \n", " @dataclass\n", " class HParams(DemoMethod.HParams):\n", " \"\"\" Hyperparameters of this new improved method. (Adds ewc params).\"\"\"\n", " # Coefficient of the ewc-like loss.\n", " ewc_coefficient: float = 1.0\n", " # Distance norm used in the ewc loss.\n", " ewc_p_norm: int = 2\n", "\n", " def __init__(self, hparams: HParams):\n", " super().__init__(hparams=hparams)\n", " \n", " def configure(self, setting: ClassIncrementalSetting):\n", " # Use the improved model, with the added EWC-like term.\n", " self.model = MyImprovedModel(\n", " observation_space=setting.observation_space,\n", " action_space=setting.action_space,\n", " reward_space=setting.reward_space,\n", " ewc_coefficient=self.hparams.ewc_coefficient,\n", " ewc_p_norm = self.hparams.ewc_p_norm,\n", " )\n", " self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate)\n", "\n", " def on_task_switch(self, task_id: Optional[int]):\n", " self.model.on_task_switch(task_id)" ] }, { "source": [ "## Running the \"Improved\" method" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "2021-02-25:17:29:31,526 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 0.\n", "2021-02-25:17:29:31,580 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n", "2021-02-25:17:29:31,581 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n", "Training Epoch 0: 0%| | 0/300 [00:00 None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Test: 100%|██████████| 312/312 [00:01<00:00, 239.22it/s]\n", "2021-02-25:17:29:37,352 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.690505\n", "2021-02-25:17:29:37,353 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 1.\n", "Training Epoch 0: 0%| | 0/300 [00:00 1: \n", "Updating the EWC 'anchor' weights.\n", "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 59.70it/s, accuracy=0.875, ewc_loss=tensor(0.2296, grad_fn=)]\n", "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 143.94it/s, accuracy=0.969, ewc_loss=tensor(0.2221), val_loss=tensor(33.0478)]\n", "2021-02-25:17:29:42,905 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 1.\n", "2021-02-25:17:29:42,909 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n", "Test: 12%|█▎ | 39/312 [00:00<00:01, 190.68it/s]Switching tasks: 1 -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Test: 100%|██████████| 312/312 [00:01<00:00, 218.28it/s]\n", "2021-02-25:17:29:44,441 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.745092\n", "2021-02-25:17:29:44,442 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 2.\n", "Training Epoch 0: 0%| | 0/300 [00:00 2: \n", "Updating the EWC 'anchor' weights.\n", "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 54.67it/s, accuracy=0.906, ewc_loss=tensor(0.3728, grad_fn=)]\n", "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 162.51it/s, accuracy=0.906, ewc_loss=tensor(0.3689), val_loss=tensor(43.5458)]\n", "2021-02-25:17:29:50,398 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 2.\n", "2021-02-25:17:29:50,402 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n", "Test: 15%|█▍ | 46/312 [00:00<00:01, 231.12it/s]Switching tasks: 2 -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Test: 100%|██████████| 312/312 [00:01<00:00, 239.81it/s]\n", "2021-02-25:17:29:51,801 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.915665\n", "2021-02-25:17:29:51,801 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 3.\n", "Training Epoch 0: 0%| | 0/300 [00:00 3: \n", "Updating the EWC 'anchor' weights.\n", "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 54.25it/s, accuracy=1, ewc_loss=tensor(0.0175, grad_fn=)]\n", "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 144.31it/s, accuracy=0.969, ewc_loss=tensor(0.0182), val_loss=tensor(8.4141)]\n", "2021-02-25:17:29:57,857 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 3.\n", "2021-02-25:17:29:57,861 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n", "Test: 13%|█▎ | 42/312 [00:00<00:01, 211.24it/s]Switching tasks: 3 -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Test: 100%|██████████| 312/312 [00:01<00:00, 231.53it/s]\n", "2021-02-25:17:29:59,316 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.917368\n", "2021-02-25:17:29:59,317 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 4.\n", "Training Epoch 0: 0%| | 0/300 [00:00 4: \n", "Updating the EWC 'anchor' weights.\n", "Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 55.17it/s, accuracy=1, ewc_loss=tensor(0.0487, grad_fn=)]\n", "Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 147.18it/s, accuracy=0.938, ewc_loss=tensor(0.0635), val_loss=tensor(14.3717)]\n", "2021-02-25:17:30:05,271 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 4.\n", "2021-02-25:17:30:05,276 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n", "Test: 14%|█▍ | 45/312 [00:00<00:01, 219.80it/s]Switching tasks: 4 -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Switching tasks: None -> None: \n", "Updating the EWC 'anchor' weights.\n", "Test: 100%|██████████| 312/312 [00:01<00:00, 219.23it/s]\n", "2021-02-25:17:30:06,803 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.90605\n", "2021-02-25:17:30:06,804 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:237] Finished main loop in 36.293361921000006 seconds.\n", "2021-02-25:17:30:06,894 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:257] {\n", "\t\"Task 0\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.981351\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.752976\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.53125\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.640377\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.546371\n", "\t\t}\n", "\t},\n", "\t\"Task 1\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.927419\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.896825\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.457157\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.700397\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.741935\n", "\t\t}\n", "\t},\n", "\t\"Task 2\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.970766\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.780258\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.94254\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.990079\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.895665\n", "\t\t}\n", "\t},\n", "\t\"Task 3\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.972278\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.770833\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.939516\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.990575\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.914819\n", "\t\t}\n", "\t},\n", "\t\"Task 4\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.970766\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.708333\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.88004\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.989583\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.983367\n", "\t\t}\n", "\t},\n", "\t\"Final/Average Online Performance\": 0,\n", "\t\"Final/Average Final Performance\": 0.90605,\n", "\t\"Final/Runtime (seconds)\": 36.293361921000006,\n", "\t\"Final/CL Score\": 0.74363\n", "}\n", "\n", "2021-02-25:17:30:06,997 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:395] {\n", "\t\"Task 0\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.981351\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.752976\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.53125\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.640377\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.546371\n", "\t\t}\n", "\t},\n", "\t\"Task 1\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.927419\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.896825\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.457157\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.700397\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.741935\n", "\t\t}\n", "\t},\n", "\t\"Task 2\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.970766\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.780258\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.94254\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.990079\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.895665\n", "\t\t}\n", "\t},\n", "\t\"Task 3\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.972278\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.770833\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.939516\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.990575\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.914819\n", "\t\t}\n", "\t},\n", "\t\"Task 4\": {\n", "\t\t\"Task 0\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.970766\n", "\t\t},\n", "\t\t\"Task 1\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.708333\n", "\t\t},\n", "\t\t\"Task 2\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.88004\n", "\t\t},\n", "\t\t\"Task 3\": {\n", "\t\t\t\"n_samples\": 2016,\n", "\t\t\t\"accuracy\": 0.989583\n", "\t\t},\n", "\t\t\"Task 4\": {\n", "\t\t\t\"n_samples\": 1984,\n", "\t\t\t\"accuracy\": 0.983367\n", "\t\t}\n", "\t},\n", "\t\"Final/Average Online Performance\": 0,\n", "\t\"Final/Average Final Performance\": 0.90605,\n", "\t\"Final/Runtime (seconds)\": 36.293361921000006,\n", "\t\"Final/CL Score\": 0.74363\n", "}\n", "\n" ] } ], "source": [ "improved_method = ImprovedDemoMethod(hparams=ImprovedDemoMethod.HParams())\n", "setting = DomainIncrementalSetting(dataset=\"fashionmnist\")\n", "improved_results = setting.apply(improved_method)" ] }, { "source": [ "## Improved Results" ], "cell_type": "code", "metadata": {}, "execution_count": 10, "outputs": [] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n\t\"Task 0\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.981351\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.752976\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.53125\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.640377\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.546371\n\t\t}\n\t},\n\t\"Task 1\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.927419\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.896825\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.457157\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.700397\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.741935\n\t\t}\n\t},\n\t\"Task 2\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.970766\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.780258\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.94254\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.990079\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.895665\n\t\t}\n\t},\n\t\"Task 3\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.972278\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.770833\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.939516\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.990575\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.914819\n\t\t}\n\t},\n\t\"Task 4\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.970766\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.708333\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.88004\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.989583\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.983367\n\t\t}\n\t},\n\t\"Final/Average Online Performance\": 0,\n\t\"Final/Average Final Performance\": 0.90605,\n\t\"Final/Runtime (seconds)\": 36.293361921000006,\n\t\"Final/CL Score\": 0.74363\n}\n\n" ] } ], "source": [ "print(improved_results.summary())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'task_metrics':
}" ] }, "metadata": {}, "execution_count": 12 }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-02-25T17:30:07.306773\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcv0lEQVR4nO3de7xUdb3/8dd76ybBS0RCyUUxDyoXE3GHpNmxLNJtiYimmFodf2IXTEXzaL/0qGEXO4QHo6NmHryDphUZikSURxJ1k4ggoWgkFwskhGRUbp/zx1rosNmX2ciaYe/1fj4e83DWmu+s9VkI857v97vWGkUEZmaWX1WVLsDMzCrLQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnILDckTRB0uhK12G2s3AQ2E5P0utFj82S3iha/kKZapggaaOkfcqxP7NychDYTi8i9tjyAF4GPle07q6s9y9pd2AYsAY4M+v91dv3ruXcn+WTg8BaLUkDJT0u6TVJr0j6saR26WuSNFbSCklrJT0rqV8D29hT0gxJ4ySpkV0NA14DrgG+WO/9nST9j6TlklZL+mXRa0MkzUn3/6Kk49L1iyV9qqjdVZLuTJ/3lBSSzpH0MvC7dP19kv4maY2kRyX1LXp/e0ljJP01ff2xdN1vJJ1fr965koa24I/ZcsBBYK3ZJuAiYG/go8CxwNfS1wYDHwcOBN4LfB5YVfxmSe8HpgMzI+Ib0fj9Vr4I3ANMBA6WdHjRa3cAHYC+QBdgbLrtgcDtwDeBjmkti1twbP8K9AY+ky4/BPRK9/EnoLgn9J/A4cCRQCfgUmAzcBtFPRhJhwLdgN+0oA7LAQeBtVoRMTsiZkXExohYDNxE8gEKsAHYEzgYUEQsiIhXit7eFfgDcF9EfLuxfUjaF/gEcHdE/J0kOM5OX9sHOB74SkSsjogNEfGH9K3nALdGxLSI2BwRyyLizy04vKsiYl1EvJEe660R8c+IeAu4CjhU0nslVQH/BlyQ7mNTRPwxbTcZOFBSr3SbZwGTImJ9C+pA0gWS5kmaL+nCdN2haW/sWUm/lrRXuv6otNdRt2W/kjpKeiSt1XZC/h9jrZakAyU9mA6ZrAW+S9I7ICJ+B/wYGA+skHTzlg+r1AlAe+DGZnZzFrAgIuaky3cBZ0iqBnoA/4iI1Q28rwfw4nYeGsCSLU8k7SLp++nw0lre6VnsnT52a2hfEfEmMAk4M/0QHk7SgylZOpx2LjAQOBT4rKR/AW4BLouIQ4BfkPR8AC4GaoELga+k674NfDciNrdk31Y+DgJrzf4b+DPQKyL2Ar4FvD3OHxHjIuJwoA/JENE3i977U+BhYEo6GdyYs4EPpWHzN+BHJB++tSQf1p0kdWzgfUuAAxrZ5jqS4aQtPthAm+JhqjOAIcCnSIa5eqbrBbwKvNnEvm4DvkAybFaIiMcbadeY3sATEVGIiI0kvaiTSf48H03bTCOZR4GkJ9YhfWyQdADQIyJ+38L97jQa6RH1lzQrnQOqS4cCkTQsbfe/6dAjkg6QNKmCh9AsB4G1ZnsCa4HXJR0MfHXLC5I+IumI9Jv7OpIPy/rfSEcCC4FfS2pff+OSPkryATsQ6J8++gF3A2enQ00PAT+R9D5J1ZI+nr79Z8CXJR0rqUpSt7RGgDnA6Wn7GuCUEo7zLZI5jg4kPR8A0m/ZtwI/ktQ17T18VNJ70tcfT497DC3sDaTmAUdLer+kDiQB2AOYTxJOAKem6wC+RzI3cjlJj+xakh5Bq9REj+g64OqI6A9cmS4DnA98hGSY8ox03Wh28j8DB4G1ZpeQ/GP7J8k3/OJvXXul61YDfyX5EP1h8ZvTyeERwFLgV5J2q7f9LwK/iohnI+JvWx7Af5F8IHQiGTraQNIzWUEyJEJEPAl8mWTyeA3JN+n90u1eQRIwq4GrSYKlKbenx7AMeA6Y1cCfw7PAU8A/gB+w9b/t24FDgDub2c82ImJBur1HSHpQc0gm6f8N+Jqk2SRBtT5tPyciBkXEJ4APAa+QnMQ1SdKdkj7Q0hoqrLEeUZD8HYOkl7Y8fb4ZeA/v9IiOBv4WES+Ut+wWigg/SnwAF5B8Q5oPXJium0Tyj2MOydjtnHT9UcBcoI5k6AKSs0ceAaoqfSx+5OdBMrz12A7a1neBr9VbdyDwZL11Sv+udyKZV9mPZCL/2kr/ebTweHsDzwPvJ/lwfxy4IV3/MskQ4DJgv7T9p4HZwK9JAuIRoFOlj6O5hy9WKVG9LuJ64GFJD0bEaUVtxpB8+4N3Js16kkyaXYwnzazM0uGcrwE/eRfb6BIRK9IzqE4GBhWtqyL5e11/0v1sYEpE/COtYXP66EArEhELJG3pEa3jnR7RV4GLIuJ+SZ8nGQr8VERMI5kzQdLZwBSSM7cuIekBXhARhfIfSdMyGxqSdKuSi3nmNfK6lFzEsyg93WxAVrXsII11EYHkeEjOVb8nXdXmJs2sdZH0GWAl8HeaH35qyv2SniP5lvv1iHgNGC7peZIhseXA/xTttwPwJZIztiCZYJ8CXE/zZ2ntdCLiZxFxeER8nOTD/HmSYcMH0ib3kXxBfFu9P4Or0/aPkUzc73wy7FJ9HBgAzGvk9VqSiTYBg0g+ZCveRWppF7He8dYVLfcnGcudAXQnuRipV6WPww8//GjZA+iS/ndfkuDrCCwAjknXHwvMrvee/wBOSp8/mn5mnEXSI6j4MdV/ZDY0FBGPSurZRJMhwO2R/EnNSi862Se2vuhnpxGNdxG3GM47vQEiOe98EEB6Jsnbk2YkvYWLI7lAycx2bvenp4JuIO0RSToX+C8l94J6k+SkAwAkdQUGRsTV6aobSCbyXwNOKmfhpVKaWNlsPAmCByOioXu8PAh8PyIeS5enA/8eEXUNtB1B+ge9++67H37wwQfXb1J2y5Yto7q6mi5duhARzJ07l969e9OuXbut2kUEL7zwAh/60IdYsmQJXbt2Zf369axdu5Zu3bpVqHozy5vZs2e/GhGdG3qtVUwWR8TNwM0ANTU1UVe3TVaUxYoVK+jSpQsvv/wygwcPZtasWXTs2JGHH36Y733ve/zhD3/Y5j233XYbq1ev5sILL2To0KGMGzeOxYsX88ADDzB27NgKHIWZ5ZGkvzb2WiWDYBnvXIQCyTj6sgrVUpJhw4axatUqqqurGT9+PB07dgRg4sSJDB8+fJv2hUKBCRMm8MgjjwAwatQoamtradeuHXff/W7m7szMdpxKDg2dQHJlZy1wBDAuIgbWb1dfJXsEZq1Vz8vaxg1HF3//hEqX0GpJmh0RNQ29llmPQNI9wDHA3pKWksyiVwNExI0kp5PVAouAAslVmGZmO1RbCUHILgizPGto27GSrV8P4OtZ7d/MzErjew2ZmeVcqzhraEdxF9HMbFvuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYFaCsWPH0rdvX/r168fw4cN58803mT59OgMGDKB///587GMfY9GiRQDccMMN9OvXj9raWtavXw/AY489xkUXXVTJQzBrlIPArBnLli1j3Lhx1NXVMW/ePDZt2sTEiRP56le/yl133cWcOXM444wzGD16NAB33XUXc+fO5cgjj2Tq1KlEBN/5zne44oorKnwkZg1zEJiVYOPGjbzxxhts3LiRQqFA165dkcTatWsBWLNmDV27dgWS36DYsGEDhUKB6upq7rzzTo4//ng6depUyUMwa1Suriw22x7dunXjkksuYd9996V9+/YMHjyYwYMHc8stt1BbW0v79u3Za6+9mDVrFgAjR45k0KBB9O3bl6OOOoohQ4YwderUCh+FWePcIzBrxurVq/nVr37FX/7yF5YvX866deu48847GTt2LFOmTGHp0qV8+ctfZtSoUQCcddZZPP3002+3+cY3vsFDDz3EKaecwkUXXcTmzZsrfERmW3MQmDXjt7/9Lfvvvz+dO3emurqak08+mZkzZ/LMM89wxBFHAHDaaafxxz/+cav3LV++nCeffJKTTjqJMWPGMGnSJDp27Mj06dMrcRhmjXIQmDVj3333ZdasWRQKBSKC6dOn06dPH9asWcPzzz8PwLRp0+jdu/dW77viiiu45pprAHjjjTeQRFVVFYVCoezHYNYUzxGYNeOII47glFNOYcCAAey6664cdthhjBgxgu7duzNs2DCqqqp43/vex6233vr2e55++mkABgwYAMAZZ5zBIYccQo8ePbj00ksrchxmjcn0pyqz8G5+qtK3oba8ait/97fn731bOXZ4d//um/qpSg8NmZnlnIPAzCznHARmZjnnyWLLBY8TmzXOPQIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOZRoEko6TtFDSIkmXNfD6vpJmSHpa0lxJtVnWY2Zm28osCCTtAowHjgf6AMMl9anX7NvAvRFxGHA68JOs6jEzs4Zl2SMYCCyKiJciYj0wERhSr00Ae6XP3wssz7AeMzNrQJZB0A1YUrS8NF1X7CrgTElLgSnA+Q1tSNIISXWS6lauXJlFrWZmuVXpyeLhwISI6A7UAndI2qamiLg5ImoioqZz585lL9LMrC3LMgiWAT2Klrun64qdA9wLEBGPA7sBe2dYk5mZ1ZNlEDwF9JK0v6R2JJPBk+u1eRk4FkBSb5Ig8NiPmVkZZRYEEbERGAlMBRaQnB00X9I1kk5Mm10MnCvpGeAe4EsREVnVZGZm29o1y41HxBSSSeDidVcWPX8OOCrLGszMrGmVniw2M7MKcxCYmeWcg8BKsnDhQvr37//2Y6+99uL666/nvvvuo2/fvlRVVVFXV/d2+5kzZ/LhD3+YmpoaXnjhBQBee+01Bg8ezObNmyt1GGbWgEznCKztOOigg5gzZw4AmzZtolu3bgwdOpRCocADDzzAeeedt1X7MWPGMGXKFBYvXsyNN97ImDFjGD16NN/61reoqvL3D7OdiYPAWmz69OkccMAB7Lfffo22qa6uplAoUCgUqK6u5sUXX2TJkiUcc8wx5SvUzEriILAWmzhxIsOHD2+yzeWXX87ZZ59N+/btueOOO7jkkksYPXp0mSo0s5ZwH91aZP369UyePJlTTz21yXb9+/dn1qxZzJgxg5deeol99tmHiOC0007jzDPP5O9//3uZKjaz5rhHYC3y0EMPMWDAAD7wgQ+U1D4iGD16NBMnTuT888/nuuuuY/HixYwbN45rr70242rNrBTuEViL3HPPPc0OCxW7/fbbqa2tpVOnThQKBaqqqqiqqqJQKGRYpZm1hHsEVrJ169Yxbdo0brrpprfX/eIXv+D8889n5cqVnHDCCfTv35+pU6cCUCgUmDBhAo888ggAo0aNora2lnbt2nH33XdX5BjMbFsOAivZ7rvvzqpVq7ZaN3ToUIYOHdpg+w4dOjBjxoy3l48++mieffbZTGs0s5bz0JCZWc45CMzMcs5BYGaWc54jyJGel/2m0iXsEIu/f0KlSzBrU9wjMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMci7TIJB0nKSFkhZJuqyRNp+X9Jyk+ZLuzrIeMzPbVmY/Xi9pF2A88GlgKfCUpMkR8VxRm17A5cBREbFaUpes6jEzs4Zl2SMYCCyKiJciYj0wERhSr825wPiIWA0QESsyrMfMzBqQZRB0A5YULS9N1xU7EDhQ0kxJsyQd19CGJI2QVCepbuXKlRmVa2aWT5WeLN4V6AUcAwwHfiqpY/1GEXFzRNRERE3nzp3LW6GZWRvXbBBI+pyk7QmMZUCPouXu6bpiS4HJEbEhIv4CPE8SDGZmVialfMCfBrwg6TpJB7dg208BvSTtL6kdcDowuV6bX5L0BpC0N8lQ0Ust2IeZmb1LzQZBRJwJHAa8CEyQ9Hg6Zr9nM+/bCIwEpgILgHsjYr6kaySdmDabCqyS9BwwA/hmRKx6F8djZmYtVNLpoxGxVtLPgfbAhcBQ4JuSxkXEDU28bwowpd66K4ueBzAqfZiZWQWUMkdwoqRfAL8HqoGBEXE8cChwcbblmZlZ1krpEQwDxkbEo8UrI6Ig6ZxsyjIzs3IpJQiuAl7ZsiCpPfCBiFgcEdOzKszMzMqjlLOG7gM2Fy1vSteZmVkbUEoQ7JreIgKA9Hm77EoyM7NyKiUIVhad7omkIcCr2ZVkZmblVMocwVeAuyT9GBDJ/YPOzrQqMzMrm2aDICJeBAZJ2iNdfj3zqszMrGxKuqBM0glAX2A3SQBExDUZ1mVmZmVSygVlN5Lcb+h8kqGhU4H9Mq7LzMzKpJTJ4iMj4mxgdURcDXyU5OZwZmbWBpQSBG+m/y1I6gpsAPbJriQzMyunUuYIfp3+WMwPgT8BAfw0y6LMzKx8mgyC9AdppkfEa8D9kh4EdouINeUozszMstfk0FBEbAbGFy2/5RAwM2tbSpkjmC5pmLacN2pmZm1KKUFwHslN5t6StFbSPyWtzbguMzMrk1KuLG7yJynNzKx1azYIJH28ofX1f6jGzMxap1JOH/1m0fPdgIHAbOCTmVRkZmZlVcrQ0OeKlyX1AK7PqiAzMyuvUiaL61sK9N7RhZiZWWWUMkdwA8nVxJAER3+SK4zNzKwNKGWOoK7o+UbgnoiYmVE9ZmZWZqUEwc+BNyNiE4CkXSR1iIhCtqWZmVk5lHRlMdC+aLk98NtsyjEzs3IrJQh2K/55yvR5h+xKMjOzciolCNZJGrBlQdLhwBvZlWRmZuVUyhzBhcB9kpaT/FTlB0l+utLMzNqAUi4oe0rSwcBB6aqFEbEh27LMzKxcSvnx+q8Du0fEvIiYB+wh6WvZl2ZmZuVQyhzBuekvlAEQEauBczOryMzMyqqUINil+EdpJO0CtMuuJDMzK6dSJosfBiZJuildPg94KLuSzMysnEoJgn8HRgBfSZfnkpw5ZGZmbUCzQ0PpD9g/ASwm+S2CTwILStm4pOMkLZS0SNJlTbQbJikk1ZRWtpmZ7SiN9ggkHQgMTx+vApMAIuITpWw4nUsYD3ya5NbVT0maHBHP1Wu3J3ABSdiYmVmZNdUj+DPJt//PRsTHIuIGYFMLtj0QWBQRL0XEemAiMKSBdt8BfgC82YJtm5nZDtJUEJwMvALMkPRTSceSXFlcqm7AkqLlpem6t6W3rugREb9pakOSRkiqk1S3cuXKFpRgZmbNaTQIIuKXEXE6cDAwg+RWE10k/bekwe92x5KqgB8BFzfXNiJujoiaiKjp3Lnzu921mZkVKWWyeF1E3J3+dnF34GmSM4maswzoUbTcPV23xZ5AP+D3khYDg4DJnjA2MyuvFv1mcUSsTr+dH1tC86eAXpL2l9QOOB2YXLStNRGxd0T0jIiewCzgxIioa3hzZmaWhe358fqSRMRGYCQwleR003sjYr6kaySdmNV+zcysZUq5oGy7RcQUYEq9dVc20vaYLGsxM7OGZdYjMDOz1sFBYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnOZBoGk4yQtlLRI0mUNvD5K0nOS5kqaLmm/LOsxM7NtZRYEknYBxgPHA32A4ZL61Gv2NFATER8Gfg5cl1U9ZmbWsCx7BAOBRRHxUkSsByYCQ4obRMSMiCiki7OA7hnWY2ZmDcgyCLoBS4qWl6brGnMO8FBDL0gaIalOUt3KlSt3YIlmZrZTTBZLOhOoAX7Y0OsRcXNE1ERETefOnctbnJlZG7drhtteBvQoWu6ertuKpE8B/x/414h4K8N6zMysAVn2CJ4CeknaX1I74HRgcnEDSYcBNwEnRsSKDGsxM7NGZBYEEbERGAlMBRYA90bEfEnXSDoxbfZDYA/gPklzJE1uZHNmZpaRLIeGiIgpwJR6664sev6pLPdvZmbN2ykmi83MrHIcBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzy7lMg0DScZIWSlok6bIGXn+PpEnp609I6pllPWZmtq3MgkDSLsB44HigDzBcUp96zc4BVkfEvwBjgR9kVY+ZmTUsyx7BQGBRRLwUEeuBicCQem2GALelz38OHCtJGdZkZmb1KCKy2bB0CnBcRPy/dPks4IiIGFnUZl7aZmm6/GLa5tV62xoBjEgXDwIWZlL0jrM38GqzrdomH3t+5fn4W8Ox7xcRnRt6YddyV7I9IuJm4OZK11EqSXURUVPpOirBx57PY4d8H39rP/Ysh4aWAT2Klrun6xpsI2lX4L3AqgxrMjOzerIMgqeAXpL2l9QOOB2YXK/NZOCL6fNTgN9FVmNVZmbWoMyGhiJio6SRwFRgF+DWiJgv6RqgLiImAz8D7pC0CPgHSVi0Ba1mGCsDPvb8yvPxt+pjz2yy2MzMWgdfWWxmlnMOAjOznHMQ7EDN3VKjLZN0q6QV6bUhuSKph6QZkp6TNF/SBZWuqVwk7SbpSUnPpMd+daVrqgRJu0h6WtKDla5lezgIdpASb6nRlk0Ajqt0ERWyEbg4IvoAg4Cv5+j//VvAJyPiUKA/cJykQZUtqSIuABZUuojt5SDYcUq5pUabFRGPkpz5lTsR8UpE/Cl9/k+SD4Rula2qPCLxerpYnT5ydQaKpO7ACcAtla5lezkIdpxuwJKi5aXk5MPA3pHeQfcw4IkKl1I26bDIHGAFMC0icnPsqeuBS4HNFa5juzkIzHYQSXsA9wMXRsTaStdTLhGxKSL6k9w9YKCkfhUuqWwkfRZYERGzK13Lu+Eg2HFKuaWGtVGSqklC4K6IeKDS9VRCRLwGzCBfc0VHASdKWkwyHPxJSXdWtqSWcxDsOKXcUsPaoPTW6T8DFkTEjypdTzlJ6iypY/q8PfBp4M8VLaqMIuLyiOgeET1J/s3/LiLOrHBZLeYg2EEiYiOw5ZYaC4B7I2J+ZasqH0n3AI8DB0laKumcStdURkcBZ5F8G5yTPmorXVSZ7APMkDSX5MvQtIholadQ5plvMWFmlnPuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc61ih+vN6skSe8HpqeLHwQ2ASvT5YHpvaWaev+XgJqIGJlZkWbvgoPArBkRsYrkzppIugp4PSL+s5I1me1IHhoy2w6SzpX0VHof/vsldUjXnyppXrr+0Qbed4KkxyXtXf6qzRrmIDDbPg9ExEfS+/AvALZcSX0l8Jl0/YnFb5A0FLgMqI2IV8tarVkTPDRktn36SRoNdAT2ILm1CMBMYIKke4Him899EqgBBufpzqTWOrhHYLZ9JgAjI+IQ4GpgN4CI+ArwbZI70c5OJ5oBXgT2BA4sf6lmTXMQmG2fPYFX0ttPf2HLSkkHRMQTEXElyZlFW25N/ldgGHC7pL5lr9asCQ4Cs+1zBcmvkM1k69su/1DSs5LmAX8EntnyQkT8mSQ07pN0QDmLNWuK7z5qZpZz7hGYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnP/B0iPrwaXcQuCAAAAAElFTkSuQmCC\n" }, "metadata": { "needs_background": "light" } } ], "source": [ "improved_results.make_plots()" ] }, { "source": [ "## Final Results\n" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'task_metrics':
}" ] }, "metadata": {}, "execution_count": 13 }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-02-25T17:30:07.489874\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdO0lEQVR4nO3de7wVdd328c+1YXPLVu+QQCXQSINQCXe6RTuY3CpEaHqTGeKBDj7QCStPBSqmhlooeaRb8cmbNExNyVBRKNuJ8oiAhoqSCUaCmghBHrah6Pf5YwZcbPZhbWDWYu+53q/Xejnzm9+a9Z3lZq41v5k1SxGBmZnlV0W5CzAzs/JyEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CCx3JE2RNL7cdZhtLxwEtt2T9EbB4z1JbxXMn1SiGqZIWi+pWylez6yUHAS23YuInTY8gBeALxS0Tc369SXtCBwH/As4OevXq/fa7Uv5epZPDgJrtST1l/SIpLWSXpZ0raQO6TJJukLSSkmvSXpKUt8G1rGzpFpJV0tSIy91HLAWuAj4Sr3nd5b0v5JekrRG0l0Fy46VtDB9/aWSBqftyyQdWdDvAkm/Sqd7SgpJp0p6Afhj2v4bSf+Q9C9JsyXtV/D8jpImSvp7uvzhtO1eSafVq/dJSUNb8DZbDjgIrDV7Fzgd6AJ8EjgC+Ha6bBDwWaA38AHgy8DqwidL+iDwADAnIr4bjd9v5SvAr4FbgT6SDixYdjNQBewH7Apcka67P3ATcDbQKa1lWQu27TBgH+Bz6fx9QK/0NR4HCo+ELgcOBD4FdAZ+ALwH/JKCIxhJ+wPdgXtbUAeSvidpkaSnJX1/w7rSEH5K0t2S/jNt/3QaNgsk9UrbOkmaJalV7G8k3Zh+gFhU0NZZ0u8lPZf+d5e0XemHiCXpdh+Qtn9M0mNp2yfTtvaS/iCpqjxb1oSI8MOPVvMg2Zke2ciy7wO/TacPB/4KHAJU1Os3BbgRWASc3czr7UmyU61O52cCV6XT3dJluzTwvOuBK4rZBuAC4FfpdE8ggL2aqKlT2ucDJB/m3gL2b6DfDsAaoFc6fznw8xa+333T96kKaA/8AfgoMB84LO3zdeDH6fQ0oAfwGWBiwesOKPffTgu2+bPAAcCigrYJwJh0egzw03R6CElIK/1bezRt/1n6HvQA7kzbTgO+Wu7ta+jRKhLarCGSeku6Jx0yeQ24hOTogIj4I3AtMAlYKWnyhk+tqaOAjsB1zbzMKcDiiFiYzk8FTpRUCewB/DMi1jTwvD2ApVu4aQDLN0xIaifpJ+nw0mu8f2TRJX3s0NBrRcS/gduAk9NP48NJjmBaYh+SnVtdRKwHHgS+SHKkNTvt83uS4TOAd0hCowp4R9LewB4R8acWvm7ZRMRs4J/1mo8lOcIi/e9/F7TfFIm5QKf0goL670Mn4AskR4nbHQeBtWb/A/yF5BPvfwLnkHwyAyAiro6IA4F9SXZcZxc89wbgfmBGejK4MSOAvdKw+QfJJ70uJJ8ElwOd03/k9S0H9m5knW+S7CA22L2BPoXDVCeS7HCOJDkK6Jm2C1gF/LuJ1/olcBLJsFldRDzSSL/GLAIOlfTBdEhjCEnIPZ3WBHB82gZwKcnObixJEF8MnNfC19we7RYRL6fT/wB2S6e7UxDawIq0bRLJ3+MvST6gjAMuiYj3SlNuyzgIrDXbGXgNeENSH+BbGxZIOkjSwekn9zdJdpb1/xGOBp4F7pbUsf7K07HdvYH+QHX66AvcAoxIdwz3AT+XtIukSkmfTZ/+C+Brko6QVCGpe1ojwELghLR/DfClIrZzHck5jiqSHQsA6Y7lRuBnkj6UHj18UtJ/pMsfSbd7Ii0/GiAiFgM/BWaRBOdCknMzXwe+LemxtL630/4LI+KQiPgvYC/gZZKh9Nsk/UrSbg28TKsSyThPk/fvj4gXImJARHwSqCMZIlos6eb0vehdilqLVu6xKT/8aMmDgvF1krHcvwBvAA+RXNXzcLrsCODJdNkqkiGdndJlU4Dx6XQFySfYWcAO9V7rOtLx3Xrt/Ul2zJ3Txy+BV0jG46cV9Bua1vA6sAT4XNq+F/BoWtu9wNVsfo6gfcF6dgJ+l67n7yRHKQF8NF3eEbgSeJHkEtfZQMeC559HM+cdWvD+XwJ8u15bb2BevTal72nn9L3/MMkJ8IvL/TdU5Hb2ZNNzBM8C3dLpbsCz6fT1wPCG+hW03UZyov/i9D34MDC13NtY+FBaqJm1UZJGAKMi4jNb+PxdI2KlpD1Jdu6HAB3StgqSYP1TRNxY8JyvkJxEv1LSb4HvkuxcvxgRp2/dFmVPUk/gnojom85fBqyOiJ9IGgN0jogfSDqK5MhyCHAwcHVE9C9Yz2HAf0fE6ZKuIDmZviztt91cxusvq5i1Yem4/reBn2/Fau5ML7V9B/hORKxNLyn9Trp8GvC/9V7zqySX8EJyXmUGyfDRiVtRR0lI+jUwAOgiaQXwI+AnwO2STiU5Kvty2n0GSQgsIRkC+lrBekRyNDYsbZpMcnTUnoJhzO1BZkcEkm4EjgZWbkjVessFXEXyJtaRXFb1eCbFmOWQpM+R7KT/ABwXyVU/ZpvJ8mTxFGBwE8s/TzJu1gsYRXIFiJltIxExMyJ2jIhjHQLWlMyCIBq+FrdQY9ffmplZCZXzHEFj19++XL+jpFEkRw3suOOOB/bp06d+FzMza8Jjjz22KiK6NrSsVZwsjojJJCdaqKmpiQULFpS5IjNrLXqOadGtlbZry35y1BY/V9LfG1tWziB4kfe/jQjJFy5eLFMtZm1aW9kZbs2O0BpXzm8WTwdGpHfvOwT4V7z/FW4zMyuRzI4IGrkWtxIgIq6jietvzcysdDILgogY3szyAL7TVB8zM8uebzpnVoSrrrqKvn37st9++3HllVcCcPbZZ9OnTx/69evH0KFDWbt2LQBz5syhX79+1NTU8NxzzwGwdu1aBg0axHvvbZc3n7SccxCYNWPRokXccMMNzJs3jyeeeIJ77rmHJUuWMHDgQBYtWsSTTz5J7969ufTSSwGYOHEiM2bM4Morr+S665KfOxg/fjznnHMOFRX+J2fbH/9VmjVj8eLFHHzwwVRVVdG+fXsOO+wwpk2bxqBBg2jfPhldPeSQQ1ixYgUAlZWV1NXVUVdXR2VlJUuXLmX58uUMGDCgjFth1rhW8T0Cs3Lq27cv5557LqtXr6Zjx47MmDGDmpqaTfrceOONDBuW3Fts7NixjBgxgo4dO3LzzTdz1llnMX78+HKUblYUB4FZM/bZZx9++MMfMmjQIHbccUeqq6tp167dxuUXX3wx7du356STTgKgurqauXPnAjB79my6detGRDBs2DAqKyuZOHEiu+3W6n+fxdoQDw2ZFeHUU0/lscceY/bs2eyyyy707p38wNSUKVO45557mDp1KskNdd8XEYwfP55x48Zx4YUXMmHCBEaOHMnVV19djk0wa5SPCMyKsHLlSnbddVdeeOEFpk2bxty5c7n//vuZMGECDz74IFVVVZs956abbmLIkCF07tyZuro6KioqqKiooK6urgxbYNY4B4FZEY477jhWr15NZWUlkyZNolOnTowePZp169YxcOBAIDlhvOEqobq6OqZMmcKsWbMAOOOMMxgyZAgdOnTglltuKdt2mDXEQWBWhIceemiztiVLljTav6qqitra2o3zhx56KE899VQmtZltLZ8jMDPLOQeBmVnOOQjMzHLO5wgsF9rK/fjB9+S3bc9HBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OcyzQIJA2W9KykJZLGNLB8T0m1kv4s6UlJQ7Ksx8zMNpdZEEhqB0wCPg/sCwyXtG+9bucBt0fEJ4ATgJ9nVY+ZmTUsyyOC/sCSiHg+It4GbgWOrdcngP9Mpz8AvJRhPWZm1oAsg6A7sLxgfkXaVugC4GRJK4AZwGkNrUjSKEkLJC149dVXs6jVzCy3yn2yeDgwJSJ6AEOAmyVtVlNETI6Imoio6dq1a8mLNDNry7IMgheBPQrme6RthU4FbgeIiEeAHYAuGdZkZmb1ZBkE84Fekj4iqQPJyeDp9fq8ABwBIGkfkiDw2I+ZWQllFgQRsR4YDcwEFpNcHfS0pIskHZN2OxMYKekJ4NfAVyMisqrJzMw21z7LlUfEDJKTwIVt5xdMPwN8OssazMysaeU+WWxmZmXmIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8u5TINA0mBJz0paImlMI32+LOkZSU9LuiXLeszMbHPts1qxpHbAJGAgsAKYL2l6RDxT0KcXMBb4dESskbRrVvWYmVnDsjwi6A8siYjnI+Jt4Fbg2Hp9RgKTImINQESszLAe20o9e/bk4x//ONXV1dTU1ABwwQUX0L17d6qrq6murmbGjBkAzJkzh379+lFTU8Nzzz0HwNq1axk0aBDvvfde2bbBzDaX2REB0B1YXjC/Aji4Xp/eAJLmAO2ACyLi/vorkjQKGAWw5557ZlKsFae2tpYuXbps0nb66adz1llnbdI2ceJEZsyYwbJly7juuuuYOHEi48eP55xzzqGiwqemzLYn5f4X2R7oBQwAhgM3SOpUv1NETI6Imoio6dq1a2krtC1SWVlJXV0ddXV1VFZWsnTpUpYvX86AAQPKXZqZ1dNsEEj6gqQtCYwXgT0K5nukbYVWANMj4p2I+BvwV5Jg2C41NDSywcSJE5HEqlWrALjzzjvZb7/9OPTQQ1m9ejUAS5cuZdiwYSWve1uRxKBBgzjwwAOZPHnyxvZrr72Wfv368fWvf501a9YAMHbsWEaMGMGll17K6NGjOffccxk/fny5SjezJhSzgx8GPCdpgqQ+LVj3fKCXpI9I6gCcAEyv1+cukqMBJHUhGSp6vgWvUXK1tbUsXLiQBQsWbGxbvnw5s2bN2mTY6pprrmH+/Pl84xvf4JZbkouhzjvvvFa9M3z44Yd5/PHHue+++5g0aRKzZ8/mW9/6FkuXLmXhwoV069aNM888E4Dq6mrmzp1LbW0tzz//PN26dSMiGDZsGCeffDKvvPJKmbfGzDZoNggi4mTgE8BSYIqkRySNkrRzM89bD4wGZgKLgdsj4mlJF0k6Ju02E1gt6RmgFjg7IlZvxfaUxemnn86ECROQtLGtoqKCdevWbRwaeeihh9h9993p1Wu7PeBpVvfu3QHYddddGTp0KPPmzWO33XajXbt2VFRUMHLkSObNm7fJcyKC8ePHM27cOC688EImTJjAyJEjufrqq8uxCWbWgKKGfCLiNeAOkit/ugFDgcclndbM82ZERO+I2DsiLk7bzo+I6el0RMQZEbFvRHw8Im7dqq3JWENDI7/73e/o3r07+++//yZ9x44dy5FHHsndd9/N8OHD+fGPf8y4cePKUfY28eabb/L6669vnJ41axZ9+/bl5Zdf3tjnt7/9LX379t3keTfddBNDhgyhc+fO1NXVUVFRQUVFBXV1dSWt38wa1+xVQ+mn968BHwVuAvpHxEpJVcAzwDXZlrj9ePjhh+nevTsrV65k4MCB9OnTh0suuYRZs2Zt1nfgwIEMHDgQeH9n+Ne//pXLL7+cXXbZhauuuoqqqqpSb8IWe+WVVxg6dCgA69ev58QTT2Tw4MGccsopLFy4EEn07NmT66+/fuNz6urqmDJlysb354wzzmDIkCF06NBh43CZmZVfMZePHgdcERGzCxsjok7SqdmUtX2qPzTy4IMP8re//W3j0cCKFSs44IADmDdvHrvvvjvw/s5w5syZHH300UybNo077riDqVOnMnLkyLJtS0vttddePPHEE5u133zzzY0+p6qqitra2o3zhx56KE899VQm9ZnZlitmaOgCYOPAr6SOknoCRMQD2ZS1/WloaOSggw5i5cqVLFu2jGXLltGjRw8ef/zxjSEAcNlll/Hd736XyspK3nrrLSR5aMTMtivFHBH8BvhUwfy7adtBmVS0nWpsaKQpL730EvPmzeNHP/oRAKeddhoHHXQQnTp14q677sq6ZDOzohQTBO3TW0QAEBFvp5eD5kpjQyOFli1btsn8hz70Ie69996N88cffzzHH398FuWZmW2xYoLgVUnHbLjSR9KxwKpsy7Is9Bxzb/OdWoFlPzmq3CWYtSnFBME3gamSrgVEcv+gEZlWZWZmJdNsEETEUuAQSTul829kXpWZmZVMUXcflXQUsB+ww4Zvz0bERRnWlYm2MjQCHh4xs22nmJvOXUdyv6HTSIaGjgc+nHFdZmZWIsV8j+BTETECWBMRFwKfJP0dATMza/2KCYJ/p/+tk/Qh4B2S+w2ZmVkbUMw5grvTH4u5DHgcCOCGLIsyM7PSaTII0h+keSAi1gJ3SroH2CEi/lWK4szMLHtNDg1FxHvApIL5dQ4BM7O2pZhzBA9IOk6Fv7piZmZtRjFB8A2Sm8ytk/SapNclvZZxXWZmViLFfLO4yZ+kNDOz1q2YXyj7bEPt9X+oxszMWqdiLh89u2B6B6A/8BhweCYVmZlZSRUzNPSFwnlJewBXZlWQmZmVVjEni+tbAeyzrQsxM7PyKOYcwTUk3yaGJDiqSb5hbGZmbUAx5wgWFEyvB34dEXMyqsfMzEqsmCC4A/h3RLwLIKmdpKqIqMu2NDMzK4WivlkMdCyY7wj8IZtyzMys1IoJgh0Kf54yna7KriQzMyulYoLgTUkHbJiRdCDwVnYlmZlZKRVzjuD7wG8kvUTyU5W7k/x0pZmZtQHFfKFsvqQ+wMfSpmcj4p1syzIzs1Ip5sfrvwPsGBGLImIRsJOkb2dfmpmZlUIx5whGpr9QBkBErAFGZlaRmZmVVDFB0K7wR2kktQM6ZFeSmZmVUjEni+8HbpN0fTr/DeC+7EoyM7NSKiYIfgiMAr6Zzj9JcuWQmZm1Ac0ODaU/YP8osIzktwgOBxYXs3JJgyU9K2mJpDFN9DtOUkiqKa5sMzPbVho9IpDUGxiePlYBtwFExH8Vs+L0XMIkYCDJravnS5oeEc/U67cz8D2SsDEzsxJr6ojgLySf/o+OiM9ExDXAuy1Yd39gSUQ8HxFvA7cCxzbQ78fAT4F/t2DdZma2jTQVBF8EXgZqJd0g6QiSbxYXqzuwvGB+Rdq2UXrrij0i4t6mViRplKQFkha8+uqrLSjBzMya02gQRMRdEXEC0AeoJbnVxK6S/kfSoK19YUkVwM+AM5vrGxGTI6ImImq6du26tS9tZmYFijlZ/GZE3JL+dnEP4M8kVxI150Vgj4L5HmnbBjsDfYE/SVoGHAJM9wljM7PSatFvFkfEmvTT+RFFdJ8P9JL0EUkdgBOA6QXr+ldEdImInhHRE5gLHBMRCxpenZmZZWFLfry+KBGxHhgNzCS53PT2iHha0kWSjsnqdc3MrGWK+ULZFouIGcCMem3nN9J3QJa1mJlZwzI7IjAzs9bBQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpafIekZSU9KekDSh7Osx8zMNpdZEEhqB0wCPg/sCwyXtG+9bn8GaiKiH3AHMCGreszMrGFZHhH0B5ZExPMR8TZwK3BsYYeIqI2IunR2LtAjw3rMzKwBWQZBd2B5wfyKtK0xpwL3NbRA0ihJCyQtePXVV7dhiWZmtl2cLJZ0MlADXNbQ8oiYHBE1EVHTtWvX0hZnZtbGtc9w3S8CexTM90jbNiHpSOBc4LCIWJdhPWZm1oAsjwjmA70kfURSB+AEYHphB0mfAK4HjomIlRnWYmZmjcgsCCJiPTAamAksBm6PiKclXSTpmLTbZcBOwG8kLZQ0vZHVmZlZRrIcGiIiZgAz6rWdXzB9ZJavb2ZmzdsuThabmVn5OAjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpb/h6Tb0uWPSuqZZT1mZra5zIJAUjtgEvB5YF9guKR963U7FVgTER8FrgB+mlU9ZmbWsCyPCPoDSyLi+Yh4G7gVOLZen2OBX6bTdwBHSFKGNZmZWT2KiGxWLH0JGBwR/yedPwU4OCJGF/RZlPZZkc4vTfusqreuUcCodPZjwLOZFL3tdAFWNdurbfK251eet781bPuHI6JrQwval7qSLRERk4HJ5a6jWJIWRERNuesoB297Prcd8r39rX3bsxwaehHYo2C+R9rWYB9J7YEPAKszrMnMzOrJMgjmA70kfURSB+AEYHq9PtOBr6TTXwL+GFmNVZmZWYMyGxqKiPWSRgMzgXbAjRHxtKSLgAURMR34BXCzpCXAP0nCoi1oNcNYGfC251eet79Vb3tmJ4vNzKx18DeLzcxyzkFgZpZzDoJtqLlbarRlkm6UtDL9bkiuSNpDUq2kZyQ9Lel75a6pVCTtIGmepCfSbb+w3DWVg6R2kv4s6Z5y17IlHATbSJG31GjLpgCDy11EmawHzoyIfYFDgO/k6P/9OuDwiNgfqAYGSzqkvCWVxfeAxeUuYks5CLadYm6p0WZFxGySK79yJyJejojH0+nXSXYI3ctbVWlE4o10tjJ95OoKFEk9gKOA/1vuWraUg2Db6Q4sL5hfQU52Bva+9A66nwAeLXMpJZMOiywEVgK/j4jcbHvqSuAHwHtlrmOLOQjMthFJOwF3At+PiNfKXU+pRMS7EVFNcveA/pL6lrmkkpF0NLAyIh4rdy1bw0Gw7RRzSw1royRVkoTA1IiYVu56yiEi1gK15Otc0aeBYyQtIxkOPlzSr8pbUss5CLadYm6pYW1Qeuv0XwCLI+Jn5a6nlCR1ldQpne4IDAT+UtaiSigixkZEj4joSfJv/o8RcXKZy2oxB8E2EhHrgQ231FgM3B4RT5e3qtKR9GvgEeBjklZIOrXcNZXQp4FTSD4NLkwfQ8pdVIl0A2olPUnyYej3EdEqL6HMM99iwsws53xEYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOdcqfrzerJwkfRB4IJ3dHXgXeDWd75/eW6qp538VqImI0ZkVabYVHARmzYiI1SR31kTSBcAbEXF5OWsy25Y8NGS2BSSNlDQ/vQ//nZKq0vbjJS1K22c38LyjJD0iqUvpqzZrmIPAbMtMi4iD0vvwLwY2fJP6fOBzafsxhU+QNBQYAwyJiFUlrdasCR4aMtsyfSWNBzoBO5HcWgRgDjBF0u1A4c3nDgdqgEF5ujOptQ4+IjDbMlOA0RHxceBCYAeAiPgmcB7JnWgfS080AywFdgZ6l75Us6Y5CMy2zM7Ay+ntp0/a0Chp74h4NCLOJ7myaMOtyf8OHAfcJGm/kldr1gQHgdmWGUfyK2Rz2PS2y5dJekrSIuD/AU9sWBARfyEJjd9I2ruUxZo1xXcfNTPLOR8RmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZz/x/jOYg2+yx1FwAAAABJRU5ErkJggg==\n" }, "metadata": { "needs_background": "light" } }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-02-25T17:30:07.601652\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcv0lEQVR4nO3de7xUdb3/8dd76ybBS0RCyUUxDyoXE3GHpNmxLNJtiYimmFodf2IXTEXzaL/0qGEXO4QHo6NmHryDphUZikSURxJ1k4ggoWgkFwskhGRUbp/zx1rosNmX2ciaYe/1fj4e83DWmu+s9VkI857v97vWGkUEZmaWX1WVLsDMzCrLQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnILDckTRB0uhK12G2s3AQ2E5P0utFj82S3iha/kKZapggaaOkfcqxP7NychDYTi8i9tjyAF4GPle07q6s9y9pd2AYsAY4M+v91dv3ruXcn+WTg8BaLUkDJT0u6TVJr0j6saR26WuSNFbSCklrJT0rqV8D29hT0gxJ4ySpkV0NA14DrgG+WO/9nST9j6TlklZL+mXRa0MkzUn3/6Kk49L1iyV9qqjdVZLuTJ/3lBSSzpH0MvC7dP19kv4maY2kRyX1LXp/e0ljJP01ff2xdN1vJJ1fr965koa24I/ZcsBBYK3ZJuAiYG/go8CxwNfS1wYDHwcOBN4LfB5YVfxmSe8HpgMzI+Ib0fj9Vr4I3ANMBA6WdHjRa3cAHYC+QBdgbLrtgcDtwDeBjmkti1twbP8K9AY+ky4/BPRK9/EnoLgn9J/A4cCRQCfgUmAzcBtFPRhJhwLdgN+0oA7LAQeBtVoRMTsiZkXExohYDNxE8gEKsAHYEzgYUEQsiIhXit7eFfgDcF9EfLuxfUjaF/gEcHdE/J0kOM5OX9sHOB74SkSsjogNEfGH9K3nALdGxLSI2BwRyyLizy04vKsiYl1EvJEe660R8c+IeAu4CjhU0nslVQH/BlyQ7mNTRPwxbTcZOFBSr3SbZwGTImJ9C+pA0gWS5kmaL+nCdN2haW/sWUm/lrRXuv6otNdRt2W/kjpKeiSt1XZC/h9jrZakAyU9mA6ZrAW+S9I7ICJ+B/wYGA+skHTzlg+r1AlAe+DGZnZzFrAgIuaky3cBZ0iqBnoA/4iI1Q28rwfw4nYeGsCSLU8k7SLp++nw0lre6VnsnT52a2hfEfEmMAk4M/0QHk7SgylZOpx2LjAQOBT4rKR/AW4BLouIQ4BfkPR8AC4GaoELga+k674NfDciNrdk31Y+DgJrzf4b+DPQKyL2Ar4FvD3OHxHjIuJwoA/JENE3i977U+BhYEo6GdyYs4EPpWHzN+BHJB++tSQf1p0kdWzgfUuAAxrZ5jqS4aQtPthAm+JhqjOAIcCnSIa5eqbrBbwKvNnEvm4DvkAybFaIiMcbadeY3sATEVGIiI0kvaiTSf48H03bTCOZR4GkJ9YhfWyQdADQIyJ+38L97jQa6RH1lzQrnQOqS4cCkTQsbfe/6dAjkg6QNKmCh9AsB4G1ZnsCa4HXJR0MfHXLC5I+IumI9Jv7OpIPy/rfSEcCC4FfS2pff+OSPkryATsQ6J8++gF3A2enQ00PAT+R9D5J1ZI+nr79Z8CXJR0rqUpSt7RGgDnA6Wn7GuCUEo7zLZI5jg4kPR8A0m/ZtwI/ktQ17T18VNJ70tcfT497DC3sDaTmAUdLer+kDiQB2AOYTxJOAKem6wC+RzI3cjlJj+xakh5Bq9REj+g64OqI6A9cmS4DnA98hGSY8ox03Wh28j8DB4G1ZpeQ/GP7J8k3/OJvXXul61YDfyX5EP1h8ZvTyeERwFLgV5J2q7f9LwK/iohnI+JvWx7Af5F8IHQiGTraQNIzWUEyJEJEPAl8mWTyeA3JN+n90u1eQRIwq4GrSYKlKbenx7AMeA6Y1cCfw7PAU8A/gB+w9b/t24FDgDub2c82ImJBur1HSHpQc0gm6f8N+Jqk2SRBtT5tPyciBkXEJ4APAa+QnMQ1SdKdkj7Q0hoqrLEeUZD8HYOkl7Y8fb4ZeA/v9IiOBv4WES+Ut+wWigg/SnwAF5B8Q5oPXJium0Tyj2MOydjtnHT9UcBcoI5k6AKSs0ceAaoqfSx+5OdBMrz12A7a1neBr9VbdyDwZL11Sv+udyKZV9mPZCL/2kr/ebTweHsDzwPvJ/lwfxy4IV3/MskQ4DJgv7T9p4HZwK9JAuIRoFOlj6O5hy9WKVG9LuJ64GFJD0bEaUVtxpB8+4N3Js16kkyaXYwnzazM0uGcrwE/eRfb6BIRK9IzqE4GBhWtqyL5e11/0v1sYEpE/COtYXP66EArEhELJG3pEa3jnR7RV4GLIuJ+SZ8nGQr8VERMI5kzQdLZwBSSM7cuIekBXhARhfIfSdMyGxqSdKuSi3nmNfK6lFzEsyg93WxAVrXsII11EYHkeEjOVb8nXdXmJs2sdZH0GWAl8HeaH35qyv2SniP5lvv1iHgNGC7peZIhseXA/xTttwPwJZIztiCZYJ8CXE/zZ2ntdCLiZxFxeER8nOTD/HmSYcMH0ib3kXxBfFu9P4Or0/aPkUzc73wy7FJ9HBgAzGvk9VqSiTYBg0g+ZCveRWppF7He8dYVLfcnGcudAXQnuRipV6WPww8//GjZA+iS/ndfkuDrCCwAjknXHwvMrvee/wBOSp8/mn5mnEXSI6j4MdV/ZDY0FBGPSurZRJMhwO2R/EnNSi862Se2vuhnpxGNdxG3GM47vQEiOe98EEB6Jsnbk2YkvYWLI7lAycx2bvenp4JuIO0RSToX+C8l94J6k+SkAwAkdQUGRsTV6aobSCbyXwNOKmfhpVKaWNlsPAmCByOioXu8PAh8PyIeS5enA/8eEXUNtB1B+ge9++67H37wwQfXb1J2y5Yto7q6mi5duhARzJ07l969e9OuXbut2kUEL7zwAh/60IdYsmQJXbt2Zf369axdu5Zu3bpVqHozy5vZs2e/GhGdG3qtVUwWR8TNwM0ANTU1UVe3TVaUxYoVK+jSpQsvv/wygwcPZtasWXTs2JGHH36Y733ve/zhD3/Y5j233XYbq1ev5sILL2To0KGMGzeOxYsX88ADDzB27NgKHIWZ5ZGkvzb2WiWDYBnvXIQCyTj6sgrVUpJhw4axatUqqqurGT9+PB07dgRg4sSJDB8+fJv2hUKBCRMm8MgjjwAwatQoamtradeuHXff/W7m7szMdpxKDg2dQHJlZy1wBDAuIgbWb1dfJXsEZq1Vz8vaxg1HF3//hEqX0GpJmh0RNQ29llmPQNI9wDHA3pKWksyiVwNExI0kp5PVAouAAslVmGZmO1RbCUHILgizPGto27GSrV8P4OtZ7d/MzErjew2ZmeVcqzhraEdxF9HMbFvuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYFaCsWPH0rdvX/r168fw4cN58803mT59OgMGDKB///587GMfY9GiRQDccMMN9OvXj9raWtavXw/AY489xkUXXVTJQzBrlIPArBnLli1j3Lhx1NXVMW/ePDZt2sTEiRP56le/yl133cWcOXM444wzGD16NAB33XUXc+fO5cgjj2Tq1KlEBN/5zne44oorKnwkZg1zEJiVYOPGjbzxxhts3LiRQqFA165dkcTatWsBWLNmDV27dgWS36DYsGEDhUKB6upq7rzzTo4//ng6depUyUMwa1Suriw22x7dunXjkksuYd9996V9+/YMHjyYwYMHc8stt1BbW0v79u3Za6+9mDVrFgAjR45k0KBB9O3bl6OOOoohQ4YwderUCh+FWePcIzBrxurVq/nVr37FX/7yF5YvX866deu48847GTt2LFOmTGHp0qV8+ctfZtSoUQCcddZZPP3002+3+cY3vsFDDz3EKaecwkUXXcTmzZsrfERmW3MQmDXjt7/9Lfvvvz+dO3emurqak08+mZkzZ/LMM89wxBFHAHDaaafxxz/+cav3LV++nCeffJKTTjqJMWPGMGnSJDp27Mj06dMrcRhmjXIQmDVj3333ZdasWRQKBSKC6dOn06dPH9asWcPzzz8PwLRp0+jdu/dW77viiiu45pprAHjjjTeQRFVVFYVCoezHYNYUzxGYNeOII47glFNOYcCAAey6664cdthhjBgxgu7duzNs2DCqqqp43/vex6233vr2e55++mkABgwYAMAZZ5zBIYccQo8ePbj00ksrchxmjcn0pyqz8G5+qtK3oba8ait/97fn731bOXZ4d//um/qpSg8NmZnlnIPAzCznHARmZjnnyWLLBY8TmzXOPQIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOZRoEko6TtFDSIkmXNfD6vpJmSHpa0lxJtVnWY2Zm28osCCTtAowHjgf6AMMl9anX7NvAvRFxGHA68JOs6jEzs4Zl2SMYCCyKiJciYj0wERhSr00Ae6XP3wssz7AeMzNrQJZB0A1YUrS8NF1X7CrgTElLgSnA+Q1tSNIISXWS6lauXJlFrWZmuVXpyeLhwISI6A7UAndI2qamiLg5ImoioqZz585lL9LMrC3LMgiWAT2Klrun64qdA9wLEBGPA7sBe2dYk5mZ1ZNlEDwF9JK0v6R2JJPBk+u1eRk4FkBSb5Ig8NiPmVkZZRYEEbERGAlMBRaQnB00X9I1kk5Mm10MnCvpGeAe4EsREVnVZGZm29o1y41HxBSSSeDidVcWPX8OOCrLGszMrGmVniw2M7MKcxCYmeWcg8BKsnDhQvr37//2Y6+99uL666/nvvvuo2/fvlRVVVFXV/d2+5kzZ/LhD3+YmpoaXnjhBQBee+01Bg8ezObNmyt1GGbWgEznCKztOOigg5gzZw4AmzZtolu3bgwdOpRCocADDzzAeeedt1X7MWPGMGXKFBYvXsyNN97ImDFjGD16NN/61reoqvL3D7OdiYPAWmz69OkccMAB7Lfffo22qa6uplAoUCgUqK6u5sUXX2TJkiUcc8wx5SvUzEriILAWmzhxIsOHD2+yzeWXX87ZZ59N+/btueOOO7jkkksYPXp0mSo0s5ZwH91aZP369UyePJlTTz21yXb9+/dn1qxZzJgxg5deeol99tmHiOC0007jzDPP5O9//3uZKjaz5rhHYC3y0EMPMWDAAD7wgQ+U1D4iGD16NBMnTuT888/nuuuuY/HixYwbN45rr70242rNrBTuEViL3HPPPc0OCxW7/fbbqa2tpVOnThQKBaqqqqiqqqJQKGRYpZm1hHsEVrJ169Yxbdo0brrpprfX/eIXv+D8889n5cqVnHDCCfTv35+pU6cCUCgUmDBhAo888ggAo0aNora2lnbt2nH33XdX5BjMbFsOAivZ7rvvzqpVq7ZaN3ToUIYOHdpg+w4dOjBjxoy3l48++mieffbZTGs0s5bz0JCZWc45CMzMcs5BYGaWc54jyJGel/2m0iXsEIu/f0KlSzBrU9wjMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMci7TIJB0nKSFkhZJuqyRNp+X9Jyk+ZLuzrIeMzPbVmY/Xi9pF2A88GlgKfCUpMkR8VxRm17A5cBREbFaUpes6jEzs4Zl2SMYCCyKiJciYj0wERhSr825wPiIWA0QESsyrMfMzBqQZRB0A5YULS9N1xU7EDhQ0kxJsyQd19CGJI2QVCepbuXKlRmVa2aWT5WeLN4V6AUcAwwHfiqpY/1GEXFzRNRERE3nzp3LW6GZWRvXbBBI+pyk7QmMZUCPouXu6bpiS4HJEbEhIv4CPE8SDGZmVialfMCfBrwg6TpJB7dg208BvSTtL6kdcDowuV6bX5L0BpC0N8lQ0Ust2IeZmb1LzQZBRJwJHAa8CEyQ9Hg6Zr9nM+/bCIwEpgILgHsjYr6kaySdmDabCqyS9BwwA/hmRKx6F8djZmYtVNLpoxGxVtLPgfbAhcBQ4JuSxkXEDU28bwowpd66K4ueBzAqfZiZWQWUMkdwoqRfAL8HqoGBEXE8cChwcbblmZlZ1krpEQwDxkbEo8UrI6Ig6ZxsyjIzs3IpJQiuAl7ZsiCpPfCBiFgcEdOzKszMzMqjlLOG7gM2Fy1vSteZmVkbUEoQ7JreIgKA9Hm77EoyM7NyKiUIVhad7omkIcCr2ZVkZmblVMocwVeAuyT9GBDJ/YPOzrQqMzMrm2aDICJeBAZJ2iNdfj3zqszMrGxKuqBM0glAX2A3SQBExDUZ1mVmZmVSygVlN5Lcb+h8kqGhU4H9Mq7LzMzKpJTJ4iMj4mxgdURcDXyU5OZwZmbWBpQSBG+m/y1I6gpsAPbJriQzMyunUuYIfp3+WMwPgT8BAfw0y6LMzKx8mgyC9AdppkfEa8D9kh4EdouINeUozszMstfk0FBEbAbGFy2/5RAwM2tbSpkjmC5pmLacN2pmZm1KKUFwHslN5t6StFbSPyWtzbguMzMrk1KuLG7yJynNzKx1azYIJH28ofX1f6jGzMxap1JOH/1m0fPdgIHAbOCTmVRkZmZlVcrQ0OeKlyX1AK7PqiAzMyuvUiaL61sK9N7RhZiZWWWUMkdwA8nVxJAER3+SK4zNzKwNKGWOoK7o+UbgnoiYmVE9ZmZWZqUEwc+BNyNiE4CkXSR1iIhCtqWZmVk5lHRlMdC+aLk98NtsyjEzs3IrJQh2K/55yvR5h+xKMjOzciolCNZJGrBlQdLhwBvZlWRmZuVUyhzBhcB9kpaT/FTlB0l+utLMzNqAUi4oe0rSwcBB6aqFEbEh27LMzKxcSvnx+q8Du0fEvIiYB+wh6WvZl2ZmZuVQyhzBuekvlAEQEauBczOryMzMyqqUINil+EdpJO0CtMuuJDMzK6dSJosfBiZJuildPg94KLuSzMysnEoJgn8HRgBfSZfnkpw5ZGZmbUCzQ0PpD9g/ASwm+S2CTwILStm4pOMkLZS0SNJlTbQbJikk1ZRWtpmZ7SiN9ggkHQgMTx+vApMAIuITpWw4nUsYD3ya5NbVT0maHBHP1Wu3J3ABSdiYmVmZNdUj+DPJt//PRsTHIuIGYFMLtj0QWBQRL0XEemAiMKSBdt8BfgC82YJtm5nZDtJUEJwMvALMkPRTSceSXFlcqm7AkqLlpem6t6W3rugREb9pakOSRkiqk1S3cuXKFpRgZmbNaTQIIuKXEXE6cDAwg+RWE10k/bekwe92x5KqgB8BFzfXNiJujoiaiKjp3Lnzu921mZkVKWWyeF1E3J3+dnF34GmSM4maswzoUbTcPV23xZ5AP+D3khYDg4DJnjA2MyuvFv1mcUSsTr+dH1tC86eAXpL2l9QOOB2YXLStNRGxd0T0jIiewCzgxIioa3hzZmaWhe358fqSRMRGYCQwleR003sjYr6kaySdmNV+zcysZUq5oGy7RcQUYEq9dVc20vaYLGsxM7OGZdYjMDOz1sFBYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnOZBoGk4yQtlLRI0mUNvD5K0nOS5kqaLmm/LOsxM7NtZRYEknYBxgPHA32A4ZL61Gv2NFATER8Gfg5cl1U9ZmbWsCx7BAOBRRHxUkSsByYCQ4obRMSMiCiki7OA7hnWY2ZmDcgyCLoBS4qWl6brGnMO8FBDL0gaIalOUt3KlSt3YIlmZrZTTBZLOhOoAX7Y0OsRcXNE1ERETefOnctbnJlZG7drhtteBvQoWu6ertuKpE8B/x/414h4K8N6zMysAVn2CJ4CeknaX1I74HRgcnEDSYcBNwEnRsSKDGsxM7NGZBYEEbERGAlMBRYA90bEfEnXSDoxbfZDYA/gPklzJE1uZHNmZpaRLIeGiIgpwJR6664sev6pLPdvZmbN2ykmi83MrHIcBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzy7lMg0DScZIWSlok6bIGXn+PpEnp609I6pllPWZmtq3MgkDSLsB44HigDzBcUp96zc4BVkfEvwBjgR9kVY+ZmTUsyx7BQGBRRLwUEeuBicCQem2GALelz38OHCtJGdZkZmb1KCKy2bB0CnBcRPy/dPks4IiIGFnUZl7aZmm6/GLa5tV62xoBjEgXDwIWZlL0jrM38GqzrdomH3t+5fn4W8Ox7xcRnRt6YddyV7I9IuJm4OZK11EqSXURUVPpOirBx57PY4d8H39rP/Ysh4aWAT2Klrun6xpsI2lX4L3AqgxrMjOzerIMgqeAXpL2l9QOOB2YXK/NZOCL6fNTgN9FVmNVZmbWoMyGhiJio6SRwFRgF+DWiJgv6RqgLiImAz8D7pC0CPgHSVi0Ba1mGCsDPvb8yvPxt+pjz2yy2MzMWgdfWWxmlnMOAjOznHMQ7EDN3VKjLZN0q6QV6bUhuSKph6QZkp6TNF/SBZWuqVwk7SbpSUnPpMd+daVrqgRJu0h6WtKDla5lezgIdpASb6nRlk0Ajqt0ERWyEbg4IvoAg4Cv5+j//VvAJyPiUKA/cJykQZUtqSIuABZUuojt5SDYcUq5pUabFRGPkpz5lTsR8UpE/Cl9/k+SD4Rula2qPCLxerpYnT5ydQaKpO7ACcAtla5lezkIdpxuwJKi5aXk5MPA3pHeQfcw4IkKl1I26bDIHGAFMC0icnPsqeuBS4HNFa5juzkIzHYQSXsA9wMXRsTaStdTLhGxKSL6k9w9YKCkfhUuqWwkfRZYERGzK13Lu+Eg2HFKuaWGtVGSqklC4K6IeKDS9VRCRLwGzCBfc0VHASdKWkwyHPxJSXdWtqSWcxDsOKXcUsPaoPTW6T8DFkTEjypdTzlJ6iypY/q8PfBp4M8VLaqMIuLyiOgeET1J/s3/LiLOrHBZLeYg2EEiYiOw5ZYaC4B7I2J+ZasqH0n3AI8DB0laKumcStdURkcBZ5F8G5yTPmorXVSZ7APMkDSX5MvQtIholadQ5plvMWFmlnPuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc61ih+vN6skSe8HpqeLHwQ2ASvT5YHpvaWaev+XgJqIGJlZkWbvgoPArBkRsYrkzppIugp4PSL+s5I1me1IHhoy2w6SzpX0VHof/vsldUjXnyppXrr+0Qbed4KkxyXtXf6qzRrmIDDbPg9ExEfS+/AvALZcSX0l8Jl0/YnFb5A0FLgMqI2IV8tarVkTPDRktn36SRoNdAT2ILm1CMBMYIKke4Him899EqgBBufpzqTWOrhHYLZ9JgAjI+IQ4GpgN4CI+ArwbZI70c5OJ5oBXgT2BA4sf6lmTXMQmG2fPYFX0ttPf2HLSkkHRMQTEXElyZlFW25N/ldgGHC7pL5lr9asCQ4Cs+1zBcmvkM1k69su/1DSs5LmAX8EntnyQkT8mSQ07pN0QDmLNWuK7z5qZpZz7hGYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnP/B0iPrwaXcQuCAAAAAElFTkSuQmCC\n" }, "metadata": { "needs_background": "light" } } ], "source": [ "results.make_plots()\n", "improved_results.make_plots()" ] } ] } ================================================ FILE: examples/basic/quick_demo.py ================================================ """ Demo: Creates a simple new method and applies it to a single CL setting. """ import sys from argparse import Namespace from collections import defaultdict from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Tuple, Type import gym import pandas as pd import torch import tqdm from gym import spaces from numpy import inf from simple_parsing import ArgumentParser from torch import Tensor, nn from sequoia import Method, Setting from sequoia.common import Config from sequoia.settings import Environment from sequoia.settings.sl import DomainIncrementalSLSetting from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards from sequoia.settings.sl.incremental.results import IncrementalSLResults as Results class MyModel(nn.Module): """Simple classification model without any CL-related mechanism. To keep things simple, this demo model is designed for supervised (classification) settings where observations have shape [3, 28, 28] (ie the MNIST variants: Mnist, FashionMnist, RotatedMnist, EMnist, etc.) NOTE: You are free to use whatever kind of Model you want, or even not to use one at all! This is just an example to help you get started quickly. """ def __init__( self, observation_space: gym.Space, action_space: gym.Space, reward_space: gym.Space, ): super().__init__() image_shape = observation_space["x"].shape assert image_shape == (3, 28, 28), "this example only works on mnist-like data" assert isinstance(action_space, spaces.Discrete) assert action_space == reward_space n_classes = action_space.n image_channels = image_shape[0] self.encoder = nn.Sequential( nn.Conv2d(image_channels, 6, 5), nn.ReLU(), nn.MaxPool2d(2), nn.Conv2d(6, 16, 5), nn.ReLU(), nn.MaxPool2d(2), ) self.classifier = nn.Sequential( nn.Flatten(), nn.Linear(256, 120), nn.ReLU(), nn.Linear(120, 84), nn.ReLU(), nn.Linear(84, n_classes), ) self.loss = nn.CrossEntropyLoss() def forward(self, observations: Observations) -> Tensor: # NOTE: here we don't make use of the task labels. x = observations.x task_labels = observations.task_labels features = self.encoder(x) logits = self.classifier(features) return logits def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment ) -> Tuple[Tensor, Dict]: """Shared step used for both training and validation. Parameters ---------- batch : Tuple[Observations, Optional[Rewards]] Batch containing Observations, and optional Rewards. When the Rewards are None, it means that we'll need to provide the Environment with actions before we can get the Rewards (e.g. image labels) back. This happens for example when being applied in a Setting which cares about sample efficiency or training performance, for example. environment : Environment The environment we're currently interacting with. Used to provide the rewards when they aren't already part of the batch (as mentioned above). Returns ------- Tuple[Tensor, Dict] The Loss tensor, and a dict of metrics to be logged. """ # Since we're training on a Passive environment, we will get both observations # and rewards, unless we're being evaluated based on our training performance, # in which case we will need to send actions to the environments before we can # get the corresponding rewards (image labels). observations: Observations = batch[0] rewards: Optional[Rewards] = batch[1] # Get the predictions: logits = self(observations) y_pred = logits.argmax(-1) if rewards is None: # If the rewards in the batch is None, it means we're expected to give # actions before we can get rewards back from the environment. rewards = environment.send(Actions(y_pred)) assert rewards is not None image_labels = rewards.y loss = self.loss(logits, image_labels) accuracy = (y_pred == image_labels).sum().float() / len(image_labels) metrics_dict = {"accuracy": accuracy.item()} return loss, metrics_dict class DemoMethod(Method, target_setting=DomainIncrementalSLSetting): """Minimal example of a Method targetting the Class-Incremental CL setting. For a quick intro to dataclasses, see examples/dataclasses_example.py """ @dataclass class HParams: """Hyper-parameters of the demo model.""" # Learning rate of the optimizer. learning_rate: float = 0.001 def __init__(self, hparams: HParams = None): self.hparams: DemoMethod.HParams = hparams or self.HParams() self.max_epochs: int = 1 self.early_stop_patience: int = 2 # We will create those when `configure` will be called, before training. self.model: MyModel self.optimizer: torch.optim.Optimizer def configure(self, setting: DomainIncrementalSLSetting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ self.model = MyModel( observation_space=setting.observation_space, action_space=setting.action_space, reward_space=setting.reward_space, ) self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.hparams.learning_rate, ) def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): """Example train loop. You can do whatever you want with train_env and valid_env here. NOTE: In the Settings where task boundaries are known (in this case all the supervised CL settings), this will be called once per task. """ # configure() will have been called by the setting before we get here. best_val_loss = inf best_epoch = 0 for epoch in range(self.max_epochs): self.model.train() print(f"Starting epoch {epoch}") postfix = {} # Training loop: with tqdm.tqdm(train_env) as train_pbar: train_pbar.set_description(f"Training Epoch {epoch}") for i, batch in enumerate(train_pbar): loss, metrics_dict = self.model.shared_step(batch, environment=train_env) self.optimizer.zero_grad() loss.backward() self.optimizer.step() postfix.update(metrics_dict) train_pbar.set_postfix(postfix) # Validation loop: self.model.eval() torch.set_grad_enabled(False) with tqdm.tqdm(valid_env) as val_pbar: val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 for i, batch in enumerate(val_pbar): batch_val_loss, metrics_dict = self.model.shared_step( batch, environment=valid_env ) epoch_val_loss += batch_val_loss postfix.update(metrics_dict, val_loss=epoch_val_loss) val_pbar.set_postfix(postfix) torch.set_grad_enabled(True) if epoch_val_loss < best_val_loss: best_val_loss = epoch_val_loss best_epoch = epoch if epoch - best_epoch > self.early_stop_patience: print(f"Early stopping at epoch {i}.") break def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: """Get a batch of predictions (aka actions) for these observations.""" with torch.no_grad(): logits = self.model(observations) # Get the predicted classes y_pred = logits.argmax(dim=-1) return self.target_setting.Actions(y_pred) @classmethod def add_argparse_args(cls, parser: ArgumentParser): """Adds command-line arguments for this Method to an argument parser.""" parser.add_arguments(cls.HParams, "hparams") @classmethod def from_argparse_args(cls, args: Namespace): """Creates an instance of this Method from the parsed arguments.""" hparams: cls.HParams = args.hparams return cls(hparams=hparams) def demo_simple(): """Simple demo: Creating and applying a Method onto a Setting.""" from sequoia.settings.sl import DomainIncrementalSLSetting ## 1. Creating the setting: setting = DomainIncrementalSLSetting(dataset="fashionmnist", batch_size=32) ## 2. Creating the Method method = DemoMethod() # (Optional): You can also create a Config, which holds other fields like # `log_dir`, `debug`, `device`, etc. which aren't specific to either the # Setting or the Method. config = Config(debug=True, render=False, device="cpu") ## 3. Applying the method to the setting: (optionally passing a Config to # use for that run) results = setting.apply(method, config=config) print(results.summary()) print(f"objective: {results.objective}") def demo_command_line(): """Run this quick demo from the command-line.""" parser = ArgumentParser(description=__doc__) # Add command-line arguments for the Method and the Setting. DemoMethod.add_argparse_args(parser) # Add command-line arguments for the Setting and the Config (an object with # options like log_dir, debug, etc, which are not part of the Setting or the # Method) using simple-parsing. parser.add_arguments(DomainIncrementalSLSetting, "setting") parser.add_arguments(Config, "config") args = parser.parse_args() # Create the Method from the parsed arguments method: DemoMethod = DemoMethod.from_argparse_args(args) # Extract the Setting and Config from the args. setting: DomainIncrementalSLSetting = args.setting config: Config = args.config # Run the demo, applying that DemoMethod on the given setting. results: Results = setting.apply(method, config=config) print(results.summary()) print(f"objective: {results.objective}") if __name__ == "__main__": # Example: Evaluate a Method on a single CL setting: ### ### First option: Run the demo, creating the Setting and Method directly. ### # demo_simple() ## ## Second part of the demo: Same as before, but customize the options for ## the Setting and the Method from the command-line. ## demo_command_line() ## ## As a little bonus: Evaluate on *ALL* the applicable settings, and ## aggregate the results in a nice little LaTeX-formatted table. ## # from examples.demo_utils import demo_all_settings # all_results = demo_all_settings(DemoMethod) ================================================ FILE: examples/basic/quick_demo_ewc.py ================================================ """ Example script: Defines a new Method based on the DemoMethod from the quick_demo.py script, adding an EWC-like loss to prevent the weights from changing too much between tasks. """ import sys from copy import deepcopy from dataclasses import dataclass from typing import ClassVar, Dict, Optional, Tuple import gym import torch from torch import Tensor from examples.basic.quick_demo import DemoMethod, MyModel from sequoia.settings import DomainIncrementalSLSetting from sequoia.settings.sl.incremental.objects import Observations, Rewards from sequoia.utils.utils import dict_intersection from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class MyImprovedModel(MyModel): """Adds an ewc-like penalty to the demo model.""" def __init__( self, observation_space: gym.Space, action_space: gym.Space, reward_space: gym.Space, ewc_coefficient: float = 1.0, ewc_p_norm: int = 2, ): super().__init__( observation_space, action_space, reward_space, ) self.ewc_coefficient = ewc_coefficient self.ewc_p_norm = ewc_p_norm self.previous_model_weights: Dict[str, Tensor] = {} self._previous_task: Optional[int] = None self._n_switches: int = 0 def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs): base_loss, metrics = super().shared_step(batch, *args, **kwargs) ewc_loss = self.ewc_coefficient * self.ewc_loss() metrics["ewc_loss"] = ewc_loss return base_loss + ewc_loss, metrics def on_task_switch(self, task_id: int) -> None: """Executed when the task switches (to either a known or unknown task).""" if self._previous_task is None and self._n_switches == 0: logger.debug("Starting the first task, no EWC update.") elif task_id is None or task_id != self._previous_task: # NOTE: We also switch between unknown tasks. logger.debug( f"Switching tasks: {self._previous_task} -> {task_id}: " f"Updating the EWC 'anchor' weights." ) self._previous_task = task_id self.previous_model_weights.clear() self.previous_model_weights.update( deepcopy({k: v.detach() for k, v in self.named_parameters()}) ) self._n_switches += 1 def ewc_loss(self) -> Tensor: """Gets an 'ewc-like' regularization loss. NOTE: This is a simplified version of EWC where the loss is the P-norm between the current weights and the weights as they were on the begining of the task. """ if self._previous_task is None: # We're in the first task: do nothing. return 0.0 old_weights: Dict[str, Tensor] = self.previous_model_weights new_weights: Dict[str, Tensor] = dict(self.named_parameters()) loss = 0.0 for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights): loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm) return loss class ImprovedDemoMethod(DemoMethod): """Improved version of the demo method, that adds an ewc-like regularizer.""" # Name of this method: name: ClassVar[str] = "demo_ewc" @dataclass class HParams(DemoMethod.HParams): """Hyperparameters of this new improved method. (Adds ewc params).""" # Coefficient of the ewc-like loss. ewc_coefficient: float = 1.0 # Distance norm used in the ewc loss. ewc_p_norm: int = 2 def __init__(self, hparams: HParams = None): super().__init__(hparams=hparams or self.HParams.from_args()) def configure(self, setting: DomainIncrementalSLSetting): # Use the improved model, with the added EWC-like term. self.model = MyImprovedModel( observation_space=setting.observation_space, action_space=setting.action_space, reward_space=setting.reward_space, ewc_coefficient=self.hparams.ewc_coefficient, ewc_p_norm=self.hparams.ewc_p_norm, ) self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.hparams.learning_rate, ) def on_task_switch(self, task_id: Optional[int]): self.model.on_task_switch(task_id) def demo_ewc(): """Demo: Comparing two methods on the same setting:""" ## 1. Create the Setting (same as in quick_demo.py) setting = DomainIncrementalSLSetting(dataset="fashionmnist", nb_tasks=5, batch_size=64) # setting = DomainIncrementalSLSetting.from_args() # 2.1: Get the results for the base method base_method = DemoMethod() base_results = setting.apply(base_method) # 2.2: Get the results for the 'improved' method: new_method = ImprovedDemoMethod() new_results = setting.apply(new_method) # Compare the two results: print( f"\n\nComparison: DemoMethod vs ImprovedDemoMethod - (DomainIncrementalSLSetting, dataset=fashionmnist):" ) print(base_results.summary()) print(new_results.summary()) exit() if __name__ == "__main__": # Example: Comparing two methods on the same setting: from sequoia.settings import DomainIncrementalSLSetting ## 1. Create the Setting (same as in quick_demo.py) setting = DomainIncrementalSLSetting( dataset="fashionmnist", nb_tasks=5, monitor_training_performance=True ) # setting = DomainIncrementalSLSetting.from_args() # Get the results for the base method: base_method = DemoMethod() base_results = setting.apply(base_method) # Get the results for the 'improved' method: new_method = ImprovedDemoMethod() new_results = setting.apply(new_method) print( f"\n\nComparison: DemoMethod vs ImprovedDemoMethod - (DomainIncrementalSLSetting, dataset=fashionmnist):" ) print(base_results.summary()) print(new_results.summary()) exit() ## ## As a little bonus: Evaluate *both* methods on *ALL* their applicable ## settings, and aggregate the results in a nice LaTeX-formatted table. ## from examples.demo_utils import compare_results, demo_all_settings base_results = demo_all_settings(DemoMethod, datasets=["mnist", "fashionmnist"]) improved_results = demo_all_settings( ImprovedDemoMethod, datasets=["mnist", "fashionmnist"], monitor_training_performance=True, ) compare_results( { DemoMethod: base_results, ImprovedDemoMethod: improved_results, } ) ================================================ FILE: examples/basic/quick_demo_packnet.py ================================================ from sequoia.methods.packnet_method import PackNetMethod from sequoia.settings.sl import TaskIncrementalSLSetting if __name__ == "__main__": setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=2) my_method = PackNetMethod() results = setting.apply(my_method) ================================================ FILE: examples/basic/quick_demo_test.py ================================================ """ TODO: Write tests that check that the examples are working correctly. """ import contextlib import sys import pytest from examples.basic.quick_demo import demo_command_line, demo_simple from sequoia.settings import ClassIncrementalSetting, Results @pytest.mark.timeout(120) def test_quick_demo(monkeypatch): """Test that runs the quick demo and checks that the results correspond to what you'd expect. """ results: ClassIncrementalSetting.Results = None summary_method = ClassIncrementalSetting.Results.summary def summary(self: ClassIncrementalSetting.Results): nonlocal results results = self return summary_method(self) monkeypatch.setattr(ClassIncrementalSetting.Results, "summary", summary) demo_simple() from sequoia.common.metrics import ClassificationMetrics # NOTE: Results aren't going to give *exactly* the same results, so we can't # test like this directly: # assert results.average_metrics_per_task == [ # ClassificationMetrics(n_samples=1984, accuracy=0.500504), # ClassificationMetrics(n_samples=2016, accuracy=0.499504), # ClassificationMetrics(n_samples=1984, accuracy=0.817036), # ClassificationMetrics(n_samples=2016, accuracy=0.835317), # ClassificationMetrics(n_samples=1984, accuracy=0.99748), # ] assert results.final_performance_metrics[0].n_samples == 1984 assert results.final_performance_metrics[1].n_samples == 2016 assert results.final_performance_metrics[2].n_samples == 1984 assert results.final_performance_metrics[3].n_samples == 2016 assert results.final_performance_metrics[4].n_samples == 1984 assert 0.48 <= results.final_performance_metrics[0].accuracy <= 0.55 assert 0.48 <= results.final_performance_metrics[1].accuracy <= 0.70 assert 0.60 <= results.final_performance_metrics[2].accuracy <= 1.00 assert 0.70 <= results.final_performance_metrics[3].accuracy <= 1.00 assert 0.99 <= results.final_performance_metrics[4].accuracy <= 1.00 ================================================ FILE: examples/clcomp21/README.md ================================================ ## Example Submissions for CLVision Workshop Examples in this folder are aimed at solving the supervised learning track of the competition. Each example builds on top of the previous, in a manner that improves the overall performance you can expect on any given CL setting. As such, it is recommended that you take a look at the examples in the following order: 0. [DummyMethod](dummy_method.py) Non-parametric method that simply returns a random prediction for each observation. 1. [Simple Classifier](classifier.py): Standard neural net classifier without any CL-related mechanism. Works in the SL track, but has very poor performance. 2. [Multi-Head / Task Inference Classifier](multihead_classifier.py): Performs multi-head prediction, and a simple form of task inference. Gets better results that the example. 3. [CL Regularized Classifier](regularization_example.py): Adds a simple CL regularization loss to the multihead classifier above. ## RL Examples: For RL, you can take a look at these examples: - [A2C Example](a2c_example.py): Example where A2C is implemented from scratch as a Method for the RL track. The code for A2C was adapted from [this blogpost.](https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f) - [SB3 Example](sb3_example.py): Example of how we can extend an existing Method from Stable-Baselines3. ================================================ FILE: examples/clcomp21/__init__.py ================================================ ================================================ FILE: examples/clcomp21/a2c_example.py ================================================ from argparse import Namespace from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Tuple import gym import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch import torch.nn as nn import torch.optim as optim from gym import spaces from gym.spaces.utils import flatdim # TODO: Migrate stuff to directly import simple-parsing's hparams module. # from simple_parsing.helpers.hparams import HyperParameters from simple_parsing import ArgumentParser from torch import Tensor from torch.distributions import Categorical from sequoia.common.hparams import HyperParameters, log_uniform from sequoia.common.spaces import Image from sequoia.methods import Method from sequoia.settings.rl import ActiveEnvironment, RLSetting class ActorCritic(nn.Module): def __init__( self, observation_space: gym.Space, action_space: gym.Space, hidden_size: int, ): super().__init__() self.observation_space = observation_space # NOTE: See note below for why we don't use the task label portion of the space # here. self.num_inputs = flatdim(self.observation_space.x) self.hidden_size = hidden_size if not isinstance(action_space, spaces.Discrete): raise NotImplementedError("This example only works with discrete action spaces.") self.action_space = action_space self.num_actions = self.action_space.n if self.num_inputs < 100: # If we have a reasonably-small input space, use an MLP architecture. self.critic = nn.Sequential( nn.Flatten(), nn.Linear(self.num_inputs, self.hidden_size), nn.ReLU(inplace=True), nn.Linear(self.hidden_size, 1), ) self.actor = nn.Sequential( nn.Flatten(), nn.Linear(self.num_inputs, self.hidden_size), nn.ReLU(inplace=True), nn.Linear(self.hidden_size, self.num_actions), ) else: assert isinstance(self.observation_space.x, Image) channels = self.observation_space.x.channels self.encoder = nn.Sequential( nn.Conv2d(channels, 6, kernel_size=5, stride=1, padding=1, bias=False), nn.BatchNorm2d(6), nn.ReLU(inplace=True), nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False), nn.BatchNorm2d(16), nn.ReLU(inplace=True), nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), nn.BatchNorm2d(16), nn.AdaptiveAvgPool2d(output_size=(8, 8)), # [16, 8, 8] nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False), nn.BatchNorm2d(32), # [32, 6, 6] nn.ReLU(inplace=True), nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False), nn.BatchNorm2d(32), # [32, 4, 4] nn.Flatten(), ) # NOTE: Here we share the encoder for both the actor and critic. self.critic = nn.Sequential( self.encoder, nn.Linear(512, self.hidden_size), nn.ReLU(inplace=True), nn.Linear(self.hidden_size, 1), ) self.actor = nn.Sequential( self.encoder, nn.Linear(512, self.hidden_size), nn.ReLU(inplace=True), nn.Linear(self.hidden_size, self.num_actions), ) def forward(self, observation: RLSetting.Observations) -> Tuple[Tensor, Categorical]: x = observation.x state = torch.as_tensor(x, dtype=torch.float) # NOTE: Here you could for instance concatenate the task labels onto the state # to make the model multi-task! However if you target the IncrementalRLSetting # or above, you might not have these task labels at test-time, so that would # have to be taken into consideration (e.g. can't concat None to a Tensor) # task_labels = observation.task_labels x_space = self.observation_space.x batched_inputs = state.ndim > len(x_space.shape) if not batched_inputs: # Add a batch dimension if necessary. state = state.unsqueeze(0) value = self.critic(state) policy_logits = self.actor(state) if not batched_inputs: # Remove the batch dimension from the predictions if necessary. value = value.squeeze(0) policy_logits = policy_logits.squeeze(0) policy_dist = Categorical(logits=policy_logits) # policy_dist = F.relu(self.actor_linear1(state)) # policy_dist = F.softmax(self.actor_linear2(policy_dist), dim=1) return value, policy_dist class ExampleA2CMethod(Method, target_setting=RLSetting): """Example A2C method. Most of the code here was taken from: https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f """ @dataclass class HParams(HyperParameters): """Hyper-Parameters of the model, as a dataclass. Fields get command-line arguments with simple-parsing. """ # Hidden size (representation size). hidden_size: int = 256 # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4) # Discount factor gamma: float = 0.99 # Coefficient for the entropy term in the loss formula. entropy_term_coefficient: float = 0.001 # Maximum length of an episode, when desired. (Generally not needed). max_episode_steps: Optional[int] = None def __init__(self, hparams: HParams = None, render: bool = False): self.hparams = hparams or self.HParams() self.task: int = 0 self.plots_dir: Path = Path("plots") self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.render = render def configure(self, setting: RLSetting): self.actor_critic = ActorCritic( observation_space=setting.observation_space, action_space=setting.action_space, hidden_size=self.hparams.hidden_size, ).to(self.device) self.ac_optimizer = optim.Adam( self.actor_critic.parameters(), lr=self.hparams.learning_rate ) # If there is a limit on the number of steps per task, then observe that limit. self.max_training_steps = setting.steps_per_phase def fit(self, train_env: ActiveEnvironment, valid_env: ActiveEnvironment): assert isinstance(train_env, gym.Env) # Just to illustrate that it's a gym Env. # NOTE: This example only works if the environment isn't vectorized. all_lengths: List[int] = [] average_lengths: List[float] = [] all_rewards: List[float] = [] episode = 0 total_steps = 0 while not train_env.is_closed() and total_steps < self.max_training_steps: episode += 1 log_probs: List[Tensor] = [] values: List[Tensor] = [] rewards: List[Tensor] = [] entropy_term = 0 observation: RLSetting.Observations = train_env.reset() # Convert numpy arrays in the observation into Tensors on the right device. observation = observation.torch(device=self.device) done = False episode_steps = 0 while not done and total_steps < self.max_training_steps: episode_steps += 1 value, policy_dist = self.actor_critic.forward(observation) value = value.cpu().detach().numpy() action = policy_dist.sample() log_prob = policy_dist.log_prob(action) entropy = policy_dist.entropy() # NOTE: 'correct' thing to do would be to pass Actions objects of the # right type. This is for future-proofing this Method so it can # still function in the future if new settings are added. action = RLSetting.Actions(y_pred=action.cpu().detach().numpy()) if self.render: train_env.render() new_observation: RLSetting.Observations reward: RLSetting.Rewards new_observation, reward, done, _ = train_env.step(action) new_observation = new_observation.torch(device=self.device) total_steps += 1 # Likewise, in order to support different future settings, we receive a # Rewards object, which contains the reward value (the float when the # env isn't batched.). reward_value: float = reward.y rewards.append(reward_value) values.append(value) log_probs.append(log_prob) entropy_term += entropy observation = new_observation Qval, _ = self.actor_critic.forward(new_observation) Qval = Qval.detach().cpu().numpy() all_rewards.append(np.sum(rewards)) all_lengths.append(episode_steps) average_lengths.append(np.mean(all_lengths[-10:])) if episode % 10 == 0: print( f"step {total_steps}/{self.max_training_steps}, " f"episode: {episode}, " f"reward: {np.sum(rewards)}, " f"total length: {episode_steps}, " f"average length: {average_lengths[-1]} \n" ) if total_steps >= self.max_training_steps: print(f"Reached the limit of {self.max_training_steps} steps.") break # compute Q values Q_values = np.zeros_like(values) # Use the last value from the critic as the final value estimate. q_value = Qval for t, reward in reversed(list(enumerate(rewards))): q_value = reward + self.hparams.gamma * q_value Q_values[t] = q_value # update actor critic values = torch.as_tensor(values, dtype=torch.float, device=self.device) Q_values = torch.as_tensor(Q_values, dtype=torch.float, device=self.device) log_probs = torch.stack(log_probs) advantage = Q_values - values actor_loss = (-log_probs * advantage).mean() critic_loss = 0.5 * advantage.pow(2).mean() ac_loss = ( actor_loss + critic_loss + self.hparams.entropy_term_coefficient * entropy_term ) self.ac_optimizer.zero_grad() ac_loss.backward() self.ac_optimizer.step() # Plot results smoothed_rewards = pd.Series.rolling(pd.Series(all_rewards), 10).mean() smoothed_rewards = [elem for elem in smoothed_rewards] plt.plot(all_rewards) plt.plot(smoothed_rewards) plt.plot() plt.xlabel("Episode") plt.ylabel("Reward") self.plots_dir.mkdir(parents=True, exist_ok=True) plt.savefig(self.plots_dir / f"task_{self.task}_0.png") # plt.show() plt.plot(all_lengths) plt.plot(average_lengths) plt.xlabel("Episode") plt.ylabel("Episode length") plt.savefig(self.plots_dir / f"task_{self.task}_1.png") # plt.show() def get_actions( self, observations: RLSetting.Observations, action_space: gym.Space ) -> RLSetting.Actions: # Move the observations to the right device, converting numpy arrays to tensors. observations = observations.torch(device=self.device) value, action_dist = self.actor_critic(observations) return RLSetting.Actions(y_pred=action_dist.sample()) # The methods below aren't required, but are good to add. def on_task_switch(self, task_id: Optional[int]) -> None: """Called by the Setting when switching between tasks. Parameters ---------- task_id : Optional[int] the id of the new task. When None, we are basically being informed that there is a task boundary, but without knowing what task we're switching to. """ if isinstance(task_id, int): self.task = task_id @classmethod def add_argparse_args(cls, parser: ArgumentParser): parser.add_arguments(cls.HParams, dest="hparams") @classmethod def from_argparse_args(cls, args: Namespace): hparams: ExampleA2CMethod.HParams = args.hparams return cls(hparams=hparams) def get_search_space(self, setting: RLSetting) -> Dict: return self.hparams.get_orion_space() def adapt_to_new_hparams(self, new_hparams: Dict) -> None: self.hparams = self.HParams.from_dict(new_hparams) if __name__ == "__main__": # Create the Setting. # CartPole for debugging: from sequoia.settings.rl import TraditionalRLSetting setting = TraditionalRLSetting(dataset="CartPole-v0", nb_tasks=1, train_max_steps=10_000) # OR: Incremental CartPole: from sequoia.settings.rl import IncrementalRLSetting setting = IncrementalRLSetting(dataset="CartPole-v0", nb_tasks=5, train_steps_per_task=10_000) # OR: Setting of the RL Track of the competition: # setting = IncrementalRLSetting.load_benchmark("rl_track") # Create the Method: method = ExampleA2CMethod(render=True) # Apply the Method onto the Setting to get Results. results = setting.apply(method) print(results.summary()) # BONUS: Running a hyper-parameter sweep: # method.hparam_sweep(setting) ================================================ FILE: examples/clcomp21/a2c_example_test.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.conftest import slow from sequoia.settings.rl import IncrementalRLSetting, RLSetting from sequoia.settings.sl import ClassIncrementalSetting from .a2c_example import ExampleA2CMethod from .dummy_method import DummyMethod @slow @pytest.mark.timeout(120) def test_cartpole_state(cartpole_state_setting: SettingProxy[RLSetting]): """Applies this Method to a simple cartpole-state setting.""" method = ExampleA2CMethod() results = cartpole_state_setting.apply(method) assert results.to_log_dict() results: RLSetting.Results # TODO: The example isn't actually performing that well! We should try to get # something that can easily and reproducibly solve cartpole to 200, if possible. # assert 150 < results.average_final_performance.mean_episode_length # TODO: Increase this bound when performance is improved. assert 5 < results.average_final_performance.mean_episode_length @slow @pytest.mark.timeout(120) def test_incremental_cartpole_state( incremental_cartpole_state_setting: SettingProxy[IncrementalRLSetting], ): """Applies this Method to the class-incremental mnist Setting.""" method = ExampleA2CMethod() results = incremental_cartpole_state_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results # TODO: Increase this bound assert 5 <= results.average_online_performance.objective assert 5 <= results.average_final_performance.objective @slow @pytest.mark.timeout(300) def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = DummyMethod() results = rl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results online_perf = results.average_online_performance # TODO: get an estimate of the upper bound of the random method on the RL track. TODO = 1_000 # this is way too large. assert 0 < online_perf.objective < TODO final_perf = results.average_final_performance assert 0 < final_perf.objective < TODO ================================================ FILE: examples/clcomp21/classifier.py ================================================ """ Example Method for the SL track: Uses a simple classifier, without any CL mechanism. As you'd expect, this Method exhibits complete forgetting of all previous tasks. You can use this model and method as a jumping off point for your own submission. """ from argparse import Namespace from dataclasses import dataclass from typing import ClassVar, Dict, List, Optional, Tuple, Type import gym import torch import tqdm from gym import spaces from numpy import inf from simple_parsing import ArgumentParser from torch import Tensor, nn from torch.optim.optimizer import Optimizer from torchvision.models import ResNet, resnet18 from sequoia.common.hparams import HyperParameters, log_uniform from sequoia.common.spaces import Image from sequoia.methods import Method from sequoia.settings import ClassIncrementalSetting from sequoia.settings.sl import PassiveEnvironment from sequoia.settings.sl.incremental import Actions, Environment, Observations, Rewards @dataclass class HParams(HyperParameters): """Hyper-parameters of the demo model.""" # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001) # L2 regularization coefficient. weight_decay: float = log_uniform(1e-9, 1e-3, default=1e-6) # Maximum number of training epochs per task. max_epochs_per_task: int = 10 # Number of epochs with increasing validation loss after which we stop training. early_stop_patience: int = 2 class Classifier(nn.Module): """Simple classification model without any CL-related mechanism. This example model uses a resnet18 as the encoder, and a single output layer. """ HParams: ClassVar[Type[HParams]] = HParams def __init__( self, observation_space: gym.Space, action_space: gym.Space, reward_space: gym.Space, hparams: HParams = None, ): super().__init__() self.hparams = hparams or self.HParams() image_space: Image = observation_space.x # image_shape = image_space.shape # This example is intended for classification / discrete action spaces. assert isinstance(action_space, spaces.Discrete) assert action_space == reward_space self.n_classes = action_space.n self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.encoder, self.representations_size = self.create_encoder(image_space) self.output = self.create_output_head() self.loss = nn.CrossEntropyLoss() def create_output_head(self) -> nn.Module: return nn.Linear(self.representations_size, self.n_classes).to(self.device) def configure_optimizers(self) -> Optimizer: return torch.optim.Adam( self.parameters(), lr=self.hparams.learning_rate, weight_decay=self.hparams.weight_decay, ) def create_encoder(self, image_space: Image) -> Tuple[nn.Module, int]: """Create an encoder for the given image space. Returns the encoder, as well as the size of the representations it will produce. Parameters ---------- image_space : Image A subclass of `gym.spaces.Box` for images. Represents the space the images will come from during training and testing. Its attributes of interest include `c`, `w`, `h`, `shape` and `dype`. Returns ------- Tuple[nn.Module, int] The encoder to be used, (a nn.Module), as well as the size of the representations it will produce. Raises ------ NotImplementedError If no encoder is available for the given image dimensions. """ if image_space.width == image_space.height == 28: # Setup for mnist variants. # (not part of the competition, but used for debugging below). encoder = nn.Sequential( nn.Conv2d(image_space.channels, 6, 5), nn.ReLU(), nn.MaxPool2d(2), nn.Conv2d(6, 16, 5), nn.ReLU(), nn.MaxPool2d(2), nn.Flatten(), ) features = 256 elif image_space.width == image_space.height == 32: # Synbols dataset: use a resnet18 by default. resnet: ResNet = resnet18(pretrained=False) features = resnet.fc.in_features # Disable/Remove the last layer. resnet.fc = nn.Sequential() encoder = resnet else: raise NotImplementedError( f"TODO: Add an encoder for the given image space {image_space}" ) return encoder.to(self.device), features def forward(self, observations: Observations) -> Tensor: # NOTE: here we don't make use of the task labels. observations = observations.to(self.device) x = observations.x task_labels = observations.task_labels features = self.encoder(x) logits = self.output(features) return logits def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment ) -> Tuple[Tensor, Dict]: """Shared step used for both training and validation. Parameters ---------- batch : Tuple[Observations, Optional[Rewards]] Batch containing Observations, and optional Rewards. When the Rewards are None, it means that we'll need to provide the Environment with actions before we can get the Rewards (e.g. image labels) back. This happens for example when being applied in a Setting which cares about sample efficiency or training performance, for example. environment : Environment The environment we're currently interacting with. Used to provide the rewards when they aren't already part of the batch (as mentioned above). Returns ------- Tuple[Tensor, Dict] The Loss tensor, and a dict of metrics to be logged. """ # Since we're training on a Passive environment, we will get both observations # and rewards, unless we're being evaluated based on our training performance, # in which case we will need to send actions to the environments before we can # get the corresponding rewards (image labels). observations: Observations = batch[0] rewards: Optional[Rewards] = batch[1] # Get the predictions: logits = self(observations) y_pred = logits.argmax(-1) if rewards is None: # If the rewards in the batch is None, it means we're expected to give # actions before we can get rewards back from the environment. rewards = environment.send(Actions(y_pred)) assert rewards is not None image_labels = rewards.y.to(self.device) loss = self.loss(logits, image_labels) accuracy = (y_pred == image_labels).sum().float() / len(image_labels) metrics_dict = {"accuracy": f"{accuracy.cpu().item():3.2%}"} return loss, metrics_dict class ExampleMethod(Method, target_setting=ClassIncrementalSetting): """Minimal example of a Method usable only in the SL track of the competition. This method uses the ExampleModel, which is quite simple. """ ModelType: ClassVar[Type[Classifier]] = Classifier def __init__(self, hparams: HParams = None): self.hparams: HParams = hparams or HParams() # We will create those when `configure` will be called, before training. self.model: Classifier self.optimizer: torch.optim.Optimizer def configure(self, setting: ClassIncrementalSetting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ self.model = self.ModelType( observation_space=setting.observation_space, action_space=setting.action_space, reward_space=setting.reward_space, ) self.optimizer = self.model.configure_optimizers() def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): """Example train loop. You can do whatever you want with train_env and valid_env here. NOTE: In the Settings where task boundaries are known (in this case all the supervised CL settings), this will be called once per task. """ # configure() will have been called by the setting before we get here. best_val_loss = inf best_epoch = 0 for epoch in range(self.hparams.max_epochs_per_task): self.model.train() print(f"Starting epoch {epoch}") # Training loop: with tqdm.tqdm(train_env) as train_pbar: postfix = {} train_pbar.set_description(f"Training Epoch {epoch}") for i, batch in enumerate(train_pbar): loss, metrics_dict = self.model.shared_step(batch, environment=train_env) self.optimizer.zero_grad() loss.backward() self.optimizer.step() postfix.update(metrics_dict) train_pbar.set_postfix(postfix) # Validation loop: self.model.eval() torch.set_grad_enabled(False) with tqdm.tqdm(valid_env) as val_pbar: postfix = {} val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 for i, batch in enumerate(val_pbar): batch_val_loss, metrics_dict = self.model.shared_step( batch, environment=valid_env ) epoch_val_loss += batch_val_loss postfix.update(metrics_dict, val_loss=epoch_val_loss) val_pbar.set_postfix(postfix) torch.set_grad_enabled(True) if epoch_val_loss < best_val_loss: best_val_loss = epoch_val_loss best_epoch = epoch if epoch - best_epoch > self.hparams.early_stop_patience: print(f"Early stopping at epoch {i}.") # NOTE: You should probably reload the model weights as they were at the # best epoch. break def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: """Get a batch of predictions (aka actions) for these observations.""" with torch.no_grad(): logits = self.model(observations) # Get the predicted classes y_pred = logits.argmax(dim=-1) return self.target_setting.Actions(y_pred) @classmethod def add_argparse_args(cls, parser: ArgumentParser): """Adds command-line arguments for this Method to an argument parser.""" parser.add_arguments(cls.ModelType.HParams, "hparams") @classmethod def from_argparse_args(cls, args: Namespace): """Creates an instance of this Method from the parsed arguments.""" hparams: Classifier.HParams = args.hparams return cls(hparams=hparams) if __name__ == "__main__": # Create the Method: # - Manually: # method = ExampleMethod() # - From the command-line: from simple_parsing import ArgumentParser from sequoia.common import Config from sequoia.settings import ClassIncrementalSetting parser = ArgumentParser() ExampleMethod.add_argparse_args(parser) args = parser.parse_args() method = ExampleMethod.from_argparse_args(args) # Create the Setting: # - "Easy": Domain-Incremental MNIST Setting, useful for quick debugging, but # beware that the action space is different than in class-incremental! # (which is the type of Setting used in the SL track!) # from sequoia.settings.sl.class_incremental.domain_incremental import DomainIncrementalSetting # setting = DomainIncrementalSetting( # dataset="mnist", nb_tasks=5, monitor_training_performance=True # ) # - "Medium": Class-Incremental MNIST Setting, useful for quick debugging: # setting = ClassIncrementalSetting( # dataset="mnist", # nb_tasks=5, # monitor_training_performance=True, # known_task_boundaries_at_test_time=False, # batch_size=32, # num_workers=4, # ) # - "HARD": Class-Incremental Synbols, more challenging. # NOTE: This Setting is very similar to the one used for the SL track of the # competition. setting = ClassIncrementalSetting( dataset="synbols", nb_tasks=12, known_task_boundaries_at_test_time=False, monitor_training_performance=True, batch_size=32, num_workers=4, ) # NOTE: can also use pass a `Config` object to `setting.apply`. This object has some # configuration options like device, data_dir, etc. results = setting.apply(method, config=Config(data_dir="data")) print(results.summary()) ================================================ FILE: examples/clcomp21/classifier_test.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.conftest import slow from sequoia.settings.sl import ClassIncrementalSetting from .classifier import Classifier, ExampleMethod @pytest.mark.timeout(120) def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the class-incremental mnist Setting.""" method = ExampleMethod(hparams=Classifier.HParams(max_epochs_per_task=1)) results = mnist_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results assert 0.60 <= results.average_online_performance.objective <= 1.00 assert 0.10 <= results.average_final_performance.objective <= 0.30 @slow @pytest.mark.timeout(300) def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = ExampleMethod(hparams=Classifier.HParams(max_epochs_per_task=1)) results = sl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results online_perf = results.average_online_performance assert 0.15 <= online_perf.objective <= 0.30 final_perf = results.average_final_performance assert 0.01 <= final_perf.objective <= 0.05 ================================================ FILE: examples/clcomp21/conftest.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.settings.rl import IncrementalRLSetting, TraditionalRLSetting from sequoia.settings.sl import ClassIncrementalSetting, TaskIncrementalSLSetting @pytest.fixture() def mnist_setting(): return SettingProxy( ClassIncrementalSetting, dataset="mnist", monitor_training_performance=True, ) @pytest.fixture() def task_incremental_mnist_setting(): return SettingProxy( TaskIncrementalSLSetting, dataset="mnist", monitor_training_performance=True, ) @pytest.fixture() def fashion_mnist_setting(): return SettingProxy( ClassIncrementalSetting, dataset="fashionmnist", monitor_training_performance=True, ) @pytest.fixture() def sl_track_setting(): setting = SettingProxy( ClassIncrementalSetting, "sl_track", # dataset="synbols", # nb_tasks=12, # class_order=class_order, # monitor_training_performance=True, ) return setting @pytest.fixture() def cartpole_state_setting(): setting = SettingProxy( TraditionalRLSetting, dataset="cartpole", train_max_steps=5_000, test_max_steps=2_000, nb_tasks=1, ) return setting @pytest.fixture() def incremental_cartpole_state_setting(): setting = SettingProxy( IncrementalRLSetting, dataset="cartpole", train_max_steps=10_000, nb_tasks=2, test_max_steps=2_000, ) return setting @pytest.fixture() def rl_track_setting(tmp_path): # NOTE: Here instead of loading the `rl_track.yaml`, we create instantiate it # directly, because we want to reduce the length of the task for testing, and it # isn't currently possible to both pass a preset yaml file and also pass kwargs to # the SettingProxy. setting = SettingProxy( IncrementalRLSetting, dataset="monsterkong", train_task_schedule={ 0: {"level": 0}, 1: {"level": 1}, 2: {"level": 10}, 3: {"level": 11}, 4: {"level": 20}, 5: {"level": 21}, 6: {"level": 30}, 7: {"level": 31}, }, train_steps_per_task=2_000, # Reduced length for testing test_steps_per_task=2_000, task_labels_at_train_time=True, ) assert setting.steps_per_phase == 2000 assert sorted(setting.train_task_schedule.keys()) == list(range(0, 16_000, 2000)) return setting ================================================ FILE: examples/clcomp21/dummy_method.py ================================================ from typing import Optional import gym import numpy as np import tqdm from torch import Tensor from sequoia.methods import Method from sequoia.settings import Actions, Environment, Observations, Setting from sequoia.settings.sl import SLSetting class DummyMethod(Method, target_setting=Setting): """Dummy method that returns random actions for each observation.""" def __init__(self): self.max_train_episodes: Optional[int] = None def configure(self, setting: Setting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ if isinstance(setting, SLSetting): # Being applied in SL, we will only do one 'epoch" (a.k.a. "episode"). self.max_train_episodes = 1 pass def fit(self, train_env: Environment, valid_env: Environment): """Example train loop. You can do whatever you want with train_env and valid_env here. NOTE: In the Settings where task boundaries are known (in this case all the supervised CL settings), this will be called once per task. """ # configure() will have been called by the setting before we get here. episodes = 0 with tqdm.tqdm(desc="training") as train_pbar: while not train_env.is_closed(): for i, batch in enumerate(train_env): if isinstance(batch, Observations): observations, rewards = batch, None else: observations, rewards = batch batch_size = observations.x.shape[0] y_pred = train_env.action_space.sample() # If we're at the last batch, it might have a different size, so w # give only the required number of values. if isinstance(y_pred, (np.ndarray, Tensor)): if y_pred.shape[0] != batch_size: y_pred = y_pred[:batch_size] if rewards is None: rewards = train_env.send(y_pred) train_pbar.set_postfix( { "Episode": episodes, "Step": i, } ) # train as you usually would. episodes += 1 if self.max_train_episodes and episodes >= self.max_train_episodes: train_env.close() break def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: """Get a batch of predictions (aka actions) for these observations.""" y_pred = action_space.sample() return self.target_setting.Actions(y_pred) if __name__ == "__main__": from sequoia.common import Config from sequoia.settings import ClassIncrementalSetting # Create the Method: # - Manually: method = DummyMethod() # NOTE: This Setting is very similar to the one used for the SL track of the # competition. from sequoia.client import SettingProxy setting = SettingProxy(ClassIncrementalSetting, "sl_track") # setting = SettingProxy(ClassIncrementalSetting, # dataset="synbols", # nb_tasks=12, # known_task_boundaries_at_test_time=False, # monitor_training_performance=True, # batch_size=32, # num_workers=4, # ) # NOTE: can also use pass a `Config` object to `setting.apply`. This object has some # configuration options like device, data_dir, etc. results = setting.apply(method, config=Config(data_dir="data")) print(results.summary()) ================================================ FILE: examples/clcomp21/dummy_method_test.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.conftest import slow from sequoia.settings.rl import IncrementalRLSetting from sequoia.settings.sl import ClassIncrementalSetting from .dummy_method import DummyMethod @pytest.mark.timeout(120) def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the class-incremental mnist Setting.""" method = DummyMethod() results = mnist_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results assert 0.10 * 0.5 <= results.average_online_performance.objective <= 0.10 * 1.5 assert 0.10 * 0.5 <= results.average_final_performance.objective <= 0.10 * 1.5 @slow @pytest.mark.timeout(300) def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = DummyMethod() results = sl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results online_perf = results.average_online_performance assert 0.02 <= online_perf.objective <= 0.05 final_perf = results.average_final_performance assert 0.02 <= final_perf.objective <= 0.05 @slow @pytest.mark.timeout(300) def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = DummyMethod() results = rl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results online_perf = results.average_online_performance # TODO: get an estimate of the upper bound of the random method on the RL track. TODO = 1_000 # this is way too large. assert 0 < online_perf.objective < TODO final_perf = results.average_final_performance assert 0 < final_perf.objective < TODO ================================================ FILE: examples/clcomp21/multihead_classifier.py ================================================ """ Example Method for the SL track: Multi-Head Classifier with simple task inference. You can use this model and method as a jumping off point for your own submission. """ from dataclasses import dataclass, replace from logging import getLogger from typing import ClassVar, Optional, Type import torch from gym import Space, spaces from torch import Tensor, nn from torch.nn import functional as F from torch.optim.optimizer import Optimizer from sequoia.settings.sl.incremental import ClassIncrementalSetting from sequoia.settings.sl.incremental.objects import Observations from .classifier import Classifier, ExampleMethod logger = getLogger(__file__) class MultiHeadClassifier(Classifier): @dataclass class HParams(Classifier.HParams): pass def __init__( self, observation_space: Space, action_space: spaces.Discrete, reward_space: spaces.Discrete, hparams: "MultiHeadClassifier.HParams" = None, ): super().__init__(observation_space, action_space, reward_space, hparams=hparams) # Use one output layer per task, rather than a single layer. self.output_heads = nn.ModuleList() # Use the output layer created in the Classifier constructor for task 0. self.output_heads.append(self.output) # NOTE: The optimizer will be set here, so that we can add the parameters of any # new output heads to it later. self.optimizer: Optional[torch.optim.Optimizer] = None self.current_task_id: int = 0 def configure_optimizers(self) -> Optimizer: self.optimizer = super().configure_optimizers() return self.optimizer def create_output_head(self) -> nn.Module: return nn.Linear(self.representations_size, self.n_classes).to(self.device) def get_or_create_output_head(self, task_id: int) -> nn.Module: """Retrieves or creates a new output head for the given task index. Also stores it in the `output_heads`, and adds its parameters to the optimizer. """ task_output_head: nn.Module if len(self.output_heads) > task_id: task_output_head = self.output_heads[task_id] else: logger.info(f"Creating a new output head for task {task_id}.") task_output_head = self.create_output_head() self.output_heads.append(task_output_head) assert self.optimizer, "need to set `optimizer` on the model." self.optimizer.add_param_group({"params": task_output_head.parameters()}) return task_output_head def forward(self, observations: Observations) -> Tensor: """Smart forward pass with multi-head predictions and task inference. This forward pass can handle three different scenarios, depending on the contents of `observations.task_labels`: 1. Base case: task labels are present, and all examples are from the same task. - Perform the 'usual' forward pass (e.g. `super().forward(observations)`). 2. Task labels are present, and the batch contains a mix of samples from different tasks: - Create slices of the batch for each task, where all items in each 'sub-batch' come from the same task. - Perform a forward pass for each task, by calling `forward` recursively with the sub-batch for each task as an argument (Case 1). 3. Task labels are *not* present. Perform some type of task inference, using the `task_inference_forward_pass` method. Check its docstring for more info. Parameters ---------- observations : Observations Observations from an environment. As of right now, all Settings produce observations with (at least) the two following attributes: - x: Tensor (the images/inputs) - task_labels: Optional[Tensor] (The task labels, when available, else None) Returns ------- Tensor The outputs, which in this case are the classification logits. All three cases above produce the same kind of outputs. """ observations = observations.to(self.device) task_ids: Optional[Tensor] = observations.task_labels if task_ids is None: # Run the forward pass with task inference turned on. return self.task_inference_forward_pass(observations) task_ids_present_in_batch = torch.unique(task_ids) if len(task_ids_present_in_batch) > 1: # Case 2: The batch contains data from more than one task. return self.split_forward_pass(observations) # Base case: "Normal" forward pass, where all items come from the same task. # - Setup the model for this task, however you want, and then do a forward pass, # as you normally would. # NOTE: If you want to reuse this cool multi-headed forward pass in your # own model, these lines here are what you'd want to change. task_id: int = task_ids_present_in_batch.item() # <--------------- Change below ----------------> if task_id == self.current_task_id: output_head = self.output else: output_head = self.get_or_create_output_head(task_id) features = self.encoder(observations.x) logits = output_head(features) return logits def split_forward_pass(self, observations: Observations) -> Tensor: """Perform a forward pass for a batch of observations from different tasks. This is called in `forward` when there is more than one unique task label in the batch. This will call `forward` for each task id present in the batch, passing it a slice of the batch, in which all items are from that task. NOTE: This cannot cause recursion problems, because `forward`(d=2) will be called with a bach of items, all of which come from the same task. This makes it so `split_forward_pass` cannot then be called again. Parameters ---------- observations : Observations Observations, in which the task labels might not all be the same. Returns ------- Tensor The outputs/logits from each task, re-assembled into a single batch, with the task ordering from `observations` preserved. """ assert observations.task_labels is not None # We have task labels. task_labels: Tensor = observations.task_labels unique_task_ids, inv_indices = torch.unique(task_labels, return_inverse=True) # There might be more than one task in the batch. batch_size = observations.batch_size assert batch_size is not None all_indices = torch.arange(batch_size, dtype=torch.int64, device=self.device) # Placeholder for the predicitons for each item in the batch. task_outputs = [None for _ in range(batch_size)] for i, task_id in enumerate(unique_task_ids): # Get the forward pass slice for this task. # Boolean 'mask' tensor, that selects entries from task `task_id`. is_from_this_task = inv_indices == i # Indices of the batch elements that are from task `task_id`. task_indices = all_indices[is_from_this_task] # Take a slice of the observations, in which all items come from this task. task_observations = observations[is_from_this_task] # Perform a "normal" forward pass (Base case). task_output = self.forward(task_observations) # Store the outputs for the items from this task. for i, index in enumerate(task_indices): task_outputs[index] = task_output[i] # Merge the results. assert all(item is not None for item in task_outputs) logits = torch.stack(task_outputs) return logits def task_inference_forward_pass(self, observations: Observations) -> Tensor: """Forward pass with a simple form of task inference.""" # We don't have access to task labels (`task_labels` is None). # --> Perform a simple kind of task inference: # 1. Perform a forward pass with each task's output head; # 2. Merge these predictions into a single prediction somehow. assert observations.task_labels is None # NOTE: This assumes that the observations are batched. # These are used below to indicate the shape of the different tensors. B = observations.x.shape[0] T = n_known_tasks = len(self.output_heads) N = self.n_classes # Tasks encountered previously and for which we have an output head. known_task_ids: list[int] = list(range(n_known_tasks)) assert known_task_ids # Placeholder for the predictions from each output head for each item in the # batch task_outputs = [None for _ in known_task_ids] # [T, B, N] # Get the forward pass for each task. for task_id in known_task_ids: # Create 'fake' Observations for this forward pass, with 'fake' task labels. # NOTE: We do this so we can call `self.forward` and not get an infinite # recursion. task_labels = torch.full([B], task_id, device=self.device, dtype=int) task_observations = replace(observations, task_labels=task_labels) # Setup the model for task `task_id`, and then do a forward pass. task_logits = self.forward(task_observations) task_outputs[task_id] = task_logits # 'Merge' the predictions from each output head using some kind of task # inference. assert all(item is not None for item in task_outputs) # Stack the predictions (logits) from each output head. logits_from_each_head: Tensor = torch.stack(task_outputs, dim=1) assert logits_from_each_head.shape == (B, T, N) # Normalize the logits from each output head with softmax. # Example with batch size of 1, output heads = 2, and classes = 4: # logits from each head: [[[123, 456, 123, 123], [1, 1, 2, 1]]] # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]] probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1) assert probs_from_each_head.shape == (B, T, N) # Simple kind of task inference: # For each item in the batch, use the class that has the highest probability # accross all output heads. max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1) assert max_probs_across_heads.shape == (B, N) assert chosen_head_per_class.shape == (B, N) # Example (continued): # max probs across heads: [[0.2, 0.6, 0.4, 0.2]] # chosen output heads per class: [[1, 0, 1, 1]] # Determine which output head has highest "confidence": max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1) assert max_prob_value.shape == (B,) assert most_probable_class.shape == (B,) # Example (continued): # max_prob_value: [0.6] # max_prob_class: [1] # A bit of boolean trickery to get what we need, which is, for each item, the # index of the output head that gave the most confident prediction. mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=self.device) chosen_output_head_per_item = chosen_head_per_class[mask] assert mask.shape == (B, N) assert chosen_output_head_per_item.shape == (B,) # Example (continued): # mask: [[False, True, False, True]] # chosen_output_head_per_item: [0] # Create a bool tensor to select items associated with the chosen output head. selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=self.device) assert selected_mask.shape == (B, T) # Select the logits using the mask: logits = logits_from_each_head[selected_mask] assert logits.shape == (B, N) return logits def on_task_switch(self, task_id: Optional[int]): """Executed when the task switches (to either a known or unknown task).""" if task_id is not None: # Switch the output head. self.current_task_id = task_id self.output = self.get_or_create_output_head(task_id) class ExampleTaskInferenceMethod(ExampleMethod): ModelType: ClassVar[Type[Classifier]] = MultiHeadClassifier def __init__(self, hparams: MultiHeadClassifier.HParams = None): super().__init__(hparams=hparams or MultiHeadClassifier.HParams()) self.hparams: MultiHeadClassifier.HParams def configure(self, setting: ClassIncrementalSetting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ self.model = MultiHeadClassifier( observation_space=setting.observation_space, action_space=setting.action_space, reward_space=setting.reward_space, hparams=self.hparams, ) self.optimizer = self.model.configure_optimizers() # Share a reference to the Optimizer with the model, so it can add new weights # when needed. self.model.optimizer = self.optimizer def on_task_switch(self, task_id: Optional[int]): self.model.on_task_switch(task_id) def get_actions(self, observations, action_space): return super().get_actions(observations, action_space) if __name__ == "__main__": # Create the Method, either manually: # method = ExampleTaskInferenceMethod() # Or, from the command-line: from simple_parsing import ArgumentParser from sequoia.settings.sl.class_incremental import ( ClassIncrementalSetting, TaskIncrementalSLSetting, ) parser = ArgumentParser(description=__doc__) ExampleTaskInferenceMethod.add_argparse_args(parser) args = parser.parse_args() method = ExampleTaskInferenceMethod.from_argparse_args(args) # Create the Setting: # Simpler Settings (useful for debugging): # setting = TaskIncrementalSLSetting( # setting = ClassIncrementalSetting( # dataset="mnist", # nb_tasks=5, # monitor_training_performance=True, # batch_size=32, # num_workers=4, # ) # Very similar setup to the SL Track of the competition: setting = ClassIncrementalSetting( dataset="synbols", nb_tasks=12, monitor_training_performance=True, known_task_boundaries_at_test_time=False, batch_size=32, num_workers=4, ) results = setting.apply(method) ================================================ FILE: examples/clcomp21/multihead_classifier_test.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.conftest import slow from sequoia.settings import ClassIncrementalSetting, TaskIncrementalSLSetting from .multihead_classifier import ExampleTaskInferenceMethod, MultiHeadClassifier @pytest.mark.timeout(120) def test_task_incremental_mnist( task_incremental_mnist_setting: SettingProxy[TaskIncrementalSLSetting], ): """Applies this Method to the class-incremental mnist Setting.""" mnist_setting = task_incremental_mnist_setting method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1)) results = mnist_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results # There should be an improvement over the Method in `classifier.py`: assert 0.80 <= results.average_online_performance.objective <= 1.00 assert 0.50 <= results.average_final_performance.objective <= 1.00 @pytest.mark.timeout(120) def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the class-incremental mnist Setting.""" method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1)) results = mnist_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results # There should be an improvement over the Method in `classifier.py`: assert 0.80 <= results.average_online_performance.objective <= 1.00 assert 0.50 <= results.average_final_performance.objective <= 1.00 @slow @pytest.mark.timeout(600) def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1)) results = sl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results assert 0.30 <= results.average_online_performance.objective <= 0.50 assert 0.02 <= results.average_final_performance.objective <= 0.05 ================================================ FILE: examples/clcomp21/regularization_example.py ================================================ """ Example: Defines a new Method based on the ExampleMethod, adding an EWC-like loss to help prevent the weights from changing too much between tasks. """ from copy import deepcopy from dataclasses import dataclass from typing import ClassVar, Dict, Optional, Tuple, Type import gym import torch from torch import Tensor from sequoia.common.hparams import uniform from sequoia.settings import DomainIncrementalSLSetting from sequoia.settings.sl.incremental.objects import Observations, Rewards from sequoia.utils.utils import dict_intersection from sequoia.utils.logging_utils import get_logger from .multihead_classifier import ExampleTaskInferenceMethod, MultiHeadClassifier logger = get_logger(__name__) class RegularizedClassifier(MultiHeadClassifier): """Adds an ewc-like penalty to the base classifier, to prevent its weights from shifting too much during training. """ @dataclass class HParams(MultiHeadClassifier.HParams): """Hyperparameters of this improved method. Adds the hyper-parameters related the 'ewc-like' regularization to those of the ExampleMethod. NOTE: These `uniform()` and `log_uniform` and `HyperParameters` are just there to make it easier to run HPO sweeps for your Method, which isn't required for the competition. """ # Coefficient of the ewc-like loss. reg_coefficient: float = uniform(0.0, 10.0, default=1.0) # Distance norm used in the regularization loss. reg_p_norm: int = 2 def __init__( self, observation_space: gym.Space, action_space: gym.Space, reward_space: gym.Space, hparams: "RegularizedClassifier.HParams" = None, ): super().__init__( observation_space, action_space, reward_space, hparams=hparams, ) self.reg_coefficient = self.hparams.reg_coefficient self.reg_p_norm = self.hparams.reg_p_norm self.previous_model_weights: Dict[str, Tensor] = {} self._previous_task: Optional[int] = None self._n_switches: int = 0 def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs): base_loss, metrics = super().shared_step(batch, *args, **kwargs) ewc_loss = self.reg_coefficient * self.ewc_loss() metrics["ewc_loss"] = ewc_loss return base_loss + ewc_loss, metrics def on_task_switch(self, task_id: Optional[int]) -> None: """Executed when the task switches (to either a known or unknown task).""" super().on_task_switch(task_id) if self._previous_task is None and self._n_switches == 0: logger.debug("Starting the first task, no EWC update.") elif task_id is None or task_id != self._previous_task: # NOTE: We also switch between unknown tasks. logger.info( f"Switching tasks: {self._previous_task} -> {task_id}: " f"Updating the EWC 'anchor' weights." ) self._previous_task = task_id self.previous_model_weights.clear() self.previous_model_weights.update( deepcopy({k: v.detach() for k, v in self.named_parameters()}) ) self._n_switches += 1 def ewc_loss(self) -> Tensor: """Gets an 'ewc-like' regularization loss. NOTE: This is a simplified version of EWC where the loss is the P-norm between the current weights and the weights as they were on the begining of the task. """ if self._previous_task is None: # We're in the first task: do nothing. return 0.0 old_weights: Dict[str, Tensor] = self.previous_model_weights new_weights: Dict[str, Tensor] = dict(self.named_parameters()) loss = 0.0 for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights): loss += torch.dist(new_w, old_w.type_as(new_w), p=self.reg_p_norm) return loss class ExampleRegMethod(ExampleTaskInferenceMethod): """Improved version of the ExampleMethod that uses a `RegularizedClassifier`.""" HParams: ClassVar[Type[HParams]] = RegularizedClassifier.HParams def __init__(self, hparams: HParams = None): super().__init__(hparams=hparams or self.HParams.from_args()) def configure(self, setting: DomainIncrementalSLSetting): # Use the improved model, with the added EWC-like term. self.model = RegularizedClassifier( observation_space=setting.observation_space, action_space=setting.action_space, reward_space=setting.reward_space, hparams=self.hparams, ) self.optimizer = self.model.configure_optimizers() def on_task_switch(self, task_id: Optional[int]): self.model.on_task_switch(task_id) if __name__ == "__main__": # Create the Method: # - Manually: # method = ExampleRegMethod() # - From the command-line: from simple_parsing import ArgumentParser from sequoia.common import Config from sequoia.settings import ClassIncrementalSetting parser = ArgumentParser() ExampleRegMethod.add_argparse_args(parser) args = parser.parse_args() method = ExampleRegMethod.from_argparse_args(args) # Create the Setting: # - "Easy": Domain-Incremental MNIST Setting, useful for quick debugging, but # beware that the action space is different than in class-incremental! # (which is the type of Setting used in the SL track!) # from sequoia.settings.sl.class_incremental.domain_incremental import DomainIncrementalSLSetting # setting = DomainIncrementalSLSetting( # dataset="mnist", nb_tasks=5, monitor_training_performance=True # ) # - "Medium": Class-Incremental MNIST Setting, useful for quick debugging: # setting = ClassIncrementalSetting( # dataset="mnist", # nb_tasks=5, # monitor_training_performance=True, # known_task_boundaries_at_test_time=False, # batch_size=32, # num_workes=4, # ) # - "HARD": Class-Incremental Synbols, more challenging. # NOTE: This Setting is very similar to the one used for the SL track of the # competition. setting = ClassIncrementalSetting( dataset="synbols", nb_tasks=12, known_task_boundaries_at_test_time=False, monitor_training_performance=True, batch_size=32, num_workers=4, ) # Run the experiment: results = setting.apply(method, config=Config(debug=True, data_dir="./data")) print(results.summary()) ================================================ FILE: examples/clcomp21/regularization_example_test.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.conftest import slow from sequoia.settings import ClassIncrementalSetting from .regularization_example import ExampleRegMethod, RegularizedClassifier @pytest.mark.timeout(120) def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the class-incremental mnist Setting.""" method = ExampleRegMethod(hparams=RegularizedClassifier.HParams(max_epochs_per_task=1)) results = mnist_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results # There should be an improvement over the Method in `multihead_classifier.py`: assert 0.80 <= results.average_online_performance.objective <= 1.00 assert 0.30 <= results.average_final_performance.objective <= 0.50 @slow @pytest.mark.timeout(600) def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = ExampleRegMethod(hparams=RegularizedClassifier.HParams(max_epochs_per_task=1)) results = sl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results assert 0.30 <= results.average_online_performance.objective <= 0.50 assert 0.02 <= results.average_final_performance.objective <= 0.05 ================================================ FILE: examples/clcomp21/sb3_example.py ================================================ """ Example where we start from a Method from stable-baselines3 to solve the rl track. """ from dataclasses import dataclass from typing import ClassVar, Dict, Mapping, Optional, Type, Union import gym from gym import spaces from simple_parsing import mutable_field from sequoia.methods.stable_baselines3_methods.ppo import PPOMethod, PPOModel from sequoia.settings.rl import ContinualRLSetting # from stable_baselines3.ppo.policies import ActorCriticCnnPolicy, ActorCriticPolicy class CustomPPOModel(PPOModel): @dataclass class HParams(PPOModel.HParams): """Hyper-parameters of the PPO Model.""" @dataclass class CustomPPOMethod(PPOMethod): Model: ClassVar[Type[PPOModel]] = PPOModel # Hyper-parameters of the PPO Model. hparams: PPOModel.HParams = mutable_field(PPOModel.HParams) def configure(self, setting: ContinualRLSetting): super().configure(setting=setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]: return super().get_search_space(setting) if __name__ == "__main__": # Create the Setting. # CartPole-state for debugging: from sequoia.settings.rl import RLSetting setting = RLSetting(dataset="CartPole-v0") # OR: Incremental CartPole-state: from sequoia.settings.rl import IncrementalRLSetting setting = IncrementalRLSetting( dataset="CartPole-v0", monitor_training_performance=True, nb_tasks=1, train_steps_per_task=1_000, test_max_steps=2000, ) # OR: Setting of the RL Track of the competition: # setting = IncrementalRLSetting.load_benchmark("rl_track") # Create the Method: method = CustomPPOMethod() # Apply the Method onto the Setting to get Results. results = setting.apply(method) print(results.summary()) # BONUS: Running a hyper-parameter sweep: # method.hparam_sweep(setting) ================================================ FILE: examples/clcomp21/sb3_example_test.py ================================================ import pytest from sequoia.client.setting_proxy import SettingProxy from sequoia.conftest import slow from sequoia.settings.rl import IncrementalRLSetting, RLSetting from sequoia.settings.sl import ClassIncrementalSetting from .sb3_example import CustomPPOMethod, CustomPPOModel @pytest.mark.timeout(120) def test_cartpole_state(cartpole_state_setting: SettingProxy[RLSetting]): """Applies this Method to a simple cartpole-state setting.""" method = CustomPPOMethod(hparams=CustomPPOModel.HParams(n_steps=64)) results = cartpole_state_setting.apply(method) assert results.to_log_dict() results: RLSetting.Results # TODO: BUG: The SB3 method uses more than the number of steps allowed, probably # while filling up its buffer. assert 150 < results.average_final_performance.mean_episode_length @pytest.mark.timeout(120) def test_incremental_cartpole_state( incremental_cartpole_state_setting: SettingProxy[IncrementalRLSetting], ): """Applies this Method to the class-incremental mnist Setting.""" method = CustomPPOMethod() results = incremental_cartpole_state_setting.apply(method) assert results.to_log_dict() results: ClassIncrementalSetting.Results # TODO: Increase this bound assert 5 <= results.average_online_performance.objective assert 5 <= results.average_final_performance.objective @pytest.mark.timeout(300) def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]): """Applies this Method to the Setting of the sl track of the competition.""" method = CustomPPOMethod() results = rl_track_setting.apply(method) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results online_perf = results.average_online_performance # TODO: get an estimate of the upper bound of the random method on the RL track. assert 0 < online_perf.objective final_perf = results.average_final_performance assert 0 < final_perf.objective ================================================ FILE: examples/demo_utils.py ================================================ from collections import defaultdict from pathlib import Path from typing import Dict, List, Type import pandas as pd from simple_parsing import ArgumentParser from sequoia.common.config import Config from sequoia.settings import Method, Results, RLSetting, Setting, SLSetting def demo_all_settings( MethodType: Type[Method], datasets: List[str] = ["mnist", "fashionmnist"], **setting_kwargs, ): """Evaluates the given Method on all its applicable settings. NOTE: Only evaluates on the mnist/fashion-mnist datasets for this demo. """ # Iterate over all the applicable evaluation settings, using the default # options for each setting, and store the results inside this dictionary. all_results: Dict[Type[Setting], Dict[str, Results]] = defaultdict(dict) # Loop over all the types of settings this method is applicable on, i.e. # all the nodes in the tree below its target Setting). for setting_type in MethodType.get_applicable_settings(): # Loop over all the available dataset for each setting: for dataset in setting_type.get_available_datasets(): if datasets and dataset not in datasets: print(f"Skipping {setting_type} / {dataset} for now.") continue if issubclass(setting_type, RLSetting): print(f"Skipping {setting_type} (not considering RL settings for this demo).") continue # 1. Create a Method of the provided type, so we start fresh every time. method = MethodType() # 2. Create the setting setting = setting_type(dataset=dataset, **setting_kwargs) # 3. Apply the method on the setting. results: Results = setting.apply(method) print(f"Results on setting {setting_type}, dataset {dataset}:") print(results.summary()) # Save the results in the dict defined above. all_results[setting_type][dataset] = results # Create a pandas dataframe with all the results: result_df: pd.DataFrame = make_result_dataframe(all_results) csv_path = Path(f"examples/results/results_{method.get_name()}.csv") csv_path.parent.mkdir(exist_ok=True, parents=True) result_df.to_csv(csv_path) print(f"Saved dataframe with results to path {csv_path}") # BONUS: Display the results in a LaTeX-formatted table! latex_table_path = Path(f"examples/results/table_{method.get_name()}.tex") caption = f"Results for method {type(method).__name__} settings." result_df.to_latex( buf=latex_table_path, caption=caption, na_rep="N/A", multicolumn=True, ) print(f"Saved LaTeX table with results to path {latex_table_path}") return all_results def make_result_dataframe(all_results): # Create a LaTeX table with all the results for all the settings. import pandas as pd all_settings: List[Type[Setting]] = list(all_results.keys()) all_setting_names: List[str] = [s.get_name() for s in all_settings] all_datasets: List[str] = [] for setting, dataset_to_results in all_results.items(): all_datasets.extend(dataset_to_results.keys()) all_datasets = list(set(all_datasets)) ## Create a multi-index for the dataframe. # tuples = [] # for setting, dataset_to_results in all_results.items(): # setting_name = setting.get_name() # tuples.extend((setting_name, dataset) for dataset in dataset_to_results.keys()) # tuples = sorted(list(set(tuples))) # multi_index = pd.MultiIndex.from_tuples(tuples, names=["setting", "dataset"]) # single_index = pd.Index(["Objective"]) # df = pd.DataFrame(index=multi_index, columns=single_index) df = pd.DataFrame(index=all_setting_names, columns=all_datasets) for setting_type, dataset_to_results in all_results.items(): setting_name = setting_type.get_name() for dataset, result in dataset_to_results.items(): # df["Objective"][setting_name, dataset] = result.objective df[dataset][setting_name] = result.objective return df def compare_results( all_results: Dict[Type[Method], Dict[Type[Setting], Dict[str, Results]]] ) -> None: """Helper function, compares the results of the different methods by arranging them in a table (pandas dataframe). """ # Make one huge dictionary that maps from: # >> from .demo_utils import make_comparison_dataframe comparison_df = make_comparison_dataframe(all_results) print("----- All Results -------") print(comparison_df) csv_path = Path("examples/results/comparison.csv") latex_path = Path("examples/results/table_comparison.tex") comparison_df.to_csv(csv_path) print(f"Saved dataframe with results to path {csv_path}") caption = f"Comparison of different methods on their applicable settings." comparison_df.to_latex(latex_path, caption=caption, multicolumn=False, multirow=False) print(f"Saved LaTeX table with results to path {latex_path}") def make_comparison_dataframe( all_results: Dict[Type[Method], Dict[Type[Setting], Dict[str, Results]]] ) -> pd.DataFrame: """Helper function: takes in the dictionary with all the results and re-arranges it into a pandas dataframe. """ # Get all the method names. all_methods: List[Type[Method]] = list(all_results.keys()) all_method_names: List[str] = [m.get_name() for m in all_methods] # Get all the setting names. all_settings: List[Type[Setting]] = [] for method_class, setting_to_dataset_to_results in all_results.items(): all_settings.extend(setting_to_dataset_to_results.keys()) all_settings = list(set(all_settings)) all_setting_names: List[str] = [s.get_name() for s in all_settings] # Get all the dataset names. all_datasets: List[str] = [] for method_class, setting_to_dataset_to_results in all_results.items(): for setting, dataset_to_results in setting_to_dataset_to_results.items(): all_datasets.extend(dataset_to_results.keys()) all_datasets = list(set(all_datasets)) # Create the a multi-index, so we can later index df[setting, datset][method] # Option 1: All [settings x all datasets] # iterables = [all_setting_names, all_datasets] # columns = pd.MultiIndex.from_product(iterables, names=["setting", "dataset"]) # Option 2: Index will be [Setting, ] # Create the column index using the tuples that apply. tuples = [] for method_class, setting_to_dataset_to_results in all_results.items(): for setting, dataset_to_results in setting_to_dataset_to_results.items(): setting_name = setting.get_name() tuples.extend((setting_name, dataset) for dataset in dataset_to_results.keys()) tuples = sorted(list(set(tuples))) multi_index = pd.MultiIndex.from_tuples(tuples, names=["setting", "dataset"]) single_index = pd.Index(all_method_names, name="Method") df = pd.DataFrame(index=multi_index, columns=single_index) for method_class, setting_to_dataset_to_results in all_results.items(): method_name = method_class.get_name() for setting, dataset_to_results in setting_to_dataset_to_results.items(): setting_name = setting.get_name() for dataset, result in dataset_to_results.items(): df[method_name][setting_name, dataset] = result.objective return df ================================================ FILE: examples/prerequisites/dataclasses_example.py ================================================ """ Example describing dataclasses and how simple-parsing can be used to create command-line arguments from them. """ from dataclasses import dataclass @dataclass class Point: x: float = 1.2 y: float = 4.5 # This generates the following method (among others): # def __init__(self, x: float = 1.2, y: float = 4.5): # self.x = x # self.y = y if __name__ == "__main__": p1 = Point(0, 0) print(p1) expected = "Point(x=0, y=0)" # # Second example: HyperParameters with simple-parsing: # from simple_parsing import ArgumentParser from simple_parsing.helpers import choice @dataclass class HParams: """Hyper-Parameters of my model.""" # Learning rate. learning_rate: float = 3e-4 # L2 regularization coefficient. weight_decay: float = 1e-6 # Choice of optimizer optimizer: str = choice("adam", "sgd", "rmsprop", default="sgd") if __name__ == "__main__": parser = ArgumentParser() parser.add_arguments(HParams, "hparams") parser.print_help() import textwrap expected += textwrap.dedent( """\ usage: dataclasses_example.py [-h] [--learning_rate float] [--weight_decay float] [--optimizer {adam,sgd,rmsprop}] optional arguments: -h, --help show this help message and exit HParams ['hparams']: Hyper-Parameters of my model. --learning_rate float, --hparams.learning_rate float Learning rate. (default: 0.0003) --weight_decay float, --hparams.weight_decay float L2 regularization coefficient. (default: 1e-06) --optimizer {adam,sgd,rmsprop}, --hparams.optimizer {adam,sgd,rmsprop} Choice of optimizer (default: sgd) """ ) args = parser.parse_args("") hparams: HParams = args.hparams print(hparams) expected += """\ HParams(learning_rate=0.0003, weight_decay=1e-06, optimizer='sgd') """ ================================================ FILE: mypy.ini ================================================ # Global options: [mypy] python_version = 3.7 warn_return_any = True warn_unused_configs = True follow_imports = normal ================================================ FILE: pytest.ini ================================================ [pytest] timeout = 30 testpaths = sequoia examples addopts = --doctest-modules norecursedirs = methods/d3rlpy_methods settings/offline_rl examples/advances/procgen_example ================================================ FILE: requirements.txt ================================================ # Fork of gym with more flexible utility functions. gym @ git+https://www.github.com/openai/gym@8819d561132082f6130d4a2388c68a963f41ec4f#egg=gym # nngeometry module used in the EWC method nngeometry @ git+https://github.com/oleksost/nngeometry.git#egg=nngeometry # Temporary fix for issue#128 pyyaml!=5.4.*,>=5.1 simple_parsing==0.1.2.post1 # matplotlib==3.2.2 matplotlib # NOTE: @lebrice: PyTorch suddenly got really picky about type annotations in 1.9.0 for # some reason, and they really don't do a great job at evaluating them, so removing it # for now. torch==1.8.1 torchvision==0.9.1 scikit-learn tqdm continuum==1.0.19 # Only required for the current demo: wandb plotly pandas # Only for python < 3.8 singledispatchmethod;python_version<'3.8' # NOTE: PyTorch-Lightning version 1.4.0 is "working" but raises lots of warnings. pytorch-lightning==1.5.9 lightning-bolts==0.5.0 # Requirements for running tests: pytest-timeout pytest-xdist pytest-xvfb # Prevents the gym popups from displaying during tests. # Required for the RL methods pyvirtualdisplay # Required for the synbols dataset to work. h5py ================================================ FILE: scripts/eai/cancel_all_queuing.sh ================================================ all_ids=$(eai job ls --state queuing -c "$1" --fields id --no-header) for id in $all_ids do eai job kill $id done ================================================ FILE: scripts/eai/cancel_all_running.sh ================================================ all_ids=$(eai job ls --state running -c "$1" --fields id --no-header) for id in $all_ids do eai job kill $id done ================================================ FILE: scripts/eai/job.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures # set -o nounset # Exposes unset variables # Get organization name ORG_NAME=$(eai organization get --field name) # Get account name ACCOUNT_NAME=$(eai account get --field name) ACCOUNT_ID=$ORG_NAME.$ACCOUNT_NAME EAI_Registry=${EAI_Registry:-"registry.console.elementai.com/$ACCOUNT_ID"} echo "Using registry $EAI_Registry" CURRENT_BRANCH="`git branch --show-current`" BRANCH=${BRANCH:-$CURRENT_BRANCH} export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."} echo "Building eai-specific container for branch $BRANCH" if [ "$NO_BUILD" ]; then echo "skipping build." else echo "building" # TODO: There is something wrong here: How can they possibly build their job, if # they don't have the eai dockerfile? source dockers/eai/build.sh fi # The image we're using is going to be called sequoai_eai:$BRANCH, and will have been # pushed to the user's eai registry. eai job submit \ --restartable \ --data $ACCOUNT_ID.home:/mnt/home \ --data $ACCOUNT_ID.data:/mnt/data \ --data $ACCOUNT_ID.results:/mnt/results \ --env WANDB_API_KEY="$WANDB_API_KEY" \ --env HOME=/home/toolkit \ --image $EAI_Registry/sequoia_eai:$BRANCH \ --gpu 1 --cpu 8 --mem 12 \ -- "$@" # eai job submit \ # --restartable \ # --data $ACCOUNT_ID.home:/mnt/home \ # --data $ACCOUNT_ID.data:/mnt/data \ # --data $ACCOUNT_ID.results:/mnt/results \ # --env WANDB_API_KEY="$WANDB_API_KEY" \ # --env HOME=/home/toolkit \ # --image $EAI_Registry/sequoia_eai:$BRANCH \ # --gpu 1 --cpu 8 --mem 12 --gpu-model-filter 12gb \ # -- "$@" ================================================ FILE: scripts/eai/rl_sweep.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures set -o nounset # Exposes unset variables export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."} source dockers/eai/build.sh export NO_BUILD=1 # Number of runs per combination. MAX_RUNS=20 PROJECT="crl_study" SETTINGS=( "continual_rl" "discrete_task_agnostic_rl" "incremental_rl" "task_incremental_rl" "multi_task_rl" "traditional_rl" ) METHODS=( "ppo" "a2c" "dqn" "ddpg" "sac" "td3" "baseline" "methods.ewc" ) BENCHMARKS=( "cartpole" "monsterkong_mix" "mountaincar_continuous" ) # "half_cheetah" for METHOD in "${METHODS[@]}"; do for SETTING in "${SETTINGS[@]}"; do for BENCHMARK in "${BENCHMARKS[@]}"; do # Share the trials from different datasets, hopefully reusing something? DATABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl" scripts/eai/job.sh sequoia_sweep \ --max_runs $MAX_RUNS --database_path $DATABASE_PATH \ --setting $SETTING --benchmark $BENCHMARK --project $PROJECT \ --method $METHOD \ "$@" done done done # source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar10 --project csl_study --method baseline # source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar100 --project csl_study --nb_tasks 20 --method baseline # source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset synbols --project csl_study --nb_tasks 12 --method baseline ================================================ FILE: scripts/eai/shell_job.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace # set -o pipefail # Unveils hidden failures # set -o nounset # Exposes unset variables # Get organization name ORG_NAME=$(eai organization get --field name) # Get account name ACCOUNT_NAME=$(eai account get --field name) ACCOUNT_ID=$ORG_NAME.$ACCOUNT_NAME EAI_Registry=registry.console.elementai.com/$ACCOUNT_ID CURRENT_BRANCH="`git branch --show-current`" BRANCH=${BRANCH:-$CURRENT_BRANCH} existing_interactive_job_id=`eai job ls --state alive --fields id,interactive | grep true | awk '{print $1}'` if [ $existing_interactive_job_id ]; then echo "Found existing interactive job, with id $existing_interactive_job_id" eai job kill $existing_interactive_job_id echo "Sleeping for 5 seconds, just to give the job a chance to change its status." sleep 5 fi; if [ "$NO_BUILD" ]; then echo "skipping build." else echo "building" # TODO: There is something wrong here: How can they possibly build their job, if # they don't have the eai dockerfile? source dockers/eai/build.sh fi # The image we're using is going to be called sequoai_eai:$BRANCH, and will have been # pushed to the user's eai registry. eai job submit \ --interactive \ --data $ACCOUNT_ID.home:/mnt/home \ --data $ACCOUNT_ID.data:/mnt/data \ --data $ACCOUNT_ID.results:/mnt/results \ --env WANDB_API_KEY="$WANDB_API_KEY" \ --env HOME=/home/toolkit \ --image $EAI_Registry/sequoia_eai:$BRANCH \ --gpu 1 --cpu 8 --mem 12 --gpu-model-filter 12gb ================================================ FILE: scripts/eai/sl_sweep.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures set -o nounset # Exposes unset variables export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."} source dockers/eai/build.sh export NO_BUILD=1 # Number of runs per combination. MAX_RUNS=20 PROJECT="csl_study" SETTINGS=( "continual_sl" "discrete_task_agnostic_sl" "incremental_sl" "task_incremental_sl" "multi_task_sl" "traditional_sl" ) METHODS=( # "random_baseline" "gdumb" "agem" "ar1" "cwr_star" "gem" "lwf" "replay" "synaptic_intelligence" "avalanche.ewc" "baseline" "methods.ewc" "experience_replay" "hat" "pnn" ) DATASETS=( "synbols --nb_tasks 12" "cifar10" "cifar100 --nb_tasks 10" "mnist" ) for METHOD in "${METHODS[@]}"; do for SETTING in "${SETTINGS[@]}"; do for DATASET in "${DATASETS[@]}"; do # Share the trials from different datasets, hopefully reusing something? DABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl" scripts/eai/job.sh sequoia_sweep \ --max_runs $MAX_RUNS --database_path $DABASE_PATH \ --setting $SETTING --dataset $DATASET --project $PROJECT \ --method $METHOD --monitor_training_performance True \ "$@" done done done # source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar10 --project csl_study --method baseline # source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar100 --project csl_study --nb_tasks 20 --method baseline # source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset synbols --project csl_study --nb_tasks 12 --method baseline ================================================ FILE: scripts/slurm/launch_many_sweeps.sh ================================================ #!/bin/bash set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures set -o nounset # Exposes unset variables export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."} module load anaconda/3 conda activate sequoia cd ~/Sequoia pip install -e .[hpo,monsterkong] # Number of runs per combination. MAX_RUNS=20 PROJECT="csl_study" SETTINGS=("class_incremental" "task_incremental" "multi_task" "iid") METHODS=( "gdumb" "random_baseline" "pnn" "agem" "ar1" "cwr_star" "gem" "gdumb" "lwf" "replay" "synaptic_intelligence" "avalanche.ewc" "methods.ewc" "experience_replay" "hat" "baseline" ) DATASETS=( "synbols --nb_tasks 12" "cifar10" "cifar100 --nb_tasks 10" "mnist" ) for METHOD in "${METHODS[@]}"; do for SETTING in "${SETTINGS[@]}"; do for DATASET in "${DATASETS[@]}"; do # Share the trials from different datasets, hopefully reusing something? DABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl" scripts/slurm/sweep.sh \ --max_runs $MAX_RUNS --database_path $DABASE_PATH \ --setting $SETTING --dataset $DATASET --project $PROJECT \ --WANDB_API_KEY $WANDB_API_KEY \ --method $METHOD \ "$@" done done done ================================================ FILE: scripts/slurm/run.sh ================================================ #!/bin/bash #SBATCH --array=0-3%2 #SBATCH --cpus-per-task=2 #SBATCH --gres=gpu:1 #SBATCH --mem=10GB #SBATCH --time=11:59:00 module load anaconda/3 conda activate sequoia cd ~/Sequoia pip install -e .[hpo,monsterkong,avalanche] sequoia --data_dir $SLURM_TMPDIR "$@" ================================================ FILE: scripts/slurm/sweep.sh ================================================ #!/bin/bash #SBATCH --array=0-10%2 #SBATCH --cpus-per-task=2 #SBATCH --gres=gpu:1 #SBATCH --mem=10GB #SBATCH --time=11:59:00 set -o errexit # Used to exit upon error, avoiding cascading errors set -o errtrace # Show error trace set -o pipefail # Unveils hidden failures module load anaconda/3 conda activate sequoia cd ~/Sequoia # TODO: Set data_dir in Config to `DATA_DIR` as a priority, and then as SLURM_TMPDIR/DATA (not just SLURM_TMPDIR!) cp -r data $SLURM_TMPDIR/ export DATA_DIR=$SLURM_TMPDIR/data #pip install -e .[hpo,monsterkong,avalanche] # TODO: Change the setting, the number of tasks, the method, etc. /home/mila/n/normandf/.conda/envs/sequoia/bin/sequoia_sweep --data_dir $SLURM_TMPDIR/data "$@" ================================================ FILE: sequoia/README.md ================================================ # sequoia ## Packages: - [settings](settings): definitions for the settings (machine learning problems). - [methods](methods): Contains the methods (which can be applied to settings). - [common](common): utilities such as metrics, transforms, layers, gym wrappers configuration classes, etc. that are used by Settings and Methods. - [utils](utils): miscelaneous utility functions (logging, command-line parsing, etc) - [experiments](experiments): Command-line interface entry-points, via the `Experiment` class. - [client (wip)](client): defines a proxy to a Setting and its environments, in order to further isolate the Method and Setting from each other (used for the CLVision competition). ================================================ FILE: sequoia/__init__.py ================================================ """ Sequoia - The Research Tree """ from ._version import get_versions from .settings import Environment, Method, Setting # from .experiments import Experiment __version__ = get_versions()["version"] del get_versions ================================================ FILE: sequoia/_version.py ================================================ # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). git_refnames = "$Format:%d$" git_full = "$Format:%H$" git_date = "$Format:%ci$" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440-post" cfg.tag_prefix = "v" cfg.parentdir_prefix = "sequoia-" cfg.versionfile_source = "sequoia/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen( [c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), ) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip().decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return { "version": dirname[len(parentdir_prefix) :], "full-revisionid": None, "dirty": False, "error": None, "date": None, } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print( "Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix) ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) return { "version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date, } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return { "version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None, } @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command( GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root, ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post0.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post0.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return { "version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None, } if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return { "version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date"), } def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: return { "version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None, } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return { "version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None, } ================================================ FILE: sequoia/client/README.md ================================================ # (WIP) Sequoia Client This is only currently used for the competition. The idea is that the setting (and its environments) are isolated from the user (the 'client'), in order to prevent any modifications / hacking of the environment. ================================================ FILE: sequoia/client/__init__.py ================================================ from .env_proxy import EnvironmentProxy from .setting_proxy import SettingProxy ================================================ FILE: sequoia/client/__main__.py ================================================ """ TODO: launch the 'sequoia gRPC server' at a given address / port. """ import argparse from .server import server if __name__ == "__main__": parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--ip", type=str, help="gRPC host ip", default="") parser.add_argument("-p", "--port", type=int, help="gRPC port", default=13337) args = parser.parse_args() server( grpc_host=args.ip, grpc_port=args.port, ) ================================================ FILE: sequoia/client/env.proto ================================================ syntax = "proto3"; // Adapted from https://github.com/AppliedDeepLearning/gymx/blob/master/gymx/env.proto enum SettingType { CLASS_INCREMENTAL = 0; TASK_INCREMENTAL = 1; CONTINUAL_RL = 2; INCREMENTAL_RL = 3; } service Environment { rpc Make (Name) returns (Info) {}; rpc Reset (Empty) returns (Observation) {}; rpc Step (Action) returns (Transition) {}; } message Name { string value = 1; } message Info { repeated int32 observation_shape = 1; int32 num_actions = 2; int32 max_episode_steps = 3; } message Action { int32 value = 1; } message Observation { repeated float data = 1; repeated int32 shape = 2; } message Transition { Observation observation = 1; float reward = 2; Observation next_episode = 3; } message Empty {} ================================================ FILE: sequoia/client/env_proxy.py ================================================ """TODO: Create an 'environment proxy' that relays observations / actions etc from a remote environment via gRPC. For now this simply holds the 'remote' environment in memory. """ from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union import numpy as np from torch import Tensor from sequoia.common.metrics import Metrics from sequoia.settings import ( Actions, ActionType, Environment, Observations, ObservationType, Results, Rewards, RewardType, Setting, ) MISSING = object() class EnvironmentProxy(Environment[ObservationType, ActionType, RewardType]): def __init__(self, env_fn, setting_type: Type[Setting]): # TODO: Actually interact with a given environment of the remote Setting # TODO: env_fn is just a callable that returns the actual env now, but the idea # is that it would perhaps be a handle/address/whatever which we could contact? self.__environment = env_fn() # TODO: Remove this if possible self._environment_type = type(self.__environment) self._setting_type = setting_type self.observation_space = self.get_attribute("observation_space") self.action_space = self.get_attribute("action_space") # NOTE: We don't define the `reward_space` attribute if the underlying env # doesnt have it. missing = object() reward_space = self.get_attribute("reward_space", default=missing) if reward_space is not missing: self.reward_space = reward_space # TODO: Double check this also works for RL batch_size = self.get_attribute("batch_size", default=missing) if batch_size is not missing: self.batch_size: Optional[int] = batch_size def get_attribute(self, name: str, default: Any = MISSING) -> Any: if default is MISSING: # TODO: actually get the value from the 'remote' env. return getattr(self.__environment, name) else: return getattr(self.__environment, name, default) def reset(self) -> ObservationType: obs = self.__environment.reset() return obs def __len__(self) -> int: return self.__environment.__len__() def step( self, actions: ActionType ) -> Tuple[ ObservationType, RewardType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]], ]: # Simulate converting things to a pickleable object? if isinstance(actions, Actions): actions = actions.numpy() actions_pkl = actions # TODO: Use some kind of gRPC endpoint. observations_pkl, rewards_pkl, done_pkl, info_pkl = self.__environment.step(actions_pkl) if isinstance(observations_pkl, (Observations, dict)): observations = self._setting_type.Observations(**observations_pkl) else: observations = observations_pkl if isinstance(rewards_pkl, (Rewards, dict)): rewards = self._setting_type.Rewards(**rewards_pkl) else: rewards = rewards_pkl done = np.array(done_pkl) info = np.array(info_pkl) return observations, rewards, done, info def __iter__(self): return self.__environment.__iter__() def __next__(self) -> ObservationType: return self.__environment.__next__() def send(self, actions: ActionType): if isinstance(actions, Actions): actions = actions.y_pred if isinstance(actions, Tensor): actions = actions.cpu().numpy() actions_pkl = actions rewards_pkl = self.__environment.send(actions_pkl) if isinstance(rewards_pkl, (Rewards, dict)): rewards = self._setting_type.Rewards(**rewards_pkl) else: rewards = rewards_pkl return rewards def close(self): self.__environment.close() @property def is_closed(self) -> bool: return self.get_attribute("is_closed") def render(self, *args, **kwargs): return self.__environment.render(*args, **kwargs) def get_results(self) -> Results: return self.__environment.get_results() def get_online_performance(self) -> List[Metrics]: return self.__environment.get_online_performance() def get_average_online_performance(self) -> Metrics: return self.__environment.get_average_online_performance() def __getattr__(self, name: str): if name.startswith("_"): raise AttributeError(f"attempted to get missing private attribute '{name}'") return self.get_attribute(name) ================================================ FILE: sequoia/client/env_proxy_test.py ================================================ import platform from functools import partial from typing import ClassVar, Iterable, Tuple, Type, TypeVar import gym import numpy as np import psutil import pytest from torch import Tensor from torchvision.datasets import MNIST from sequoia.common.gym_wrappers.env_dataset import EnvDataset from sequoia.common.gym_wrappers.env_dataset_test import TestEnvDataset as _TestEnvDataset from sequoia.common.gym_wrappers.utils import is_proxy_to from sequoia.common.spaces import Image from sequoia.common.transforms import Compose, Transforms from sequoia.settings.assumptions import IncrementalAssumption from sequoia.settings.rl.continual.environment import GymDataLoader from sequoia.settings.rl.continual.environment_test import TestGymDataLoader as _TestGymDataLoader from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.settings.sl.environment_test import TestPassiveEnvironment as _TestPassiveEnvironment from .env_proxy import EnvironmentProxy # Note: import with underscores so we don't re-run those tests again. EnvType = TypeVar("EnvType", bound=gym.Env, covariant=True) def wrap_type_with_proxy(env_type: Type[EnvType]) -> EnvType: class _EnvProxy(EnvironmentProxy): def __init__(self, *args, **kwargs): env_fn = partial(env_type, *args, **kwargs) super().__init__(env_fn, setting_type=IncrementalAssumption) return _EnvProxy ProxyEnvDataset = wrap_type_with_proxy(EnvDataset) ProxyPassiveEnvironment = wrap_type_with_proxy(PassiveEnvironment) ProxyGymDataLoader = wrap_type_with_proxy(GymDataLoader) class TestEnvironmentProxy(_TestEnvDataset, _TestPassiveEnvironment, _TestGymDataLoader): # IDEA: Reuse the tests for the EnvDataset, but using a proxy to the environment # instead. EnvDataset: ClassVar[Type[EnvDataset]] = ProxyEnvDataset # IDEA: Reuse the tests for the PassiveEnvironment, but using a proxy to the env. PassiveEnvironment: ClassVar[Type[PassiveEnvironment]] = ProxyPassiveEnvironment # Reuse the tests for the Gym DataLoader, using a proxy to the loader instead. GymDataLoader: ClassVar[Type[GymDataLoader]] = ProxyGymDataLoader def test_sanity_check(): env = ProxyEnvDataset(gym.make("CartPole-v0")) assert isinstance(env, EnvironmentProxy) assert issubclass(type(env), EnvironmentProxy) @pytest.mark.parametrize("use_wrapper", [False, True]) def test_is_proxy_to(use_wrapper: bool): import numpy as np from sequoia.common.transforms import Compose, Transforms transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) from torchvision.datasets import MNIST from sequoia.common.spaces import Image batch_size = 32 dataset = MNIST("data", transform=transforms) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment env: Iterable[Tuple[Tensor, Tensor]] = env_type( dataset, batch_size=batch_size, n_classes=10, observation_space=obs_space, ) if use_wrapper: assert isinstance(env, EnvironmentProxy) assert issubclass(type(env), EnvironmentProxy) assert is_proxy_to(env, PassiveEnvironment) else: assert not is_proxy_to(env, PassiveEnvironment) # TODO: Write a test that first reproduces issue #204 and then check that removing # `self.__environment.reset()` from __iter__ fixed it. @pytest.mark.skipif( platform.system() != "Linux", reason="Not sure this would work the same on non-Linux systems.", ) def test_issue_204(): """Test that reproduces the issue #204, which was that some zombie processes appeared to be created when iterating using an EnvironmentProxy. The issue appears to have been caused by calling `self.__environment.reset()` in `__iter__`, which I think caused another dataloader iterator to be created? """ transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) batch_size = 2048 num_workers = 12 dataset = MNIST("data", transform=transforms) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) current_process = psutil.Process() print( f"Current process is using {current_process.num_threads()} threads, with " f" {len(current_process.children(recursive=True))} child processes." ) starting_threads = current_process.num_threads() starting_processes = len(current_process.children(recursive=True)) for use_wrapper in [False, True]: threads = current_process.num_threads() processes = len(current_process.children(recursive=True)) assert threads == starting_threads assert processes == starting_processes env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment env: Iterable[Tuple[Tensor, Tensor]] = env_type( dataset, batch_size=batch_size, n_classes=10, observation_space=obs_space, num_workers=num_workers, persistent_workers=True, ) for i, _ in enumerate(env): threads = current_process.num_threads() processes = len(current_process.children(recursive=True)) assert threads == starting_threads + num_workers assert processes == starting_processes + num_workers print( f"Current process is using {threads} threads, with " f" {processes} child processes." ) for i, _ in enumerate(env): threads = current_process.num_threads() processes = len(current_process.children(recursive=True)) assert threads == starting_threads + num_workers assert processes == starting_processes + num_workers print( f"Current process is using {threads} threads, with " f" {processes} child processes." ) obs = env.reset() done = False while not done: obs, reward, done, info = env.step(env.action_space.sample()) # env.render(mode="human") threads = current_process.num_threads() processes = len(current_process.children(recursive=True)) if not done: assert threads == starting_threads + num_workers assert processes == starting_processes + num_workers print( f"Current process is using {threads} threads, with " f" {processes} child processes." ) env.close() import time # Need to give it a second (or so) to cleanup. time.sleep(1) threads = current_process.num_threads() processes = len(current_process.children(recursive=True)) assert threads == starting_threads assert processes == starting_processes def test_interaction_with_test_environment(): # IDEA: Maybe write tests for the 'test' environments, and see that they work even # through the proxy? pass ================================================ FILE: sequoia/client/server.py ================================================ def server(grpc_host: str, grpc_port: int): raise NotImplementedError(f"TODO") ================================================ FILE: sequoia/client/setting_proxy.py ================================================ import time import warnings from functools import partial from logging import getLogger from pathlib import Path from typing import Any, Callable, Dict, Generic, List, Optional, Type, TypeVar import gym import numpy as np from sequoia.common.config import Config from sequoia.methods import Method from sequoia.settings import ClassIncrementalSetting, IncrementalRLSetting, Results, Setting from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.settings.base import SettingABC from .env_proxy import EnvironmentProxy logger = getLogger(__file__) # IDEA: Dict that indicates for each setting, which attributes are *NOT* writeable. _readonly_attributes: Dict[Type[Setting], List[str]] = { ClassIncrementalSetting: ["test_transforms"], IncrementalRLSetting: ["test_transforms"], } # IDEA: Dict that indicates for each setting, which attributes are *NOT* readable. _hidden_attributes: Dict[Type[Setting], List[str]] = { ClassIncrementalSetting: ["test_class_order"], IncrementalRLSetting: ["test_task_schedule", "test_wrappers"], } SettingType = TypeVar("SettingType", bound=Setting) class SettingProxy(SettingABC, Generic[SettingType]): """Proxy for a Setting. TODO: Creating the Setting locally for now, but we'd spin-up or contact a gRPC service" that would have at least the following endpoints: - get_attribute(name: str) -> Any: returns the attribute from the setting, if that attribute can be read. - set_attribute(name: str, value: Any) -> bool: Sets the given attribute to the given value, if that is allowed. - train_dataloader() - val_dataloader() - test_dataloader() """ # NOTE: Using __slots__ so we can detect errors if Method tries to set non-existent # attribute on the SettingProxy. # TODO: I don't think this has any effect, because we subclass SettingABC which # doesn't use __slots__. __slots__ = ["__setting", "_setting_type", "_train_env", "_val_env", "_test_env"] def __init__( self, setting_type: Type[SettingType], setting_config_path: Path = None, **setting_kwargs, ): self._setting_type = setting_type self.__setting: SettingType if setting_config_path: self.__setting = setting_type.load_benchmark(setting_config_path) if setting_kwargs: raise RuntimeError( "Can't use keyword arguments when passing a path to a yaml file!" ) else: self.__setting = setting_type(**setting_kwargs) self.__setting.monitor_training_performance = True super().__init__() self._train_env = None self._val_env = None self._test_env = None @property def observation_space(self) -> gym.Space: self.set_attribute("train_transforms", self.train_transforms) return self.get_attribute("observation_space") @property def action_space(self) -> gym.Space: return self.get_attribute("action_space") @property def reward_space(self) -> gym.Space: return self.get_attribute("reward_space") @property def train_env(self) -> EnvironmentProxy: return self._train_env @property def val_env(self) -> EnvironmentProxy: return self._val_env @property def test_env(self) -> EnvironmentProxy: if not self._is_readable("test_env"): raise RuntimeError("You don't have access to the test_env attribute!") return self._setting_type.test_env(self) @test_env.setter def test_env(self, value) -> None: if not self._is_writeable("test_env"): raise RuntimeError("You don't have access to the test_env attribute!") self.__setting.test_env = value def _temp_make_readable(self, attribute: str) -> None: """Temporarily makes an attribute readable.""" # if attribute in _hidden_attributes: @property def config(self) -> Config: return self.get_attribute("config") @config.setter def config(self, value: Config) -> None: self.set_attribute("config", value) def prepare_data(self, *args, **kwargs): self.__setting.prepare_data(*args, **kwargs) def setup(self, stage: str = None): self.__setting.setup(stage=stage) def get_name(self): return self.__setting.get_name() def _is_readable(self, attribute: str) -> bool: if self._setting_type in _hidden_attributes: key = self._setting_type else: for parent_setting_type in self._setting_type.get_parents(): if parent_setting_type in _hidden_attributes: key = parent_setting_type break else: return True return attribute not in _hidden_attributes[key] def _is_writeable(self, attribute: str) -> bool: if self._setting_type in _readonly_attributes: key = self._setting_type else: for parent_setting_type in self._setting_type.get_parents(): if parent_setting_type in _readonly_attributes: key = parent_setting_type break else: return True return attribute not in _readonly_attributes[key] @property def batch_size(self) -> Optional[int]: return self.get_attribute("batch_size") @batch_size.setter def batch_size(self, value: Optional[int]) -> None: self.set_attribute("batch_size", value) @property def train_transforms(self) -> List[Callable]: return self.__setting.train_tansforms @train_transforms.setter def train_transforms(self, value: List[Callable]): self.__setting.train_transforms = value @property def val_transforms(self) -> List[Callable]: return self.__setting.val_tansforms @val_transforms.setter def val_transforms(self, value: List[Callable]): self.__setting.val_transforms = value @property def test_transforms(self) -> List[Callable]: return self.__setting.test_tansforms @test_transforms.setter def test_transforms(self, value: List[Callable]): self.__setting.test_transforms = value def apply(self, method: Method, config: Config = None) -> Results: # TODO: Figure out where the 'config' should be defined? method.configure(setting=self) self.config = self._setup_config(method) # TODO: Not sure if the method is changing the train_transforms. # Run the Main loop. self.Observations = self._setting_type.Observations self.Actions = self._setting_type.Actions self.Rewards = self._setting_type.Rewards if hasattr(self._setting_type, "TestEnvironment"): self.TestEnvironment = self._setting_type.TestEnvironment # results = self._setting_type.apply(self, method, config=config) results: Results = self.main_loop(method) logger.info(f"Results objective: {results.objective}") logger.info(results.summary()) method.receive_results(self, results=results) return results def get_attribute(self, name: str) -> Any: value = getattr(self.__setting, name) if value is None: return value if not isinstance(value, (int, str, bool, np.ndarray, gym.Space, list)): warnings.warn( RuntimeWarning( f"TODO: Attribute {name} has a value of type {type(value)}, which " f"wouldn't necessarily be easy to transfer with gRPC. This could " f"mean that we need to implement this on the proxy itself. " ) ) return value def set_attribute(self, name: str, value: Any) -> None: return setattr(self.__setting, name, value) def train_dataloader(self, batch_size: int = None, num_workers: int = None) -> EnvironmentProxy: # TODO: Faking this 'remote-ness' for now: return EnvironmentProxy( env_fn=partial( self.__setting.train_dataloader, batch_size=batch_size, num_workers=num_workers, ), setting_type=self._setting_type, ) batch_size = batch_size if batch_size is not None else self.get_attribute("batch_size") num_workers = num_workers if num_workers is not None else self.get_attribute("num_workers") if self._train_env: self._train_env.close() del self._train_env self._train_env = EnvironmentProxy( env_fn=partial( self.__setting.train_dataloader, batch_size=batch_size, num_workers=num_workers, ), setting_type=self._setting_type, ) return self._train_env def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> EnvironmentProxy: return EnvironmentProxy( env_fn=partial( self.__setting.val_dataloader, batch_size=batch_size, num_workers=num_workers, ), setting_type=self._setting_type, ) if self._val_env: self._val_env.close() del self._val_env self._val_env = EnvironmentProxy( env_fn=partial( self._setting_type.val_dataloader, self, batch_size=batch_size, num_workers=num_workers, ), setting_type=self._setting_type, ) return self._val_env def test_dataloader(self, batch_size: int = None, num_workers: int = None): # TODO: Get the caller, and if it's 'internal' to sequoia then let it through. # raise RuntimeError("You don't have access to the test_dataloader method!") return EnvironmentProxy( env_fn=partial( self.__setting.test_dataloader, batch_size=batch_size, num_workers=num_workers, ), setting_type=self._setting_type, ) # return EnvironmentProxy( # partial(self._setting_type.test_dataloader, self, batch_size=batch_size, num_workers=num_workers), # setting_type=self._setting_type, # ) def __test_dataloader( self, batch_size: int = None, num_workers: int = None ) -> EnvironmentProxy: batch_size = batch_size if batch_size is not None else self.get_attribute("batch_size") num_workers = num_workers if num_workers is not None else self.get_attribute("num_workers") if self._test_env: self._test_env.close() del self._test_env self._test_env = EnvironmentProxy( env_fn=partial( self.__setting.test_dataloader, batch_size=batch_size, num_workers=num_workers, ), setting_type=self._setting_type, ) return self._test_env def main_loop(self, method: Method) -> Results: # TODO: Implement the 'remote' equivalent of the main loop of the IncrementalAssumption. # test_results = self._setting_type.Results() method.set_training() dataset: str = self.get_attribute("dataset") nb_tasks = self.get_attribute("nb_tasks") known_task_boundaries_at_train_time: bool = self.get_attribute( "known_task_boundaries_at_train_time" ) task_labels_at_train_time: bool = self.get_attribute("task_labels_at_train_time") # Send the train / val transforms to the 'remote' env. self.set_attribute("train_transforms", self.train_transforms) self.set_attribute("val_transforms", self.val_transforms) self.Results = self._setting_type.Results # TODO: Can we avoid duplicating the main loop here? # test_results = self.__setting.main_loop(method) # test_results._objective_scaling_factor = ( # 0.01 if dataset.startswith("MetaMonsterKong") else 1.0 # ) test_results = self._setting_type.main_loop(self, method=method) start_time = time.process_time() # for task_id in range(nb_tasks): # logger.info( # f"Starting training" + (f" on task {task_id}." if nb_tasks > 1 else ".") # ) # self.set_attribute("_current_task_id", task_id) # if known_task_boundaries_at_train_time: # # Inform the model of a task boundary. If the task labels are # # available, then also give the id of the new task to the # # method. # # TODO: Should we also inform the method of wether or not the # # task switch is occuring during training or testing? # if not hasattr(method, "on_task_switch"): # logger.warning( # UserWarning( # f"On a task boundary, but since your method doesn't " # f"have an `on_task_switch` method, it won't know about " # f"it! " # ) # ) # elif not task_labels_at_train_time: # method.on_task_switch(None) # else: # # NOTE: on_task_switch won't be called if there is only one "task", # # (as-in one task in a 'sequence' of tasks). # # TODO: in multi-task RL, i.e. RLSetting(dataset=..., nb_tasks=10), # # for instance, then there are indeed 10 tasks, but `self.tasks` # # is used here to describe the number of 'phases' in training and # # testing. # if nb_tasks > 1: # method.on_task_switch(task_id) # task_train_loader = self.train_dataloader() # task_valid_loader = self.val_dataloader() # success = method.fit( # train_env=task_train_loader, valid_env=task_valid_loader, # ) # task_train_loader.close() # task_valid_loader.close() # test_results._online_training_performance.append( # task_train_loader.get_online_performance() # ) # test_loop_results = self.test_loop(method) # test_results.append(test_loop_results) # logger.info(f"Finished Training on task {task_id}.") runtime = time.process_time() - start_time test_results._runtime = runtime return test_results def test_loop(self, method: Method) -> "IncrementalAssumption.Results": """(WIP): Runs an incremental test loop and returns the Results. The idea is that this loop should be exactly the same, regardless of if you're on the RL or the CL side of the tree. NOTE: If `self.known_task_boundaries_at_test_time` is `True` and the method has the `on_task_switch` callback defined, then a callback wrapper is added that will invoke the method's `on_task_switch` and pass it the task id (or `None` if `not self.task_labels_available_at_test_time`) when a task boundary is encountered. This `on_task_switch` 'callback' wrapper gets added the same way for Supervised or Reinforcement learning settings. """ nb_tasks = self.get_attribute("nb_tasks") known_task_boundaries_at_test_time = self.get_attribute( "known_task_boundaries_at_test_time" ) # TODO: Always setting this to False for now. task_labels_at_test_time = self.get_attribute("task_labels_at_test_time") if task_labels_at_test_time: warnings.warn( RuntimeWarning("no task labels at test time for now when using a SettingProxy") ) # TODO: Avoid duplicating the test loop here? test_results = self.__setting.test_loop(method=method) # was_training = method.training # method.set_testing() # test_env = self.__test_dataloader() # if known_task_boundaries_at_test_time and nb_tasks > 1: # # TODO: We need to have a way to inform the Method of task boundaries, if the # # Setting allows it. # # Not sure how to do this. It might be simpler to just do something like # # `obs, rewards, done, info, task_switched = .step(actions)`? # # # Add this wrapper that will call `on_task_switch` when the right step is # # # reached. # # test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch]) # pass # obs = test_env.reset() # batch_size = test_env.batch_size # max_steps: int = self.get_attribute("test_steps") // (batch_size or 1) # # Reset on the last step is causing trouble, since the env is closed. # pbar = tqdm.tqdm(itertools.count(), total=train_max_steps, desc="Test") # episode = 0 # for step in pbar: # if test_env.is_closed(): # logger.debug(f"Env is closed") # break # # BUG: This doesn't work if the env isn't batched. # action_space = test_env.action_space # batch_size = getattr( # test_env, "num_envs", getattr(test_env, "batch_size", 0) # ) # env_is_batched = batch_size is not None and batch_size >= 1 # if env_is_batched: # # NOTE: Need to pass an action space that actually reflects the batch # # size, even for the last batch! # obs_batch_size = obs.x.shape[0] if obs.x.shape else None # action_space_batch_size = ( # test_env.action_space.shape[0] # if test_env.action_space.shape # else None # ) # if ( # obs_batch_size is not None # and obs_batch_size != action_space_batch_size # ): # action_space = batch_space( # test_env.single_action_space, obs_batch_size # ) # action = method.get_actions(obs, action_space) # # logger.debug(f"action: {action}") # obs, reward, done, info = test_env.step(action) # # TODO: Add something to `info` that indicates when a task boundary is # # reached, so that we can call the `on_task_switch` method on the Method # # ourselves. # if done and not test_env.is_closed(): # # logger.debug(f"end of test episode {episode}") # obs = test_env.reset() # episode += 1 # test_env.close() # test_results = test_env.get_results() # if was_training: # method.set_training() return test_results # NOTE: Was experimenting with the idea of allowing the regular getattr and setattr # to forward calls to the remote. In the end I think it's better to explicitly # prevent any of these from happening. def __getattr__(self, name: str): # NOTE: This only ever gets called if the attribute was not found on the if self._is_readable(name): print(f"Accessing missing attribute {name} from the 'remote' setting.") return self.get_attribute(name) raise AttributeError( f"Attribute {name} is either not present on the setting, or not marked as " f"readable!" ) # def __setattr__(self, name: str, value: Any) -> None: # # Weird pytorch-lightning stuff: # logger.debug(f"__setattr__ called for attribute {name}") # if name in {"_setting_type", "__setting"}: # assert name not in self.__dict__, f"Can't change attribute {name}" # object.__setattr__(self, name, value) # elif self._is_writeable(name): # logger.info(f"Setting attribute {name} on the 'remote' setting.") # self.set_attribute(name, value) # else: # raise AttributeError(f"Attribute {name} is marked as read-only!") ================================================ FILE: sequoia/client/setting_proxy_test.py ================================================ """TODO: Tests for the SettingProxy. """ from functools import partial from typing import ClassVar, Type import numpy as np import pytest from gym import spaces from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.common.spaces import Image, Sparse from sequoia.common.transforms import Transforms from sequoia.conftest import slow from sequoia.methods.base_method import BaseMethod from sequoia.methods.method_test import key_fn from sequoia.methods.random_baseline import RandomBaselineMethod from sequoia.settings import Setting, all_settings from sequoia.settings.rl import IncrementalRLSetting, TaskIncrementalRLSetting from sequoia.settings.rl.continual.setting import ContinualRLSetting from sequoia.settings.rl.continual.setting_test import ( TestContinualRLSetting as ContinualRLSettingTests, ) from sequoia.settings.sl import ClassIncrementalSetting, DomainIncrementalSLSetting from sequoia.settings.sl.continual.setting import ContinualSLSetting from sequoia.settings.sl.continual.setting_test import ( TestContinualSLSetting as ContinualSLSettingTests, ) from .setting_proxy import SettingProxy @pytest.mark.parametrize("setting_type", sorted(all_settings, key=key_fn)) def test_spaces_match(setting_type: Type[Setting]): setting = setting_type() s_proxy = SettingProxy(setting_type) assert s_proxy.observation_space == setting.observation_space assert s_proxy.action_space == setting.action_space assert s_proxy.reward_space == setting.reward_space def test_transforms_get_propagated(): for setting in [ TaskIncrementalRLSetting(dataset="MetaMonsterKong-v0"), SettingProxy(TaskIncrementalRLSetting, dataset="MetaMonsterKong-v0"), ]: assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8) setting.transforms.append(Transforms.to_tensor) setting.transforms.append(Transforms.resize_32x32) # TODO: The observation space doesn't update directly in RL whenever the # transforms are changed. assert setting.observation_space.x == Image(0, 1, shape=(3, 32, 32)) assert setting.train_dataloader().reset().x.shape == (3, 32, 32) class TestContinualSLSettingProxy(ContinualSLSettingTests): Setting: ClassVar[Type[Setting]] = partial(SettingProxy, ContinualSLSetting) class TestContinualRLSettingProxy(ContinualRLSettingTests): Setting: ClassVar[Type[Setting]] = partial(SettingProxy, ContinualRLSetting) @pytest.mark.timeout(30) def test_random_baseline(config): method = RandomBaselineMethod() setting = SettingProxy(DomainIncrementalSLSetting, config=config) results = setting.apply(method, config=config) # domain incremental mnist: 2 classes per task -> chance accuracy of 50%. assert 0.45 <= results.objective <= 0.55 @pytest.mark.timeout(180) def test_random_baseline_rl(): method = RandomBaselineMethod() setting = SettingProxy( IncrementalRLSetting, dataset="monsterkong", monitor_training_performance=True, # observe_state_directly=False, ## TODO: Make sure this doesn't change anything. train_steps_per_task=1_000, test_steps_per_task=1_000, train_task_schedule={ 0: {"level": 0}, 1: {"level": 1}, 2: {"level": 10}, 3: {"level": 11}, 4: {"level": 0}, }, # Interesting problem: Will it always do at least an entire episode here per # env? # batch_size=2, # num_workers=0, ) assert setting.train_max_steps == 4_000 assert setting.test_max_steps == 4_000 results: IncrementalRLSetting.Results[EpisodeMetrics] = setting.apply(method) assert 20 <= results.average_final_performance.mean_reward_per_episode @pytest.mark.timeout(120) def test_random_baseline_SL_track(): method = RandomBaselineMethod() setting = SettingProxy(ClassIncrementalSetting, dataset="synbols", nb_tasks=12) results = setting.apply(method) assert 1 / 48 * 0.5 <= results.objective <= 1 / 48 * 1.5 @slow @pytest.mark.timeout(300) def test_baseline_SL_track(config): """Applies the BaseMethod on something ressembling the SL track of the competition. """ method = BaseMethod(max_epochs=1) import numpy as np class_order = np.random.permutation(48).tolist() setting = SettingProxy( ClassIncrementalSetting, dataset="synbols", nb_tasks=12, class_order=class_order, ) results = setting.apply(method, config) assert results.to_log_dict() # TODO: Add tests for having a different ordering of test tasks vs train tasks. results: ClassIncrementalSetting.Results online_perf = results.average_online_performance assert 0.30 <= online_perf.objective <= 0.65 final_perf = results.average_final_performance assert 0.02 <= final_perf.objective <= 0.06 def test_rl_track_setting_is_correct(): setting = SettingProxy( IncrementalRLSetting, "rl_track", ) assert setting.nb_tasks == 8 assert setting.dataset == "MetaMonsterKong-v0" assert setting.observation_space == spaces.Dict( x=Image(0, 1, (3, 64, 64), dtype=np.float32), task_labels=Sparse(spaces.Discrete(8)), ) assert setting.action_space == spaces.Discrete(6) # TODO: The reward range of the MetaMonsterKongEnv is (0, 50), which seems wrong. # This isn't really a big deal though. # assert setting.reward_space == spaces.Box(0, 100, shape=(), dtype=np.float32) assert setting.steps_per_task == 200_000 assert setting.test_steps_per_task == 10_000 assert setting.known_task_boundaries_at_train_time is True assert setting.known_task_boundaries_at_test_time is False assert setting.monitor_training_performance is True assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels] assert setting.val_transforms == [Transforms.to_tensor, Transforms.three_channels] assert setting.test_transforms == [Transforms.to_tensor, Transforms.three_channels] train_env = setting.train_dataloader() assert train_env.observation_space == spaces.Dict( x=Image(0, 1, (3, 64, 64), dtype=np.float32), task_labels=spaces.Discrete(8), ) assert train_env.reset() in train_env.observation_space valid_env = setting.val_dataloader() assert valid_env.observation_space == spaces.Dict( x=Image(0, 1, (3, 64, 64), dtype=np.float32), task_labels=spaces.Discrete(8), ) # IDEA: Prevent submissions from calling the test_dataloader method or accessing the # test_env / test_dataset property? with pytest.raises(RuntimeError): test_env = setting.test_dataloader() test_env.reset() with pytest.raises(RuntimeError): test_env = setting.test_env test_env.reset() def test_sl_track_setting_is_correct(): setting = SettingProxy( ClassIncrementalSetting, "sl_track", ) assert setting.nb_tasks == 12 assert setting.dataset == "synbols" assert setting.observation_space == spaces.Dict( x=Image(0, 1, (3, 32, 32), dtype=np.float32), task_labels=spaces.Discrete(12), ) assert setting.n_classes_per_task == 4 assert setting.action_space == spaces.Discrete(48) assert setting.reward_space == spaces.Discrete(48) assert setting.known_task_boundaries_at_train_time is True assert setting.known_task_boundaries_at_test_time is False assert setting.monitor_training_performance is True assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels] assert setting.val_transforms == [Transforms.to_tensor, Transforms.three_channels] assert setting.test_transforms == [Transforms.to_tensor, Transforms.three_channels] ================================================ FILE: sequoia/common/__init__.py ================================================ from .batch import Batch from .config import Config from .loss import Loss from .metrics import ClassificationMetrics, Metrics, RegressionMetrics, get_metrics from .spaces import Sparse ================================================ FILE: sequoia/common/batch.py ================================================ """ WIP (@lebrice): Playing around with the idea of using a typed object to represent the different forms of "batches" that settings produce and that different models expect. """ import dataclasses import itertools from abc import ABC from collections import namedtuple from dataclasses import dataclass from functools import partial, singledispatch from typing import ( Any, Callable, ClassVar, Dict, Iterable, Iterator, KeysView, List, Mapping, NamedTuple, Optional, Tuple, Type, TypeVar, Union, ) import gym import numpy as np import torch from torch import Tensor from sequoia.utils.logging_utils import get_logger try: from functools import singledispatchmethod # type: ignore except ImportError: from singledispatchmethod import singledispatchmethod # type: ignore logger = get_logger(__name__) B = TypeVar("B", bound="Batch", covariant=True) T = TypeVar("T", Tensor, np.ndarray, "Batch") V = TypeVar("V") def hasmethod(obj: Any, method_name: str) -> bool: return hasattr(obj, method_name) and callable(getattr(obj, method_name)) @dataclass(frozen=True, eq=False) class Batch(ABC, Mapping[str, T]): """Abstract base class for typed, immutable objects holding tensors. Can be used as an immutable dictionary mapping from strings to tensors, or as a tuple if you index with an integer. Also has some Tensor-like helper methods like `to()`, `numpy()`, `detach()`, etc. Other features: - numpy-style indexing/slicing/masking - moving all items between devices - changing the dtype of all tensors - detaching all tensors - Convertign all tensors to numpy arrays - convertible to a tuple or a dict NOTE: Using dataclasses rather than namedtuples, because those aren't really meant to be subclassed, so we couldn't use them to make the 'Observations' hierarchy, for instance. Dataclasses work better for that purpose. Examples: >>> import torch >>> from typing import Optional >>> from dataclasses import dataclass >>> @dataclass(frozen=True) ... class MyBatch(Batch): ... x: Tensor ... y: Tensor = None >>> batch = MyBatch(x=torch.ones([10, 3, 32, 32]), y=torch.arange(10)) >>> batch.shapes {'x': torch.Size([10, 3, 32, 32]), 'y': torch.Size([10])} >>> batch.batch_size 10 >>> batch.dtypes {'x': torch.float32, 'y': torch.int64} >>> batch.dtype # No shared dtype, so dtype returns None. >>> batch.float().dtype # Converting the all items to float dtype: torch.float32 Device-related methods: >>> from dataclasses import dataclass >>> import torch >>> from torch import Tensor >>> @dataclass(frozen=True) ... class Observations(Batch): ... x: Tensor ... task_labels: Tensor ... done: Tensor ... >>> # Example: observations from two gym environments (e.g. VectorEnv) >>> observations = Observations( ... x = torch.arange(10).reshape([2, 5]), ... task_labels = torch.arange(2, dtype=int), ... done = torch.zeros(2, dtype=bool), ... ) >>> observations.shapes {'x': torch.Size([2, 5]), 'task_labels': torch.Size([2]), 'done': torch.Size([2])} >>> observations.batch_size 2 Datatypes: >>> observations.dtypes {'x': torch.int64, 'task_labels': torch.int64, 'done': torch.bool} >>> observations.dtype # No shared dtype, so dtype returns None. >>> observations.float().dtype # Converting the all items to float dtype: torch.float32 Returns the device common to all items, or None: >>> observations.device device(type='cpu') >>> # observations.to("cuda").device >>> # device(type='cuda', index=0) >>> observations[0] tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) Additionally, when slicing a Batch across the first dimension, you get other typed objects as a result! For example: >>> observations[:, 0] Observations(x=tensor([0, 1, 2, 3, 4]), task_labels=tensor(0), done=tensor(False)) >>> observations[:, 1] Observations(x=tensor([5, 6, 7, 8, 9]), task_labels=tensor(1), done=tensor(False)) """ # TODO: Would it make sense to add a gym Space class variable here? space: ClassVar[Optional[gym.Space]] # TODO: Remove these: field_names: ClassVar[List[str]] _namedtuple: ClassVar[Type[NamedTuple]] def __init_subclass__(cls, *args, **kwargs): # IDEA: By not marking 'Batch' a dataclass, we would let the subclass # decide it if wants to be frozen or not! # Subclasses of `Batch` should be dataclasses! if not dataclasses.is_dataclass(cls): raise RuntimeError(f"{__class__} subclass {cls} must be a dataclass!") super().__init_subclass__(*args, **kwargs) def __post_init__(self): # Create some class attributes, if they don't already exist. # TODO: We have to set these here because __init_subclass__ is called # before the dataclasses package sets the 'fields' attribute, it seems. cls = type(self) if "field_names" not in cls.__dict__: type(self).field_names = [f.name for f in dataclasses.fields(self)] # Create a NamedTuple type for this new subclass. if "_named_tuple" not in cls.__dict__: type(self)._namedtuple = namedtuple(type(self).__name__ + "Tuple", self.field_names) def __iter__(self) -> Iterator[str]: """Yield the 'keys' of this object, i.e. the names of the fields.""" return iter(self.field_names) def __len__(self) -> int: """Returns the number of fields.""" return len(self.field_names) def __eq__(self, other: Union["Batch", Any]) -> bool: # Not sure this is useful. return NotImplemented if not isinstance(other, Batch): return NotImplemented if type(self) != type(other): # Not allowing these sorts of comparisons. return NotImplemented items_equal = {k: v == other[k] for k, v in self.items()} return all( is_equal.all() if isinstance(is_equal, (Tensor, np.ndarray)) else is_equal for is_equal in items_equal.values() ) @singledispatchmethod def __getitem__(self, index: Any) -> T: """Select a subset of the fields of this object. Can also be indexed with tuples, boolean numpy arrays or tensors, as well as None. """ raise KeyError(index) @__getitem__.register(type(None)) def _getitem_none(self, index: None) -> "Batch": """Indexing with 'None' gives back a copy with all the items having an extra batch dimension. """ return self.with_batch_dimension() return getattr(self, index) @__getitem__.register def _getitem_by_name(self, index: str) -> Union[Tensor, Any]: return getattr(self, index) @__getitem__.register def _getitem_by_index(self, index: int) -> Union[Tensor, Any]: return getattr(self, self.field_names[index]) @__getitem__.register(slice) def _getitem_with_slice(self, index: slice) -> "Batch": # NOTE: I don't think it would be a good idea to support slice indexing, # as it could be confusing and give the user the impression that it # is slicing into the tensors, rather than into the fields. # I guess this might be doable, but is it really useful? raise NotImplementedError("Batch objects don't support indexing with (just) slices atm.") if index == slice(None, None, None) or index == slice(0, len(self), 1): return self @__getitem__.register(type(Ellipsis)) def _(self: B, index) -> B: return self @__getitem__.register(np.ndarray) @__getitem__.register(Tensor) def _getitem_with_array(self, index: np.ndarray) -> B: """ NOTE: Indexing with just an array uses the array as a 'mask' on all fields, instead of indexing the "keys" of this object. """ assert len(index) == self.batch_size return self[:, index] @__getitem__.register(tuple) def _getitem_with_tuple(self, index: Tuple[Union[slice, Tensor, np.ndarray, int], ...]): """When slicing with a tuple, if the first item is an integer, we get the attribute at that index and slice it with the rest. For now, the first item in the tuple can only be either an int or an empty slice. """ if len(index) <= 1: raise IndexError( f"Invalid index {index}: When indexing with " f"tuples or lists, they need to have len > 1." ) field_index = index[0] item_index = index[1:] # if len(item_index) == 1: # item_index = item_index[0] if isinstance(field_index, int): # logger.debug(f"Getting the {field_index}'th field, with slice {index[1:]}") return self[field_index][item_index] # e.g: forward_pass[:, 1] if isinstance(field_index, slice): if field_index == slice(None): # logger.debug(f"Indexing all fields {field_index} with index: {item_index}") return type(self)( **{ key: ( value[index] if isinstance(value, Batch) else value[item_index] if value is not None else None ) for key, value in self.items() } ) # batch[..., 0] : Not sure this would really be that helpful. if field_index == Ellipsis: logger.debug(f"Using ellipsis (...) as the field index?") return type(self)( **{ key: value[Ellipsis, item_index] if value is not None else None for key, value in self.items() } ) raise NotImplementedError( f"Only support tuple indexing with emptyslices or int as first " f"tuple item for now. (index={index})" ) def slice(self: B, index: Union[int, slice, np.ndarray, Tensor]) -> B: """Gets a slice across the first (batch) dimension. Raises an error if there is no batch size. Always returns an object with a batch dimension, even when `index` has len of 1. """ if not isinstance(index, (int, slice, np.ndarray, Tensor)): raise NotImplementedError(f"can't slice with index {index}") # BUG: By putting a 'None' value in the ForwardPass def getitem_if_val_is_not_none(val, index): if val is None: return None return val[index] sliced_value = self._map(partial(getitem_if_val_is_not_none, index=index), recursive=True) if isinstance(index, int): sliced_value = sliced_value.with_batch_dimension() return sliced_value # return type(self)(**{ # k: v.slice(index) if isinstance(v, Batch) else # v[index] if v is not None else None # for k, v in self.items() # }) def __setitem__(self, index: Union[int, str], value: Any): """Set a value in slices of one or more of the fields. NOTE: Since this class is marked as frozen, we can't change the attributes, so the index should be a tuple (to change parts of the tensors, for instance. """ if not isinstance(index, tuple) or len(index) < 2: raise NotImplementedError("index needs to be tuple with len >= 2") # Get which keys/fields were selected: selected_fields = np.array(self.field_names)[index[0]] for selected_field in selected_fields: item = self[selected_field] if item is not None: item[index[1:]] = value def keys(self) -> KeysView[str]: return KeysView(self.field_names) def values(self) -> Tuple[T, ...]: return self.as_namedtuple() def items(self) -> Iterable[Tuple[str, T]]: for name in self.field_names: yield name, getattr(self, name) @property def devices(self) -> Dict[str, Union[Optional[torch.device], Dict]]: """Dict from field names to their device if they have one, else None. If `self` has `Batch` fields, the values for those will be dicts. """ return { k: v.devices if isinstance(v, Batch) else getattr(v, "device", None) for k, v in self.items() } @property def device(self) -> Optional[torch.device]: """Returns the device common to all items, or `None`. Returns ------- Tuple[Optional[torch.device]] None if the devices are unknown/different, or the common device. """ device: Optional[torch.device] = None # TODO: These kinds of methods can't discriminate between a child item # having all all None tensors and it having different devices atm. for key, value in self.items(): if isinstance(value, Batch): item_device = value.device if item_device is None: # Child item doesn't have a 'device', so `self` also doesnt. return None else: item_device = getattr(value, "device", None) if item_device is None: continue if device is None: device = item_device elif item_device != device: return None return device @property def dtypes(self) -> Dict[str, Union[Optional[torch.dtype], Dict]]: """Dict from field names to their dtypes if they have one, else None. If `self` has `Batch` fields, the values for those will be dicts. """ return { k: v.dtypes if isinstance(v, Batch) else getattr(v, "dtype", None) for k, v in self.items() } @property def dtype(self) -> Tuple[Optional[torch.dtype]]: """Returns the dtype common to all tensors, or None. Returns ------- Dict[Optional[torch.dtype]] The common dtype, or `None` if the dtypes are unknown/different. """ dtype: Optional[torch.dtype] = None for key, value in self.items(): item_dtype = getattr(value, "dtype", None) if item_dtype is None: continue if dtype is None: dtype = item_dtype elif item_dtype != dtype: return None return dtype def as_namedtuple(self) -> Tuple[T, ...]: return self._namedtuple(**{k: v for k, v in self.items()}) def as_list_of_tuples(self) -> Iterable[Tuple[T, ...]]: """Returns an iterable of the items in the 'batch', each item as a namedtuple (list of tuples). """ # If one of the fields is None, then we convert it into a list of Nones, # so we can zip all the fields to create a list of tuples. field_items = [ [items for _ in range(self.batch_size)] if items is None or items is {} else [item for item in items] for items in self.as_tuple() ] assert all([len(items) == self.batch_size for items in field_items]) return list(itertools.starmap(self._namedtuple, zip(*field_items))) def as_tuple(self) -> Tuple[T, ...]: """Returns a namedtuple containing the 'batched' attributes of this object (tuple of lists). """ # TODO: Turning on the namedtuple return value by default. # return tuple( # getattr(self, f.name) for f in dataclasses.fields(self) # ) return self.as_namedtuple() # def as_dict(self) -> Dict[str, T]: # # NOTE: dicts are ordered since python 3.7 # return { # field_name: getattr(self, field_name) # for field_name in self.field_names # } def to(self, *args, **kwargs): def _to(item, *args_, **kwargs_): if hasattr(item, "to") and callable(item.to): return item.to(*args_, **kwargs_) return item return self._map(_to, *args, **kwargs, recursive=True) def float(self, dtype=torch.float): return self.to(dtype=dtype) def float32(self, dtype=torch.float32): return self.to(dtype=dtype) def int(self, dtype=torch.int): return self.to(dtype=dtype) def double(self, dtype=torch.double): return self.to(dtype=dtype) def numpy(self): """Returns a new Batch object of the same type, with all Tensors converted to numpy arrays. Returns ------- [type] [description] """ def _numpy(v): if isinstance(v, (Tensor, Batch)): return v.detach().cpu().numpy() return v return self._map(_numpy, recursive=True) # return type(self)(**{ # k: v.detach().cpu().numpy() if isinstance(v, (Tensor, Batch)) else v # for k, v in self.items() # }) def detach(self): """Returns a new Batch object of the same type, with all Tensors detached. Returns ------- Batch New object of the same type, but with all tensors detached. """ from sequoia.utils.generic_functions import detach return self._map(detach) # return type(self)(**detach({ # k: v.detach() if isinstance(v, (Tensor, Batch)) else v for k, v in self.items() # })) def cpu(self, **kwargs): """Returns a new Batch object of the same type, with all Tensors moved to cpu. Returns ------- Batch New object of the same type, but with all tensors moved to CPU. """ return self.to(device="cpu", **kwargs) def cuda(self, device=None, **kwargs): """Returns a new Batch object of the same type, with all Tensors moved to cuda device. Returns ------- Batch New object of the same type, but with all tensors moved to cuda. """ return self.to(device=(device or "cuda"), **kwargs) @property def shapes(self) -> Dict[str, Union[torch.Size, Dict]]: """Dict from field names to their shapes if they have one, else None. If `self` has `Batch` fields, the values for those will be dicts. """ return { k: v.shapes if isinstance(v, Batch) else getattr(v, "shape", None) for k, v in self.items() } @property def batch_size(self) -> Optional[int]: """Returns the length of the first dimension if it is common to all tensors in this object, else None. """ # NOTE: If all tensors have just one dimension and are all the same # length, then this would give back that length. batch_size: Optional[int] = None for k, v in self.items(): if isinstance(v, Batch): v_batch_size = v.batch_size if v_batch_size is None: # child item doesn't have a batch size, so we dont either. return None elif batch_size is None: batch_size = v_batch_size elif v_batch_size != batch_size: return None else: item_shape = getattr(v, "shape", None) if item_shape is None: continue if not item_shape: return None v_batch_size = item_shape[0] if batch_size is None: batch_size = v_batch_size elif v_batch_size != batch_size: return None return batch_size def with_batch_dimension(self: B) -> B: """Returns a copy of `self` where all numpy arrays / tensors have an extra `batch` dimension of size 1. """ # TODO: Do we 'wrap' the `None` values? or keep them as-is? from sequoia.utils.categorical import Categorical @singledispatch def unsqueeze(v: Any) -> Any: if v is None: return v return np.asarray([v]) @unsqueeze.register(Categorical) @unsqueeze.register(np.ndarray) @unsqueeze.register(Tensor) def _unsqueeze_array( v: Union[np.ndarray, Tensor, Categorical] ) -> Union[np.ndarray, Tensor, Categorical]: return v[None] return self._map(unsqueeze) def remove_batch_dimension(self: B) -> B: """Returns a copy of `self` where all numpy arrays / tensors have an the extra `batch` dimension removed. Raises an error if any non-None value doesn't have a batch dimension of size 1. """ return self[:, 0] def split(self: B) -> List[B]: """Returns an iterable of the items in the 'batch', each item as a object of the same type as `self`. """ # If one of the fields is None, then we convert it into a list of Nones, # so we can zip all the fields to create a list of tuples. return [self[:, i] for i in range(self.batch_size)] @classmethod def stack(cls: Type[B], items: List[B]) -> B: items = list(items) from sequoia.utils.generic_functions import stack # Just to make sure that the returned item will be of the type `cls`. assert isinstance(items[0], cls) return stack(items) @classmethod def concatenate(cls: Type[B], items: List[B], **kwargs) -> B: items = list(items) from sequoia.utils.generic_functions import concatenate assert isinstance(items[0], cls) return concatenate(items, **kwargs) def torch(self, device: Union[str, torch.device] = None, dtype: torch.dtype = None): """Converts any ndarrays to Tensors if possible and returns a new object of the same type. NOTE: This is the opposite of `self.numpy()` """ def _from_numpy(v: Union[np.ndarray, Any]) -> Union[Tensor, Any]: try: return torch.as_tensor(v, device=device, dtype=dtype) except (TypeError, RuntimeError): return v return self._map(_from_numpy, recursive=True) def _map(self: B, func: Callable, *args, recursive: bool = True, **kwargs) -> B: """Returns an object of the same type as `self`, where function `func` has been applied (with positional args `args` and keyword-arguments `kwargs`) to all its values, (inluding the values of nested `Batch` objects if `recursive` is True). """ new_items = {} for key, value in self.items(): if isinstance(value, Batch): if not recursive: # don't apply the function to nested Batch objects unless # `recursive` is True. new_items[key] = value else: new_items[key] = value._map(func, *args, recursive=recursive, **kwargs) else: new_items[key] = func(value, *args, **kwargs) # type: ignore return type(self)(**new_items) def _apply( self: B, func: Callable[[T, Any], None], *args, recursive: bool = True, **kwargs ) -> None: """Applies function `func` to all the values in `self`, and optionally to all its nested values when `recursive` is True. Returns None, as this assumes that `func` modifies the values in-place. """ for key, value in self.items(): if isinstance(value, Batch) and not recursive: # Skip any Batch objects if `recursive` is False. continue func(value, *args, **kwargs) # type: ignore from sequoia.utils.generic_functions.replace import replace @replace.register(Batch) def _replace_batch_items(obj: Batch, **items) -> Batch: return dataclasses.replace(obj, **items) from typing import Sequence from sequoia.utils.generic_functions import get_slice, set_slice @get_slice.register(Batch) def _get_batch_slice(value: Batch, indices: Sequence[int]) -> Batch: return value.slice(indices) # assert False, f"Removing this in favor of just doing Batch[:, indices]. " # return type(value)(**{ # field_name: get_slice(field_value, indices) if field_value is not None else None # for field_name, field_value in value.as_dict().items() # }) @set_slice.register(Batch) def set_batch_slice(target: Batch, indices: Sequence[int], values: Batch) -> None: for key, target_values in target.items(): set_slice(target_values, indices, values[key]) if __name__ == "__main__": import doctest doctest.testmod() ================================================ FILE: sequoia/common/batch_test.py ================================================ """ Tests for the `Batch` class. """ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple, Type import numpy as np import pytest import torch from torch import Tensor from sequoia.utils.categorical import Categorical from .batch import Batch @dataclass(frozen=True) class Observations(Batch): x: Tensor task_labels: Optional[Tensor] = None @dataclass(frozen=True) class Actions(Batch): y_pred: Tensor @dataclass(frozen=True) class RLActions(Actions): action_dist: Categorical @dataclass(frozen=True) class Rewards(Batch): y: Tensor @pytest.mark.parametrize( "batch_type, items_dict", [ ( Observations, dict( x=torch.arange(10), task_labels=torch.arange(10) + 1, ), ), ], ) def test_batch_behaves_like_a_dict(batch_type, items_dict): obj = batch_type(**items_dict) # NOTE: dicts, along with their .keys() and .values() are ordered as of py37 for i, (k, v) in enumerate(obj.items()): original_value = items_dict[k] assert k == list(items_dict.keys())[i] # key order is the same. assert (v == original_value).all() if isinstance(original_value, Tensor): assert v is original_value # Tensors shouldn't be cloned or copied assert (obj[k] == v).all() # values are the same. assert (obj[k] == getattr(obj, k)).all() # getattr same as __getitem__ assert (obj[i] == v).all() # can also be indexed with ints like a tuple. @pytest.mark.parametrize( "batch_type, items_dict", [ ( Observations, dict( x=torch.arange(10), task_labels=torch.arange(10) + 1, ), ), ], ) def test_to(batch_type: Type[Batch], items_dict: Dict[str, Tensor]): """Test that the 'to' method behaves like `torch.Tensor.to`, so that we can move all the items in a `Batch` between devices or dtypes. """ original_devices: Dict[str, torch.device] = {k: v.device for k, v in items_dict.items()} original_dtypes: Dict[str, torch.dtype] = {k: v.dtype for k, v in items_dict.items()} obj = batch_type(**items_dict) # The devices and dtypes remain the same when creating the Batch with the # given items. for k, v in obj.items(): original_value = items_dict[k] assert v.device == original_value.device == original_devices[k] assert v.dtype == original_value.dtype == original_dtypes[k] # The 'devices' and 'dtypes' attributes give the devices and dtypes of all # items. assert obj.devices == original_devices assert obj.dtypes == original_dtypes devices = list(original_devices.values()) dtypes = list(original_dtypes.values()) if len(set(devices)) == 1: # If they all share the same device, then the `device` attribute on the # `batch` is this shared device. common_device = devices[0] assert obj.device == common_device if len(set(dtypes)) == 1: # If all tensors have the same dtype, then the `dtype` attribute on the # `batch` is this shared dtype. common_dtype = dtypes[0] assert obj.dtype == common_dtype # Test moving to another device, if possible. if torch.cuda.is_available(): cuda_obj = obj.to("cuda") for i, (k, v) in enumerate(cuda_obj.items()): assert v.device.type == "cuda" float_obj = obj.to(dtype=torch.float32) for k, v in float_obj.items(): original_value = items_dict[k] assert v.device == original_value.device assert v.dtype == torch.float32 assert (v == original_value.to(dtype=torch.float32)).all() @pytest.mark.parametrize( "batch_type, items_dict", [ ( Observations, dict( x=torch.arange(25).reshape([5, 5]), task_labels=torch.arange(25).reshape([5, 5]) + 1, ), ), ], ) @pytest.mark.parametrize( "index", [ (0, 0), # obj[0, 0] (0, ..., 0), # obj[0, ..., 0] (slice(None), 0), # obj[:, 0] (slice(None), slice(3)), # obj[:, :3] (slice(None), slice(None, -3)), # obj[:, -3:] (slice(None), slice(None, None, 2)), # obj[:, ::2] (slice(None), np.arange(5) % 2 == 0), # obj[:, even_mask] (slice(None), np.arange(5) % 2 == 0), # obj[:, even_mask] ], ) def test_tuple_indexing( batch_type: Type[Batch], items_dict: Dict[str, Tensor], index: Tuple[Any, ...] ): """Test that we can index into the object in the same style as an ndarray""" obj = batch_type(**items_dict) keys = list(items_dict.keys()) print(f"Expected keys: {keys}") expected_items = {k: items_dict[k][index[1:]] for k in np.array(keys)[index[0]]} print(f"expected sliced items:") for key, value in expected_items.items(): print(key, value) actual_slice = obj[index] if index[0] == slice(None): # actual_slice: Batch assert isinstance(actual_slice, batch_type) assert list(actual_slice.keys()) == keys for k, sliced_value in actual_slice.items(): print(f"key {k}, index {index}") print(f"Sliced value: {sliced_value}") expected_value = expected_items[k] print(f"Expected value: {expected_value}") assert (sliced_value == expected_value).all() if isinstance(index[0], int): # e.g. Observations[0, <...>] key = keys[index[0]] expected_value = expected_items[key] assert (actual_slice == expected_value).all() def test_masking(): """Test indexing or changing values in the item using a mask array.""" bob = Observations( x=torch.arange(25).reshape([5, 5]), ) odd_rows = np.arange(5) % 2 == 1 bob[:, odd_rows] = False tensor = torch.as_tensor expected = Observations( x=tensor( [ [0, 1, 2, 3, 4], [0, 0, 0, 0, 0], [10, 11, 12, 13, 14], [0, 0, 0, 0, 0], [20, 21, 22, 23, 24], ] ), task_labels=None, ) assert (expected.x == bob.x).all() assert expected.task_labels == bob.task_labels def test_newaxis(): """WIP: Trying out np.newaxis as a way to add an extra batch dimension.""" x = Observations( x=torch.arange(5), task_labels=1, ) # Test out different ways of 'unsqueezing' the object. for expanded in [x[np.newaxis], x.with_batch_dimension()]: assert str(expanded) == str( Observations( x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int), task_labels=np.array([1]), ) ) def test_single_index(): """observations[0] should gives the first field.""" obs = Observations( x=torch.arange(5), task_labels=1, ) assert obs[0] is obs.x def test_remove_batch_dim(): """Removing an extra batch dimension.""" bob = Observations( x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int), task_labels=np.array([1]), ) expected = Observations( x=torch.arange(5), task_labels=1, ) for expanded in [bob.remove_batch_dimension(), bob[:, 0]]: assert str(expanded) == str(expected) bob = Observations( x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int), task_labels=None, ) expected = Observations( x=torch.arange(5), task_labels=None, ) for expanded in [ bob.remove_batch_dimension(), bob[ :, 0, ], ]: assert str(expanded) == str(expected) def test_remove_batch_dim_with_nested_objects(): obj = ForwardPass( observations=Observations( x=torch.arange(5).reshape([1, 5]), task_labels=None, ), h_x=torch.arange(4).reshape([1, 4]), actions=Actions( y_pred=torch.tensor(1).reshape( [ 1, ] ), ), ) actual = obj.remove_batch_dimension() assert str(actual) == str( ForwardPass( observations=Observations( x=torch.arange(5), task_labels=None, ), h_x=torch.arange(4), actions=Actions( y_pred=torch.tensor(1), ), ) ) def test_split(): """Split a batch into a list of Batch objects""" bob = Observations( x=torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int), task_labels=np.array([0, 1]), ) expected = [ Observations( x=torch.arange(5) + i * 5, task_labels=i, ) for i in range(2) ] assert str(bob.split()) == str(expected) @pytest.mark.parametrize( "items, expected", [ ( [ Observations( x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), task_labels=np.array(0), ), Observations( x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int), task_labels=np.array(1), ), ], Observations( x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int), task_labels=np.array([0, 1]), ), ), ( [ RLActions( y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5), ), RLActions( y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5), ), ], RLActions( y_pred=torch.as_tensor([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], dtype=int), action_dist=Categorical(logits=torch.ones([2, 5, 5], dtype=float) / 5), ), ), ], ) def test_stack(items: List[Batch], expected: Batch): """Split a batch into a list of Batch objects""" assert str(type(items[0]).stack(items)) == str(expected) # Same test, but with only numpy arrays as items: assert str(type(items[0]).stack(map(lambda i: i.numpy(), items))) == str(expected.numpy()) # Same test, but with Tensor items: assert str(type(items[0]).stack(map(lambda i: i.torch(), items))) == str(expected.torch()) @pytest.mark.parametrize( "items, expected", [ ( [ Observations( x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), task_labels=None, ), Observations( x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int), task_labels=None, ), ], Observations( x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int), task_labels=None, ), ), ( [ Observations( x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), task_labels=None, ), Observations( x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int), task_labels=1, ), ], Observations( x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int), task_labels=np.array([None, 1]), ), ), ], ) def test_stack_with_none_values(items: List[Batch], expected: Batch): """Test that if all values are None, a single None is produced, but if only some values are None, then an ndarray of dtype `object` is created instead. """ cls = type(items[0]) assert str(cls.stack(items)) == str(expected) # Same test, but with only numpy arrays as items: items = [item.numpy() for item in items] assert str(cls.stack(items)) == str(expected.numpy()) @pytest.mark.parametrize( "items, expected", [ ( [ Observations( x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), task_labels=0, ), Observations( x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int), task_labels=1, ), ], Observations( x=torch.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int), task_labels=np.array([0, 1]), ), ), ( [ Observations( x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), task_labels=None, ), Observations( x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int), task_labels=None, ), ], Observations( x=torch.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int), task_labels=None, ), ), ( [ RLActions( y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5), ), RLActions( y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int), action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5), ), ], RLActions( y_pred=torch.as_tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int), action_dist=Categorical(logits=torch.ones([10, 5], dtype=float) / 5), ), ), ], ) def test_concatenate(items: List[Batch], expected: Batch): """Split a batch into a list of Batch objects""" assert str(type(items[0]).concatenate(items)) == str(expected) # Same test, but with only numpy arrays as items: assert str(type(items[0]).concatenate(map(lambda i: i.numpy(), items))) == str(expected.numpy()) # Same test, but with Tensor items: assert str(type(items[0]).concatenate(map(lambda i: i.torch(), items))) == str(expected.torch()) @pytest.mark.parametrize( "numpy_batch, torch_batch", [ ( Observations( x=np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]), task_labels=np.array([None, None]), ), Observations( x=torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int), task_labels=np.array([None, None]), ), ), ], ) def test_convert_between_ndarrays_and_tensors(numpy_batch: Batch, torch_batch: Batch): assert str(numpy_batch.torch()) == str(torch_batch) assert str(numpy_batch.torch().numpy()) == str(numpy_batch) assert str(torch_batch.numpy()) == str(numpy_batch) assert str(torch_batch.numpy().torch()) == str(torch_batch) if torch.cuda.is_available(): torch_batch = torch_batch.cuda() assert torch_batch.device.type == "cuda" assert str(numpy_batch.torch(device="cuda")) == str(torch_batch) assert str(numpy_batch.torch(device="cuda").numpy()) == str(numpy_batch) assert str(torch_batch.numpy()) == str(numpy_batch) assert str(torch_batch.numpy().torch(device="cuda")) == str(torch_batch) @dataclass(frozen=True) class ForwardPass(Batch): observations: Observations h_x: Tensor actions: Actions def test_nesting(): obj = ForwardPass( observations=Observations( x=torch.arange(10).reshape([2, 5]), task_labels=torch.arange(2, dtype=int), ), h_x=torch.arange(8).reshape([2, 4]), actions=Actions( y_pred=torch.arange(2, dtype=int), ), ) assert obj.batch_size == 2 assert obj[0, 1, 0] == obj.observations.task_labels[0] tensor = torch.as_tensor assert str(obj.slice(0)) == str( ForwardPass( observations=Observations(x=tensor([[0, 1, 2, 3, 4]]), task_labels=tensor([0])), h_x=tensor([[0, 1, 2, 3]]), actions=Actions(y_pred=tensor([0])), ) ) def test_slicing_with_one_item(): observations = Observations( x=torch.arange(10).reshape([2, 5]), task_labels=torch.arange(2, dtype=int), ) indices = torch.as_tensor([0]) assert observations.slice(indices).shapes == { "x": torch.Size([1, 5]), "task_labels": torch.Size([1]), } ================================================ FILE: sequoia/common/callbacks/__init__.py ================================================ """ TODO: Migrate the addons to Pytorch-Lightning, maybe in the form of callbacks or as optional extensions to be added to Classifier? """ # from .knn_callback import KnnCallback # from .vae_callback import SaveVaeSamplesCallback ================================================ FILE: sequoia/common/callbacks/knn_callback.py ================================================ """ Callback that evaluates representations with a KNN after each epoch. TODO: The code here is split into too many functions and its a bit confusing. Will Need to rework that at some point. NOTE: Currently unused. """ import math from dataclasses import asdict, dataclass from typing import List, Optional, Tuple import numpy as np import torch from pytorch_lightning import Callback, LightningModule, Trainer from simple_parsing import field, mutable_field from sklearn.metrics import log_loss from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import StandardScaler from torch import Tensor from torch.utils.data import DataLoader from sequoia.common.loss import Loss # from sequoia.methods.models.base_model.model import LightningModule from sequoia.settings import Setting from sequoia.settings.sl import ClassIncrementalSetting from sequoia.utils.logging_utils import get_logger, pbar from sequoia.utils.utils import roundrobin, take logger = get_logger(__name__) @dataclass class KnnClassifierOptions: """Set of options for configuring the KnnClassifier.""" n_neighbors: int = field(default=5, alias="n_neighbours") # Number of neighbours. metric: str = "cosine" algorithm: str = "auto" # See the sklearn docs leaf_size: int = 30 # See the sklearn docs p: int = 2 # see the sklean docs n_jobs: Optional[int] = -1 # see the sklearn docs. @dataclass class KnnCallback(Callback): """Addon that adds the option of evaluating representations with a KNN. TODO: Perform the KNN evaluations in different processes using multiprocessing. TODO: We could even evaluate the representations of a DIFFERENT dataset with the KNN, if the shapes were compatible with the model! For example, we could train the model on some CL/RL/etc task, like Omniglot or something, and at the same time, evaluate how good the model's representations are at disentangling the classes from MNIST or Fashion-MNIST or something else entirely! This could be nice when trying to argue about better generalization in the model's representations. """ # Options for the KNN classifier knn_options: KnnClassifierOptions = mutable_field(KnnClassifierOptions) # Maximum number of examples to take from the dataloaders. When None, uses # the entire training/validaton/test datasets. knn_samples: int = 0 def __post_init__(self): self.max_num_batches: int = 0 self.model: LightningModule self.trainer: Trainer def on_train_start(self, trainer, pl_module): """Called when the train begins.""" self.trainer = trainer self.model = pl_module self.setting: ClassIncrementalSetting def setup(self, trainer, pl_module, stage: str): """Called when fit or test begins""" super().setup(trainer, pl_module, stage) def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule): self.trainer = trainer self.model = pl_module self.setting = self.model.setting config = self.model.config if self.knn_samples > 0: batch_size = pl_module.batch_size # We round this up so we always take at least one batch_size of # samples from each dataloader. self.max_num_batches = math.ceil(self.knn_samples / batch_size) logger.debug( f"Taking a maximum of {self.max_num_batches} batches from each dataloader." ) if config.debug: self.knn_samples = min(self.knn_samples, 100) valid_knn_loss, test_knn_loss = self.evaluate_knn(pl_module) # assert False, trainer.callback_metrics.keys() loss: Optional[Loss] = trainer.callback_metrics.get("loss_object") if loss: assert "knn/valid" not in loss.losses assert "knn/test" not in loss.losses loss.losses["knn/valid"] = valid_knn_loss loss.losses["knn/test"] = test_knn_loss def log(self, loss_object: Loss): if self.trainer.logger: self.trainer.logger.log_metrics(loss_object.to_log_dict()) def get_dataloaders(self, model: LightningModule, mode: str) -> List[DataLoader]: """Retrieve the train/val/test dataloaders for all 'tasks'.""" setting = model.datamodule assert setting, "The LightningModule must have its 'datamodule' attribute set for now." # if the setting defines a dataloaders() method, those are for each of the tasks, which is what we want! fn = getattr(setting, f"{mode}_dataloaders", getattr(setting, f"{mode}_dataloader")) loaders = fn() if isinstance(loaders, DataLoader): return [loaders] assert isinstance(loaders, list) return loaders def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]: """Evaluate the representations with a KNN in the context of CL. We shorten the train dataloaders to take only the first `knn_samples` samples in order to save some compute. TODO: Figure out a way to cleanly add the metrics from the callback to the ``log dict'' which is returned by the model. Right now they are only printed / logged to wandb directly from here. """ setting = model.datamodule assert isinstance(setting, Setting) # TODO: Remove this if we want to use this for something else than a # Continual setting in the future. assert isinstance(setting, ClassIncrementalSetting) num_classes = setting.num_classes # Check wether the method has access to the task labels at train/test time. task_labels_at_test_time: bool = False from sequoia.settings import TaskIncrementalSLSetting if isinstance(setting, TaskIncrementalSLSetting): if setting.task_labels_at_test_time: task_labels_at_test_time = True # TODO: Figure out a way to make sure that we get at least one example # of each class to fit the KNN. self.knn_samples = max(self.knn_samples, num_classes**2) self.max_num_batches = math.ceil(self.knn_samples / model.batch_size) logger.info(f"number of classes: {num_classes}") logger.info(f"Number of KNN samples: {self.knn_samples}") logger.debug(f"Taking a maximum of {self.max_num_batches} batches from each dataloader.") train_loaders: List[DataLoader] = self.get_dataloaders(model, mode="train") valid_loaders: List[DataLoader] = self.get_dataloaders(model, mode="val") test_loaders: List[DataLoader] = self.get_dataloaders(model, mode="test") # Only take the first `knn_samples` samples from each dataloader. def shorten(dataloader: DataLoader): return take(dataloader, n=self.max_num_batches) if self.max_num_batches: train_loaders = list(map(shorten, train_loaders)) valid_loaders = list(map(shorten, valid_loaders)) test_loaders = list(map(shorten, test_loaders)) # Create an iterator that alternates between each of the train dataloaders. # NOTE: we shortened each of the dataloaders just to be sure that we get at least train_loader = roundrobin(*train_loaders) h_x, y = get_hidden_codes_array( model=model, dataloader=train_loader, description="KNN (Train)" ) train_loss, scaler, knn_classifier = fit_knn( x=h_x, y=y, options=self.knn_options, num_classes=num_classes, loss_name="knn/train" ) logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}") self.log(train_loss) total_valid_loss = Loss("knn/valid") # Save the current task ID so we can reset it after testing. starting_task_id = model.setting.current_task_id for i, dataloader in enumerate(valid_loaders): if task_labels_at_test_time: model.on_task_switch(i, training=False) loss_i = evaluate( model=model, dataloader=dataloader, loss_name=f"[{i}]", scaler=scaler, knn_classifier=knn_classifier, num_classes=setting.num_classes_in_task(i), ) # We use `.absorb(loss_i)` here so that the metrics get merged. # That way, if we access `total_valid_loss.accuracy`, this gives the # accuracy over all the validation tasks. # If we instead used `+= loss_i`, then loss_i would become a subloss # of `total_valid_loss`, since they have different names. # TODO: Explain this in more detail somewhere else. total_valid_loss.absorb(loss_i) logger.info(f"KNN Valid[{i}] Acc: {loss_i.accuracy:.2%}") self.log(loss_i) logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}") self.log(total_valid_loss) total_test_loss = Loss("knn/test") for i, dataloader in enumerate(test_loaders): if task_labels_at_test_time: model.on_task_switch(i, training=False) # TODO Should we set the number of classes to be the number of # classes in the current task? loss_i = evaluate( model=model, dataloader=dataloader, loss_name=f"[{i}]", scaler=scaler, knn_classifier=knn_classifier, num_classes=num_classes, ) total_test_loss.absorb(loss_i) logger.info(f"KNN Test[{i}] Acc: {loss_i.accuracy:.2%}") self.log(loss_i) if task_labels_at_test_time: model.on_task_switch(starting_task_id, training=False) logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}") self.log(total_test_loss) return total_valid_loss, total_test_loss def evaluate( model: LightningModule, dataloader: DataLoader, loss_name: str, scaler: StandardScaler, knn_classifier: KNeighborsClassifier, num_classes: int, ) -> Loss: """Evaluates the 'quality of representations' using a KNN. Assumes that the knn classifier was fitted on the same classes as the ones present in the dataloader. Args: model (Classifier): a Classifier model to use to encode samples. dataloader (DataLoader): a dataloader. loss_name (str): name to give to the resulting loss. scaler (StandardScaler): the scaler used during fitting. knn_classifier (KNeighborsClassifier): The KNN classifier. Returns: Loss: The loss object containing metrics and a 'total loss' which isn't a tensor in this case (since passing through the KNN isn't a differentiable operation). """ h_x_test, y_test = get_hidden_codes_array( model, dataloader, description=f"KNN ({loss_name})", ) train_classes = set(knn_classifier.classes_) test_classes = set(y_test) # Check that the same classes were used. assert test_classes.issubset(train_classes), ( f"y and y_test should contain the same classes: " f"(train classes: {train_classes}, " f"test classes: {test_classes})." ) test_loss = get_knn_performance( x_t=h_x_test, y_t=y_test, loss_name=loss_name, scaler=scaler, knn_classifier=knn_classifier, num_classes=num_classes, ) test_loss.loss = torch.as_tensor(test_loss.loss) logger.info(f"{loss_name} Acc: {test_loss.accuracy:.2%}") return test_loss def get_hidden_codes_array( model: LightningModule, dataloader: DataLoader, description: str = "KNN" ) -> Tuple[np.ndarray, np.ndarray]: """Gets the hidden vectors and corresponding labels.""" h_x_list: List[np.ndarray] = [] y_list: List[np.ndarray] = [] for batch in pbar(dataloader, description, leave=False): # TODO: Debug this, make sure this callback still works. x, y = batch assert isinstance(x, Tensor), type(x) # We only do KNN with examples that have a label. assert y is not None, f"Should have a 'y' for now! {x}, {y}" if y is not None: # TODO: There will probably be some issues with trying to use # the model's encoder to encode stuff when using DataParallel or # DistributedDataParallel, as PL might be interfering somehow. h_x = model.encode(x.to(model.device)) h_x_list.append(h_x.detach().cpu().numpy()) y_list.append(y.detach().cpu().numpy()) codes = np.concatenate(h_x_list) labels = np.concatenate(y_list) return codes.reshape(codes.shape[0], -1), labels def fit_knn( x: np.ndarray, y: np.ndarray, num_classes: int, options: KnnClassifierOptions = None, loss_name: str = "knn", ) -> Tuple[Loss, StandardScaler, KNeighborsClassifier]: # print(x.shape, y.shape, x_t.shape, y_t.shape) options = options or KnnClassifierOptions() scaler = StandardScaler() x_s = scaler.fit_transform(x) # Create and train the Knn Classifier using the options as the kwargs knn_classifier = KNeighborsClassifier(**asdict(options)).fit(x_s, y) train_loss = get_knn_performance( x_t=x, y_t=y, scaler=scaler, knn_classifier=knn_classifier, num_classes=num_classes, ) return train_loss, scaler, knn_classifier def get_knn_performance( x_t: np.ndarray, y_t: np.ndarray, scaler: StandardScaler, knn_classifier: KNeighborsClassifier, num_classes: int, loss_name: str = "KNN", ) -> Loss: # Flatten the inputs to two dimensions only. x_t = x_t.reshape(x_t.shape[0], -1) assert len(x_t.shape) == 2 x_t = scaler.transform(x_t) y_t_prob = knn_classifier.predict_proba(x_t) classes = knn_classifier.classes_ # make sure the classes are sorted: assert np.array_equal(sorted(classes), classes) if y_t_prob.shape[-1] == num_classes: y_t_logits = y_t_prob else: # Not all classes were encountered, so we need to 'expand' the predicted # logits to the right shape. logger.info(f"{y_t_prob.shape} {num_classes}") num_classes = max(num_classes, y_t_prob.shape[-1]) y_t_logits = np.zeros([y_t_prob.shape[0], num_classes], dtype=y_t_prob.dtype) for i, logits in enumerate(y_t_prob): for label, logit in zip(classes, logits): y_t_logits[i][label - 1] = logit ## We were constructing this to reorder the classes in case the ordering was ## not the same between the KNN's internal `classes_` attribute and the task ## classes, However I'm not sure if this is necessary anymore. # y_t_logits = np.zeros((y_t.size, y_t.max() + 1)) # for i, label in enumerate(classes): # y_t_logits[:, label] = y_t_prob[:, i] # We get the Negative Cross Entropy using the scikit-learn function, but we # could instead get it using pytorch's function (maybe even inside the # Loss object! nce_t = log_loss(y_true=y_t, y_pred=y_t_prob, labels=classes) # BUG: There is sometimes a case where some classes aren't present in # `classes_`, and as such the ClassificationMetrics object created in the # Loss constructor has an error. test_loss = Loss(loss_name, loss=nce_t, y_pred=y_t_logits, y=y_t) return test_loss from simple_parsing.helpers.serialization import register_decoding_fn register_decoding_fn(KnnCallback, lambda v: v) ================================================ FILE: sequoia/common/callbacks/vae_callback.py ================================================ from dataclasses import dataclass from typing import Optional import torch from pytorch_lightning import Callback, Trainer from torch import Tensor from torchvision.utils import save_image from sequoia.methods.aux_tasks.reconstruction import AEReconstructionTask, VAEReconstructionTask from sequoia.methods.models import BaseModel from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) @dataclass class SaveVaeSamplesCallback(Callback): """Callback which saves some generated/reconstructed samples. Reconstructs and/or generates samples periodically during training if any of of the autoencoder/generative model based auxiliary tasks are used. """ def __post_init__(self, *args, **kwargs): self.reconstruction_task: Optional[AEReconstructionTask] = None self.generation_task: Optional[VAEReconstructionTask] = None self.latents_batch: Optional[Tensor] = None self.model: BaseModel self.trainer: Trainer def setup(self, trainer, pl_module, stage: str): """Called when fit or test begins""" super().setup(trainer, pl_module, stage) def on_train_start(self, trainer, pl_module): """Called when the train begins.""" self.trainer = trainer self.model = pl_module from sequoia.methods.models.base_model.self_supervised_model import SelfSupervisedModel if isinstance(pl_module, SelfSupervisedModel): # if our model has auxiliary tasks (i.e., if it's a self-supervised model.) if VAEReconstructionTask.name in self.model.tasks: self.reconstruction_task = self.model.tasks[VAEReconstructionTask.name] self.generation_task = self.reconstruction_task self.latents_batch = torch.randn(64, self.model.hp.hidden_size) elif AEReconstructionTask.name in pl_module.tasks: self.reconstruction_task = self.model.tasks[AEReconstructionTask.name] self.generation_task = None def on_train_epoch_end(self, trainer: Trainer, pl_module: BaseModel): # do something if self.generation_task: # Save a batch of fake images after each epoch. self.generate_samples() ## Reconstruct some samples after each epoch. # TODO: change this to use an interval instead. x_batch = None if x_batch is not None: self.reconstruct_samples(x_batch) @torch.no_grad() def reconstruct_samples(self, data: Tensor): if not self.reconstruction_task or not self.reconstruction_task.enabled: return n = min(data.size(0), 16) originals = data[:n] reconstructed = self.reconstruction_task.reconstruct(originals) comparison = torch.cat([originals, reconstructed]) reconstruction_images_dir = self.model.config.log_dir / "reconstruction" reconstruction_images_dir.mkdir(parents=True, exist_ok=True) file_name = reconstruction_images_dir / f"step_{self.trainer.global_step:08d}.png" comparison = comparison.cpu().detach() # TODO: Debug this: # import wandb # if self.trainer.logger: # self.trainer.logger.log({"reconstruction": wandb.Image(comparison)}) save_image(comparison, file_name, nrow=n) @torch.no_grad() def generate_samples(self): if not self.generation_task or not self.generation_task.enabled: return n = 64 latents = self.latents_batch fake_samples = self.generation_task.generate(latents) fake_samples = fake_samples.cpu().reshape(n, *reversed(self.model.setting.dims)) # fake_samples = (fake_samples * 255).astype(np.uint8) generation_images_dir = self.model.config.log_dir / "generated_samples" generation_images_dir.mkdir(parents=True, exist_ok=True) file_name = generation_images_dir / f"step_{self.trainer.global_step:08d}.png" # import wandb # if self.model.logger: # self.model.logger.experiment.log({"generated": wandb.Image(fake_samples)}) save_image(fake_samples, file_name, normalize=True) logger.debug(f"saved image at path {file_name}") ================================================ FILE: sequoia/common/config/__init__.py ================================================ from .config import Config from .wandb_config import WandbConfig ================================================ FILE: sequoia/common/config/config.py ================================================ """ Config dataclasses for use with pytorch lightning. @author Fabrice Normandin (@lebrice) """ import os from dataclasses import dataclass from pathlib import Path from typing import Optional import numpy as np import torch from pytorch_lightning import seed_everything from pyvirtualdisplay import Display from simple_parsing import Serializable, flag from sequoia.utils.logging_utils import get_logger from sequoia.utils.parseable import Parseable # from .trainer_config import TrainerConfig logger = get_logger(__name__) virtual_display = None @dataclass class Config(Serializable, Parseable): """Configuration options for an experiment. TODO: This should contain configuration options that are not specific to either the Setting or the Method, or common to both. For instance, the random seed, or the log directory, wether CUDA is to be used, etc. """ # Directory containing the datasets. data_dir: Path = Path(os.environ.get("SLURM_TMPDIR", os.environ.get("DATA_DIR", "data"))) # Directory containing the results of an experiment. log_dir: Path = Path(os.environ.get("RESULTS_DIR", "results")) # Run in Debug mode: no wandb logging, extra output. debug: bool = flag(False) # Wether to render the environment observations. Slows down training. render: bool = flag(False) # Enables more verbose logging. verbose: bool = flag(False) # Number of workers for the dataloaders. num_workers: Optional[int] = None # Random seed. seed: Optional[int] = None # Which device to use. Defaults to 'cuda' if available. device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def __post_init__(self): self.seed_everything() self._display: Optional[Display] = None self.rng = np.random.default_rng(self.seed) self.log_dir = Path(self.log_dir) self.data_dir = Path(self.data_dir) def __del__(self): if self._display: self._display.stop() def get_display(self) -> Optional[Display]: if self._display: return self._display if not self.render: # If `--render` isn't set, then try to create a virtual display. # This has the same effect as running the script with xvfb-run try: virtual_display = Display(visible=False, size=(1366, 768)) virtual_display.start() self._display = virtual_display except Exception as e: logger.warning( RuntimeWarning( f"Rendering is disabled, but we were unable to start the " f"virtual display! {e}\n" f"Make sure that xvfb is installed on your machine if you " f"want to prevent rendering the environment's observations." ) ) return self._display def seed_everything(self) -> None: if self.seed is not None: seed_everything(self.seed) ================================================ FILE: sequoia/common/config/wandb_config.py ================================================ """TODO: Re-enable the wandb stuff (disabled for now). """ import os import re from dataclasses import dataclass from pathlib import Path from typing import * from pytorch_lightning.loggers import WandbLogger from simple_parsing import field, list_field import wandb from sequoia.utils.logging_utils import get_logger from sequoia.utils.serialization import Serializable def patched_monitor(): vcr = wandb.util.get_module( "gym.wrappers.monitoring.video_recorder", required="Couldn't import the gym python package, install with pip install gym", ) print(f"Using patched version of `wandb.gym.monitor()`") if hasattr(vcr.ImageEncoder, "orig_close"): print(f"wandb.gym.monitor() has already been called.") return else: vcr.ImageEncoder.orig_close = vcr.ImageEncoder.close def close(self): vcr.ImageEncoder.orig_close(self) m = re.match(r".+(video\.\d+).+", self.output_path) if m: key = m.group(1) else: key = "videos" wandb.log({key: wandb.Video(self.output_path)}) vcr.ImageEncoder.close = close wandb.patched["gym"].append(["gym.wrappers.monitoring.video_recorder.ImageEncoder", "close"]) import wandb.integration.gym wandb.integration.gym.monitor = patched_monitor # GYM_MONITOR = os.environ.get("GYM_MONITOR", "") # if not GYM_MONITOR: # wandb.gym.monitor() # os.environ["GYM_MONITOR"] = "True" # else: # assert False, "importing this a second time?" logger = get_logger(__name__) @dataclass class WandbConfig(Serializable): """Set of configurations options for calling wandb.init directly.""" # Which user to use entity: str = "" # project name to use in wandb. project: str = "" # Name used to easily group runs together. # Used to create a parent folder that will contain the `run_name` directory. # A unique string shared by all runs in a given group # Used to create a parent folder that will contain the `run_name` directory. group: Optional[str] = None # Wandb run name. If None, will use wandb's automatic name generation run_name: Optional[str] = None # Identifier unique to each individual wandb run. When given, will try to # resume the corresponding run, generates a new ID each time. run_id: Optional[str] = None # An run number is used to differentiate different iterations of the same experiment. # Runs with the same name can be later grouped with wandb to produce stderr plots. # TODO: Could maybe use the run_id instead? run_number: Optional[int] = None # Path where the wandb files should be stored. If the 'WANDB_DIR' # environment variable is set, uses that value. Otherwise, defaults to # the value of "/wandb" wandb_path: Optional[Path] = ( Path(os.environ["WANDB_DIR"]) if "WANDB_DIR" in os.environ else None ) # Tags to add to this run with wandb. tags: List[str] = list_field() # Notes about this particular experiment. (will be logged to wandb if used.) notes: Optional[str] = None # Root Logging directory. log_dir_root: Path = Path("results") monitor_gym: bool = True # Wandb api key. Useful for preventing the login prompt from wandb from appearing # when running on clusters or docker-based setups where the environment variables # aren't always shared. wandb_api_key: Optional[Union[str, Path]] = field( default=os.environ.get("WANDB_API_KEY"), to_dict=False, # Do not serialize this field. repr=False, # Do not show this field in repr(). ) # Run offline (data can be streamed later to wandb servers). offline: bool = False # Enables or explicitly disables anonymous logging. anonymous: bool = False # Sets the version, mainly used to resume a previous run. version: Optional[str] = None # Save checkpoints in wandb dir to upload on W&B servers. log_model: bool = False # Class variables used to check wether wandb.login has already been called or not. logged_in: ClassVar[bool] = False key_configured: ClassVar[bool] = False @property def log_dir(self): return self.log_dir_root.joinpath( (self.project or ""), (self.group or ""), (self.run_name or "default"), (f"run_{self.run_number}" if self.run_number is not None else ""), ) def wandb_login(self) -> bool: """Calls `wandb.login()`. Returns ------- bool If the key is configured. """ key = None if self.wandb_api_key is not None and self.project: if Path(self.wandb_api_key).is_file(): key = Path(self.wandb_api_key).read_text() else: key = str(self.wandb_api_key) assert isinstance(key, str) cls = type(self) if not cls.logged_in: cls.key_configured = wandb.login(key=key) cls.logged_in = True return cls.key_configured def wandb_init_kwargs(self) -> Dict: """Return the kwargs to pass to wandb.init()""" if self.run_name is None: # TODO: Create a run name using the coefficients of the tasks, etc? # At the moment, if no run name is given, the 'random' name from wandb is used. pass if self.wandb_path is None: self.wandb_path = self.log_dir_root / "wandb" self.wandb_path.mkdir(parents=True, mode=0o777, exist_ok=True) return dict( dir=str(self.wandb_path), project=self.project, entity=self.entity, name=self.run_name, id=self.run_id, group=self.group, notes=self.notes, reinit=True, tags=self.tags, resume="allow", monitor_gym=self.monitor_gym, ) def wandb_init(self, config_dict: Dict = None) -> wandb.wandb_run.Run: """Executes the call to `wandb.init()`. TODO(@lebrice): Not sure if it still makes sense to call `wandb.init` ourselves when using Pytorch Lightning, should probably ask @jeromepl for advice on this. Args: config_dict (Dict): The configuration dictionary. Usually obtained by calling `to_dict()` on a `Serializable` dataclass, or `asdict()` on a regular dataclass. Returns: wandb.wandb_run.Run: Whatever gets returned by `wandb.init()`. """ logger.info(f"Wandb run id: {self.run_id}") logger.info( f"Using wandb. Group name: {self.group} run name: {self.run_name}, " f"log_dir: {self.log_dir}" ) self.wandb_login() init_kwargs = self.wandb_init_kwargs() init_kwargs["config"] = config_dict run = wandb.init(**init_kwargs) logger.info(f"Run: {run}") if run: if self.run_name is None: self.run_name = run.name # run.save() if run.resumed: # TODO: add *proper* wandb resuming, probaby by using @nitarshan 's md5 id cool idea. # wandb.restore(self.log_dir / "checkpoints") pass return run def make_logger(self, wandb_parent_dir: Path = None) -> WandbLogger: logger.info(f"Creating a WandbLogger with using options {self}.") self.wandb_login() wandb_logger = WandbLogger( name=self.run_name, save_dir=str(wandb_parent_dir) if wandb_parent_dir else None, offline=self.offline, id=self.run_id, anonymous=self.anonymous, version=self.version, project=self.project, tags=self.tags, log_model=self.log_model, entity=self.entity, group=self.group, monitor_gym=self.monitor_gym, reinit=True, ) return wandb_logger ================================================ FILE: sequoia/common/gym_wrappers/__init__.py ================================================ """ Contains some potentially useful gym wrappers. """ from .add_done import AddDoneToObservation from .add_info import AddInfoToObservation from .convert_tensors import ConvertToFromTensors from .env_dataset import EnvDataset from .multi_task_environment import MultiTaskEnvironment from .pixel_observation import PixelObservationWrapper from .policy_env import PolicyEnv from .smooth_environment import SmoothTransitions from .step_callback_wrapper import PeriodicCallback, StepCallback, StepCallbackWrapper from .transform_wrappers import TransformAction, TransformObservation, TransformReward from .utils import IterableWrapper, RenderEnvWrapper, has_wrapper ================================================ FILE: sequoia/common/gym_wrappers/action_limit.py ================================================ """ IDEA: same as ObservationLimit, for for the number of total actions (steps). """ import gym from gym.error import ClosedEnvironmentError from sequoia.utils import get_logger from .utils import IterableWrapper logger = get_logger(__name__) class ActionCounter(IterableWrapper): """Wrapper that counts the total number of actions performed so far. (including those in the individual environments when wrapping a VectorEnv.) """ def __init__(self, env: gym.Env): super().__init__(env=env) self._action_counter: int = 0 def step_count(self) -> int: return self._action_counter def action_count(self) -> int: return self._action_counter def step(self, action): obs, reward, done, info = self.env.step(action) self._action_counter += self.env.num_envs if self.is_vectorized else 1 return obs, reward, done, info class ActionLimit(ActionCounter): """Closes the env when `max_steps` actions have been performed *in total*. For vectorized environments, each step consumes up to `num_envs` from this total budget, i.e. the step counter is incremented by the batch size at each step. """ def __init__(self, env: gym.Env, max_steps: int): super().__init__(env=env) self._max_steps = max_steps self._initial_reset = False self._is_closed: bool = False @property def max_steps(self) -> int: return self._max_steps def __len__(self): return self.max_steps def closed_error_message(self) -> str: return f"Env reached max number of steps ({self._max_steps})" def step(self, action): if self._action_counter >= self._max_steps: raise ClosedEnvironmentError(f"Env reached max number of steps ({self._max_steps})") obs, reward, done, info = super().step(action) # logger.debug(f"(step {self._action_counter}/{self._max_steps})") # BUG: If we dont use >=, then iteration with EnvDataset doesn't work. if self._action_counter >= self._max_steps: self.close() # done = True # info["truncated"] = True return obs, reward, done, info ================================================ FILE: sequoia/common/gym_wrappers/action_limit_test.py ================================================ from typing import List import gym import pytest from gym.wrappers import TimeLimit from sequoia.common.gym_wrappers.env_dataset import EnvDataset from .action_limit import ActionLimit def test_basics(): env = gym.make("CartPole-v0") env = ActionLimit(env, max_steps=10) def test_EnvDataset_of_ActionLimit(): max_episode_steps = 10 max_steps = 100 env = gym.make("CartPole-v0") env = TimeLimit(env, max_episode_steps=max_episode_steps) env = ActionLimit(env, max_steps=max_steps) env = EnvDataset(env) done = False episode_steps: List[int] = [] total_steps = 0 for episode in range(15): print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}") step = None for step, obs in enumerate(env): print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}") assert step <= max_episode_steps env.send(env.action_space.sample()) total_steps += 1 assert step is not None # NOTE: Here we have the last 'step' as 9. episode_steps.append(step) assert total_steps <= max_steps if total_steps == max_steps: break assert env.is_closed() assert sum(step + 1 for step in episode_steps) == max_steps @pytest.mark.xfail( reason="FIXME: Shouldn't use CartPole env for this test since episodes aren't " "always longer than 10." ) def test_ActionLimit_of_EnvDataset(): max_episode_steps = 10 max_steps = 100 env = gym.make("CartPole-v0") env = TimeLimit(env, max_episode_steps=max_episode_steps) env = EnvDataset(env) env = ActionLimit(env, max_steps=max_steps) env.seed(123) done = False episode_steps: List[int] = [] for episode in range(10): print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}") step = 0 for step, obs in enumerate(env): print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}") assert step <= max_episode_steps env.send(env.action_space.sample()) assert step > 0 # NOTE: Here we have the last 'step' as 9. episode_steps.append(step) assert env.is_closed() assert sum(step + 1 for step in episode_steps) == max_steps from sequoia.settings.sl.wrappers.measure_performance_test import with_is_last @pytest.mark.xfail( reason=( "BUG: Why is the BaseMethod working fine on a `TraditionalRLSetting, but " "not on an IncrementalRLSetting? Seems like the 'max_steps' isn't enforced the " " same way in both somehow." ) ) def test_delayed_EnvDataset_of_ActionLimit(): """Same test as above, however introduce a delay (like what's happening in the pl.Trainer) between the items sent by the trainer and the rewards returned by the env. """ max_episode_steps = 10 max_steps = 100 env = gym.make("CartPole-v0") env = TimeLimit(env, max_episode_steps=max_episode_steps) env = EnvDataset(env) env = ActionLimit(env, max_steps=max_steps) done = False episode_steps: List[int] = [] for episode in range(10): print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}") step = 0 for step, (obs, is_last) in enumerate(with_is_last(env)): print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}") assert step <= max_episode_steps env.send(env.action_space.sample()) if step == max_episode_steps - 1: assert is_last assert step > 0 # NOTE: Here we have the last 'step' as 9. episode_steps.append(step) assert env.is_closed() assert sum(step + 1 for step in episode_steps) == max_steps ================================================ FILE: sequoia/common/gym_wrappers/add_done.py ================================================ """ Wrapper that adds 'done' as part of the environment's observations. """ from dataclasses import is_dataclass, replace from functools import singledispatch from typing import Any, Dict, Sequence, Tuple, TypeVar, Union import gym import numpy as np from gym import Space, spaces from gym.vector.utils import batch_space from torch import Tensor from sequoia.common.spaces import TypedDictSpace from .utils import IterableWrapper T = TypeVar("T") Bool = TypeVar("Bool", bound=Union[bool, Sequence[bool]]) K = TypeVar("K") V = TypeVar("V") @singledispatch def add_done(observation: Any, done: Any) -> Any: """Generic function that adds the provided `done` value to an observation. Returns the modified observation, which might not always be of the same type. """ if is_dataclass(observation): return replace(observation, done=done) raise NotImplementedError( f"Function add_done has no handler registered for observations of type " f"{type(observation)}." ) @add_done.register(int) @add_done.register(float) @add_done.register(Tensor) @add_done.register(np.ndarray) def _add_done_to_array_obs(observation: T, done: bool) -> Dict[str, Union[T, bool]]: # TODO: use 'x' or 'observation'? return {"x": observation, "done": done} @add_done.register(tuple) def _add_done_to_tuple_obs(observation: Tuple, done: bool) -> Tuple: return observation + (done,) @add_done.register(dict) def _add_done_to_dict_obs(observation: Dict[K, V], done: bool) -> Dict[K, Union[V, bool]]: assert "done" not in observation observation["done"] = done return observation @add_done.register def add_done_to_space(observation: Space, done: Space) -> Space: """Adds the space of the 'done' value to the given space. By default, `done` corresponds to what you'd get from a single (i.e. non-vectorized) environment. """ raise NotImplementedError( f"No handler registered for spaces of type {type(observation)}. " f"(value = {observation}, done={done})" ) @add_done.register(spaces.Discrete) @add_done.register(spaces.MultiDiscrete) @add_done.register(spaces.MultiBinary) @add_done.register(spaces.Box) def _add_done_to_box_space(observation: Space, done: Space) -> spaces.Dict: # TODO: Use 'x' or 'observation' as the key? return TypedDictSpace( x=observation, done=done, ) @add_done.register def _add_done_to_tuple_space(observation: spaces.Tuple, done: Space) -> spaces.Tuple: return spaces.Tuple( [ *observation.spaces, done, ] ) @add_done.register def _add_done_to_dict_space(observation: spaces.Dict, done: Space) -> spaces.Dict: new_spaces = observation.spaces.copy() assert "done" not in new_spaces, "space shouldn't already have a 'done' key." new_spaces["done"] = done return type(observation)(new_spaces) class AddDoneToObservation(IterableWrapper): """Wrapper that adds the 'done' from step to the Need to add the 'done' vector to the observation, so we can get access to the 'end of episode' signal in the shared_step, since when iterating over the env like a dataloader, the yielded items only have the observations, and dont have the 'done' vector. (so as to be consistent with supervised learning). NOTE: NEVER use this *BEFORE* batching, because of how the 'reset' works in all VectorEnvs, the observations will always be the 'new' ones, so `done` (in the obs) will always be False! """ def __init__(self, env: gym.Env, done_space: Space = None): super().__init__(env) # boolean value. (0 or 1) if done_space is None: done_space = spaces.Box(0, 1, (), dtype=np.bool) if self.is_vectorized: self.single_observation_space = add_done(self.single_observation_space, done_space) done_space = batch_space(done_space, self.env.num_envs) self.done_space = done_space self.observation_space = add_done(self.env.observation_space, self.done_space) def reset(self, **kwargs): observation = self.env.reset() if self.is_vectorized: done = self.done_space.low else: done = False return add_done(observation, done) def step(self, action): observation, reward, done, info = self.env.step(action) observation = add_done(observation, done) return observation, reward, done, info ================================================ FILE: sequoia/common/gym_wrappers/add_info.py ================================================ """ Wrapper that adds the 'info' as a part of the environment's observations. """ from dataclasses import is_dataclass, replace from functools import singledispatch from typing import Dict, Sequence, Tuple, TypeVar, Union import gym import numpy as np from gym import Space, spaces from gym.vector import VectorEnv from gym.vector.utils import batch_space from torch import Tensor from .utils import IterableWrapper Info = TypeVar("Info", bound=Union[Dict, Sequence[Dict]]) K = TypeVar("K") V = TypeVar("V") @singledispatch def add_info(observation, info): """Generic function that adds the provided `info` value to an observation. Returns the modified observation, which might not always be of the same type. NOTE: Can also be applied to spaces. """ if is_dataclass(observation): # TODO: This assumes that the dataclass already has the 'info' field, if # that dataclass is frozen. return replace(observation, info=info) raise NotImplementedError( f"Function add_info has no handler registered for inputs of type " f"{type(observation)}." ) @add_info.register(Tensor) @add_info.register(np.ndarray) def _add_info_to_array_obs(observation: np.ndarray, info: Info) -> Tuple[np.ndarray, Info]: return (observation, info) @add_info.register(tuple) def _add_info_to_tuple_obs(observation: Tuple, info: Info) -> Tuple: return observation + (info,) @add_info.register(dict) def _add_info_to_dict_obs(observation: Dict[K, V], info: Info) -> Dict[K, Union[V, Info]]: assert "info" not in observation observation["info"] = info return observation @add_info.register(spaces.Space) def add_info_to_space(observation: Space, info: Space) -> Space: """Adds the space of the 'info' value from the env to this observation space. """ raise NotImplementedError( f"No handler registered for spaces of type {type(observation)}. " f"(value = {observation})" ) @add_info.register def _add_info_to_box_space(observation: spaces.Box, info: Space) -> spaces.Tuple: return spaces.Tuple( [ observation, info, ] ) @add_info.register def _add_info_to_tuple_space(observation: spaces.Tuple, info: Space) -> spaces.Tuple: return spaces.Tuple( [ *observation.spaces, info, ] ) @add_info.register def _add_info_to_dict_space(observation: spaces.Dict, info: Space) -> spaces.Dict: new_spaces = observation.spaces.copy() assert "info" not in new_spaces, "space shouldn't already have an 'info' key." new_spaces["info"] = info return type(observation)(new_spaces) class AddInfoToObservation(IterableWrapper): # TODO: Need to add the 'info' dict to the Observation, so we can have # access to the final observation (which gets stored in the info dict at key # 'final_state'. # Do we through? # TODO: Should we also add the 'final state' to the observations as well? def __init__(self, env: gym.Env, info_space: spaces.Space = None): super().__init__(env) self.is_vectorized = isinstance(env.unwrapped, VectorEnv) # TODO: Should we make 'info_space' mandatory here? if info_space is None: # TODO: There seems to be some issues if we have an empty info space # before the batching. info_space = spaces.Dict({}) if self.is_vectorized: info_space = batch_space(info_space, self.env.num_envs) self.info_space = info_space self.observation = add_info(self.env.observation_space, self.info_space) def reset(self, **kwargs): observation = self.env.reset() info = {} if self.is_vectorized: info = np.array([{} for _ in range(self.env.num_envs)]) obs = add_info(observation, info) return obs def step(self, action): observation, reward, done, info = self.env.step(action) observation = add_info(observation, info) return observation, reward, done, info ================================================ FILE: sequoia/common/gym_wrappers/convert_tensors.py ================================================ from dataclasses import is_dataclass, replace import dataclasses from functools import singledispatch, wraps from typing import Any, Dict, Tuple, TypeVar, Union import gym import numpy as np import torch from gym import Space, spaces from torch import Tensor from sequoia.common.spaces.image import Image, ImageTensorSpace from sequoia.common.spaces.named_tuple import NamedTupleSpace from sequoia.common.spaces.typed_dict import TypedDictSpace from sequoia.utils.generic_functions import from_tensor, move # , to_tensor from sequoia.utils.logging_utils import get_logger from .utils import IterableWrapper @singledispatch def to_tensor(v, device: torch.device = None) -> Union[Tensor, Any]: """Converts `v` into a tensor if `v` is a value, otherwise convert the items of `v` to tensors. - If `v` is a list, tuple, or dict, then the items are converted to tensors recursively. - If `v` is a dataclass, converts the fields to Tensors using `to_tensor` recursively. Otherwise, just uses `torch.as_tensor(v, device=device)`. """ if v is None: return None if dataclasses.is_dataclass(v): return type(v)( **{ field.name: to_tensor(getattr(v, field.name), device=device) for field in dataclasses.fields(v) } ) return torch.as_tensor(v, device=device) @to_tensor.register(tuple) def _( v, device: torch.device = None, ): # NOTE: Choosing to convert tuples of things into tuples of tensor things, rather than torch # tensors. return tuple(to_tensor(v_i, device=device) for v_i in v) @to_tensor.register(dict) def _(v: Dict, device: torch.device = None) -> Dict: return type(v)(**{k: to_tensor(v_i, device=device) for k, v_i in v.items()}) logger = get_logger(__name__) T = TypeVar("T") S = TypeVar("S", bound=Space) # TODO: Add 'TensorSpace' space which wraps a given space, doing the same kinda thing # as in Sparse. class ConvertToFromTensors(IterableWrapper): """Wrapper that converts Tensors into samples/ndarrays and vice versa. Whatever comes into the env is converted into np.ndarrays or samples from the action space, and whatever comes out of the environment (observations, rewards, dones, etc.) get converted to Tensors. Also supports Dict/Tuple/etc observation/action spaces. Also makes it so the `sample` methods of both the observation and action spaces return Tensors, and that their `contains` methods also accept Tensors as an input. If `device` is given, created Tensors are moved to the provided device. """ def __init__(self, env: gym.Env, device: Union[torch.device, str] = None): super().__init__(env=env) self.device = device self.observation_space: Space = add_tensor_support( self.env.observation_space, device=device ) self.action_space: Space = add_tensor_support(self.env.action_space, device=device) self.reward_space: Space if hasattr(self.env, "reward_space"): self.reward_space = self.env.reward_space else: reward_range = getattr(self.env, "reward_range", (-np.inf, np.inf)) reward_shape: Tuple[int, ...] = () if self.is_vectorized: reward_shape = (self.env.num_envs,) self.reward_space = spaces.Box( reward_range[0], reward_range[1], reward_shape, np.float32 ) self.reward_space = add_tensor_support(self.reward_space, device=device) def reset(self, *args, **kwargs): obs = self.env.reset(*args, **kwargs) return self.observation(obs) def observation(self, observation): return to_tensor(observation, device=self.device) def action(self, action): if isinstance(self.action_space, spaces.MultiDiscrete) and is_dataclass(action): # TODO: Fixme, the actions don't currently fit their space! action_np = replace(action, y_pred=from_tensor(self.action_space, action.y_pred)) # FIXME: for now, unwrapping the actions action = action_np["y_pred"] return action return from_tensor(self.action_space, action) def reward(self, reward): return to_tensor(reward, device=self.device) def step(self, action): action = self.action(action) assert action in self.env.action_space, (action, self.env.action_space) result = self.env.step(action) observation, reward, done, info = result observation = self.observation(observation) reward = self.reward(reward) # NOTE: Not sure this is useful, actually! # done = torch.as_tensor(done, device=self.device) # We could actually do this! # info = np.ndarray(info) return observation, reward, done, info def supports_tensors(space: S) -> bool: # TODO: Remove this, instead use a generic function return getattr(space, "_supports_tensors", False) def has_tensor_support(space: S) -> bool: return supports_tensors(space) def _mark_supports_tensors(space: S) -> None: # TODO: Remove this! setattr(space, "_supports_tensors", True) @singledispatch def add_tensor_support(space: S, device: torch.device = None) -> S: """Modifies `space` so its `sample()` method produces Tensors, and its `contains` method also accepts Tensors. For Dict and Tuple spaces, all the subspaces are also modified recursively. Returns the modified Space. """ # Save the original methods so we can use them. sample = space.sample contains = space.contains if supports_tensors(space): # logger.debug(f"Space {space} already supports Tensors.") return space @wraps(space.sample) def _sample(*args, **kwargs): samples = sample(*args, **kwargs) samples = to_tensor(space, samples) if device: samples = move(samples, device) return samples @wraps(space.contains) def _contains(x: Union[Tensor, Any]) -> bool: x = from_tensor(space, x) return contains(x) space.sample = _sample space.contains = _contains _mark_supports_tensors(space) assert has_tensor_support(space) return space @add_tensor_support.register def _(space: Image, device: torch.device = None) -> Image: tensor_box = TensorBox( space.low, space.high, shape=space.shape, dtype=space.dtype, device=device ) return ImageTensorSpace.from_box(tensor_box) @add_tensor_support.register def _(space: spaces.Dict, device: torch.device = None) -> spaces.Dict: space = type(space)( **{key: add_tensor_support(value, device=device) for key, value in space.spaces.items()} ) # TODO: Remove this '_mark_supports_tensors' and instead use a generic function. _mark_supports_tensors(space) return space @add_tensor_support.register def _(space: TypedDictSpace, device: torch.device = None) -> TypedDictSpace: space = type(space)( {key: add_tensor_support(value, device=device) for key, value in space.spaces.items()}, dtype=space.dtype, ) _mark_supports_tensors(space) return space @add_tensor_support.register(NamedTupleSpace) def _(space: Dict, device: torch.device = None) -> Dict: space = type(space)( **{key: add_tensor_support(value, device=device) for key, value in space.items()}, dtype=space.dtype, ) _mark_supports_tensors(space) return space @add_tensor_support.register(spaces.Tuple) def _(space: Dict, device: torch.device = None) -> Dict: space = type(space)([add_tensor_support(value, device=device) for value in space.spaces]) _mark_supports_tensors(space) return space # TODO: Should this be moved to the place where these are defined instead? from sequoia.common.spaces.tensor_spaces import TensorBox, TensorDiscrete, TensorMultiDiscrete @add_tensor_support.register def _(space: spaces.Box, device: torch.device = None) -> spaces.Box: space = TensorBox(space.low, space.high, shape=space.shape, dtype=space.dtype, device=device) _mark_supports_tensors(space) return space @add_tensor_support.register def _(space: spaces.Discrete, device: torch.device = None) -> spaces.Box: space = TensorDiscrete(n=space.n, device=device) _mark_supports_tensors(space) return space @add_tensor_support.register def _(space: spaces.MultiDiscrete, device: torch.device = None) -> spaces.Box: space = TensorMultiDiscrete(nvec=space.nvec, device=device) _mark_supports_tensors(space) return space ================================================ FILE: sequoia/common/gym_wrappers/convert_tensors_test.py ================================================ from typing import Union import gym import pytest import torch from gym import spaces from torch import Tensor from sequoia.conftest import skipif_param from .convert_tensors import ConvertToFromTensors, add_tensor_support @pytest.mark.parametrize( "device", [ None, "cpu", skipif_param( not torch.cuda.is_available(), "cuda", reason="Cuda is required for this test", ), ], ) def test_convert_tensors_wrapper(device: Union[str, torch.device]): env_name = "CartPole-v0" env = gym.make(env_name) env = ConvertToFromTensors(env, device=device) obs = env.reset() assert isinstance(obs, Tensor) if device: assert obs.device.type == device action = env.action_space.sample() obs, reward, done, info = env.step(torch.as_tensor(action)) assert isinstance(obs, Tensor) assert isinstance(reward, Tensor) # TODO: Not quite sure this is the best thing to do: # assert isinstance(done, Tensor) # not sure this is useful! if device: assert obs.device.type == device assert reward.device.type == device # assert done.device.type == device from dataclasses import dataclass from typing import Optional from sequoia.common.batch import Batch from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace @dataclass(frozen=True) class Foo(Batch): x: Tensor task_labels: Optional[Tensor] def test_preserves_dtype_of_namedtuple_space(): input_space = NamedTupleSpace( x=spaces.Box(0, 1, [32, 123, 123, 3]), task_labels=spaces.MultiDiscrete([5 for _ in range(32)]), dtype=Foo, ) output_space = add_tensor_support(input_space) assert output_space.dtype is input_space.dtype def test_preserves_dtype_of_typeddict_space(): input_space = TypedDictSpace( x=spaces.Box(0, 1, [32, 123, 123, 3]), task_labels=spaces.MultiDiscrete([5 for _ in range(32)]), dtype=Foo, ) output_space = add_tensor_support(input_space) assert output_space.dtype is input_space.dtype ================================================ FILE: sequoia/common/gym_wrappers/env_dataset.py ================================================ """ Creates an IterableDataset from a Gym Environment. """ import warnings from typing import Dict, Generic, Iterable, Iterator, Optional, Sequence, Tuple, TypeVar, Union import gym from gym.vector import VectorEnv from torch import Tensor from torch.utils.data import IterableDataset from sequoia.utils.logging_utils import get_logger from .utils import ActionType from .utils import MayCloseEarly as CloseableWrapper from .utils import ObservationType, RewardType, StepResult # from sequoia.settings.base.objects import Observations, Rewards, Actions logger = get_logger(__name__) Item = TypeVar("Item") class EnvDataset( CloseableWrapper, IterableDataset, Generic[ObservationType, ActionType, RewardType, Item], Iterable[Item], ): """Wrapper that exposes a Gym environment as an IterableDataset. This makes it possible to iterate over a gym env with an Active DataLoader. One pass through __iter__ is one episode. The __iter__ method can be called at most `max_episodes` times. """ def __init__( self, env: gym.Env, max_steps: Optional[int] = None, max_episodes: Optional[int] = None, max_steps_per_episode: Optional[int] = None, ): # TODO: Remove these options if max_steps: from .action_limit import ActionLimit env = ActionLimit(env, max_steps=max_steps) self._max_steps = max_steps if max_episodes: from .episode_limit import EpisodeLimit env = EpisodeLimit(env, max_episodes=max_episodes) self._max_episodes = max_episodes super().__init__(env=env) if isinstance(env.unwrapped, VectorEnv): if not max_steps_per_episode: warnings.warn( UserWarning( "Iterations through the dataset (episodes) could be " "infinitely long, since the env is a VectorEnv and " "max_steps_per_episode wasn't given!" ) ) # Maximum number of episodes # self._max_episodes = None # Maximum number of steps per iteration. # self._max_steps = None self._max_steps_per_episode = max_steps_per_episode # Number of steps performed in the current episode. self.n_steps_in_episode_: int = 0 # Total number of steps performed so far. self.n_steps_: int = 0 # Number of episodes performed in the environment. # Starts at -1 so the initial was_reset doesn't count as the end of an episode. self.n_episodes_: int = 0 # Number of times the `send` method was called. self.n_sends_: int = 0 self.observation_: Optional[ObservationType] = None self.action_: Optional[ActionType] = None self.reward_: Optional[RewardType] = None self.done_: Optional[Union[bool, Sequence[bool]]] = None self.info_: Optional[Union[Dict, Sequence[Dict]]] = None self.closed_: bool = False self.reset_: bool = False self.current_step_result_: StepResult = None self.previous_step_result_: StepResult = None def reset_counters(self): self.n_steps_ = 0 self.n_episodes_ = 0 self.n_sends_ = 0 self.n_steps_in_episode_ = 0 def observation(self, observation): return observation def action(self, action): return action def reward(self, reward): return reward def step(self, action) -> StepResult: if self.closed_ or self.is_closed(): if self.reached_episode_limit: raise gym.error.ClosedEnvironmentError( f"Env has already reached episode limit ({self._max_episodes}) and is closed." ) elif self.reached_step_limit: raise gym.error.ClosedEnvironmentError( f"Env has already reached step limit ({self._max_steps}) and is closed." ) else: raise gym.error.ClosedEnvironmentError( f"Can't call step on closed env. ({self.n_steps_})" ) # Here we add calls to the (potentially overwritten) 'observation', # 'action' and 'reward' methods. action = self.action(action) if isinstance(action, Tensor) and action.requires_grad: action = action.detach() observation, reward, done, info = super().step(action) observation = self.observation(observation) reward = self.reward(reward) self.n_steps_ += 1 self.n_steps_in_episode_ += 1 result = StepResult(observation, reward, done, info) self.previous_step_result_ = self.current_step_result_ self.current_step_result_ = result return result def __next__( self, ) -> Tuple[ObservationType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]]: """Produces the next observations, or raises StopIteration. Returns ------- Tuple[ObservationType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]] [description] Raises ------ gym.error.ClosedEnvironmentError If the env is already closed. gym.error.ResetNeeded If the env hasn't been reset before this is called. StopIteration When the step limit has been reached. StopIteration When the episode limit has been reached. RuntimeError When an action wasn't passed through 'send', and a default policy isn't set. """ # logger.debug(f"__next__ is being called at step {self.n_steps_}.") if self.closed_: raise gym.error.ClosedEnvironmentError("Env is closed.") if self.reached_episode_limit: logger.debug("Reached episode limit, raising StopIteration.") raise StopIteration if self.reached_step_limit: logger.debug("Reached step limit, raising StopIteration.") raise StopIteration if self.reached_episode_length_limit: logger.debug("Reached episode length limit, raising StopIteration.") raise StopIteration if not self.reset_: raise gym.error.ResetNeeded("Need to reset the env before you can call __next__") if self.action_ is None: raise RuntimeError("You have to send an action using send() between every observation.") if hasattr(self.action_, "detach"): self.action_ = self.action_.detach() self.observation_, self.reward_, self.done_, self.info_ = self.step(self.action_) return self.observation_ def send(self, action: ActionType) -> RewardType: """Sends an action to the environment, returning a reward. This can raise the same errors as calling __next__, namely, StopIteration, ResetNeeded, raise an error when if not called without """ assert action is not None, "Don't send a None action!" self.action_ = action self.observation_, self.reward_, self.done_, self.info_ = self.step(action) # self.observation_ = self.__next__() self.n_sends_ += 1 return self.reward_ def __iter__(self) -> Iterator[ObservationType]: """Iterator for an episode in the environment, which uses the 'active dataset' style with __iter__ and send. TODO: BUG: Wrappers applied on top of the EnvDataset won't have an effect on the values yielded by this iterator. Currently trying to fix this inside the IterableWrapper base class, but it's not that simple. TODO: To allow wrappers to also be iterable, we need to rename all the "private" attributes to "public" names, so that they can call something like: type(self.env).__iter__(self) (from within the wrapper). Yields ------- Observations Observations from the environment. Raises ------ RuntimeError [description] """ if self.closed_ or self.is_closed(): if self.reached_episode_limit: raise gym.error.ClosedEnvironmentError( f"Env has already reached episode limit ({self._max_episodes}) and is closed." ) elif self.reached_step_limit: raise gym.error.ClosedEnvironmentError( f"Env has already reached step limit ({self._max_steps}) and is closed." ) else: raise gym.error.ClosedEnvironmentError(f"Env is closed, can't iterate over it.") # First step reset automatically before iterating, if needed. if not self.reset_: self.observation_ = self.reset() self.done_ = False self.action_ = None self.reward_ = None assert self.observation_ is not None # Yield the first observation_. # TODO: What do we want to yield, actually? Just observations? yield self.observation_ if self.action_ is None: raise RuntimeError( f"You have to send an action using send() between every " f"observation. (env = {self})" ) # logger.debug(f"episode {self.n_episodes_}/{self._max_episodes}") while not any( [ self.done_is_true(), self.reached_step_limit, self.reached_episode_length_limit, self.is_closed(), ] ): # logger.debug(f"step {self.n_steps_}/{self._max_steps}, (episode {self.n_episodes_})") # Set those to None to force the user to call .send() self.action_ = None self.reward_ = None yield self.observation_ if self.action_ is None: raise RuntimeError( f"You have to send an action using send() between every " f"observation. (env = {self})" ) # Force the user to call reset() between episodes. self.reset_ = False self.n_episodes_ += 1 # logger.debug(f"self.n_steps: {self.n_steps_} self.n_episodes: {self.n_episodes_}") # logger.debug(f"Reached step limit: {self.reached_step_limit}") # logger.debug(f"Reached episode limit: {self.reached_episode_limit}") # logger.debug(f"Reached episode length limit: {self.reached_episode_length_limit}") if self.reached_episode_limit or self.reached_step_limit: logger.debug("Done iterating, closing the env.") self.close() @property def reached_step_limit(self) -> bool: if self._max_steps is None: return False return self.n_steps_ >= self._max_steps @property def reached_episode_limit(self) -> bool: if self._max_episodes is None: return False return self.n_episodes_ >= self._max_episodes @property def reached_episode_length_limit(self) -> bool: if self._max_steps_per_episode is None: return False return self.n_steps_in_episode_ >= self._max_steps_per_episode # @property def done_is_true(self) -> bool: """Returns wether self.done_ is True. This will always return False if the wrapped env is a VectorEnv, regardless of if the some of the values in the self.done_ array are true. This is because the VectorEnvs already reset the underlying envs when they have done=True. Returns ------- bool Wether the episode is considered "done" based on self.done_. """ if isinstance(self.done_, bool): return self.done_ if isinstance(self.env.unwrapped, VectorEnv): # VectorEnvs reset themselves, so we consider the "_done" as False, # regarless return False if isinstance(self.done_, Tensor) and not self.done_.shape: return bool(self.done_) raise RuntimeError( f"'done' should be a single boolean, but got " f"{self.done_} of type {type(self.done_)})" ) raise RuntimeError(f"Can't tell if we're done: self.done_={self.done_}") def reset(self, **kwargs) -> ObservationType: observation = self.env.reset(**kwargs) self.observation_ = self.observation(observation) self.reset_ = True self.n_steps_in_episode_ = 0 # self.n_episodes_ += 1 return self.observation_ def close(self) -> None: # This will stop the iterator on the next step. # self._max_steps = 0 self.closed_ = True self.action_ = None self.observation_ = None self.reward_ = None super().close() # TODO: calling `len` on an RL environment probably shouldn't work! (it should # behave the same exact way as an IterableDataset) # def __len__(self) -> Optional[int]: # if self._max_steps is None: # raise RuntimeError(f"The dataset has no length when max_steps is None.") # return self._max_steps def __add__(self, other): from sequoia.utils.generic_functions import concatenate return concatenate(self, other) ================================================ FILE: sequoia/common/gym_wrappers/env_dataset_test.py ================================================ from functools import partial from typing import ClassVar, Type import gym import numpy as np import pytest from gym import spaces from sequoia.common.transforms import Transforms from sequoia.conftest import DummyEnvironment, atari_py_required from sequoia.settings.rl.continual.make_env import make_batched_env from .env_dataset import EnvDataset from .transform_wrappers import TransformObservation class TestEnvDataset: # NOTE: We do this so that other tests for potential subclasses or wrappers around # an env dataset can reuse this while changing the type of wrapper used (for example # in the tests for `EnvProxy`). EnvDataset: ClassVar[Type[EnvDataset]] = EnvDataset @pytest.fixture() def dummy_env_fn(self): return DummyEnvironment def test_step_normally_works_fine(self, dummy_env_fn: Type[DummyEnvironment]): env = dummy_env_fn() env = self.EnvDataset(env) env.seed(123) obs = env.reset() assert obs == 0 obs, reward, done, info = env.step(0) assert (obs, reward, done, info) == (0, 5, False, {}) obs, reward, done, info = env.step(1) assert (obs, reward, done, info) == (1, 4, False, {}) obs, reward, done, info = env.step(1) assert (obs, reward, done, info) == (2, 3, False, {}) obs, reward, done, info = env.step(2) assert (obs, reward, done, info) == (1, 4, False, {}) obs, reward, done, info = env.step(1) assert (obs, reward, done, info) == (2, 3, False, {}) obs, reward, done, info = env.step(1) assert (obs, reward, done, info) == (3, 2, False, {}) obs, reward, done, info = env.step(1) assert (obs, reward, done, info) == (4, 1, False, {}) obs, reward, done, info = env.step(1) assert (obs, reward, done, info) == (5, 0, True, {}) env.reset() obs, reward, done, info = env.step(0) assert (obs, reward, done, info) == (0, 5, False, {}) def test_iterating_with_send(self, dummy_env_fn: Type[DummyEnvironment]): env = dummy_env_fn(target=5) env = self.EnvDataset(env) env.seed(123) actions = [0, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0] expected_obs = [0, 0, 1, 2, 1, 2, 3, 4, 5] expected_rewards = [5, 4, 3, 4, 3, 2, 1, 0] expected_dones = [False, False, False, False, False, False, False, True] reset_obs = 0 # obs = env.reset() # assert obs == reset_obs n_calls = 0 for i, observation in enumerate(env): print(f"Step {i}: batch: {observation}") assert observation == expected_obs[i] action = actions[i] reward = env.send(action) assert reward == expected_rewards[i] # TODO: The episode will end as soon as 'done' is encountered, which means # that we will never be given the 'final' observation. In this case, the # DummyEnvironment will set done=True when the state is state = target = 5 # in this case. assert observation == 4 def test_raise_error_when_missing_action(self, dummy_env_fn: Type[DummyEnvironment]): env = dummy_env_fn() with self.EnvDataset(env) as env: env.reset() env.seed(123) with pytest.raises(RuntimeError): for i, observation in zip(range(5), env): pass def test_doesnt_raise_error_when_action_sent(self, dummy_env_fn: Type[DummyEnvironment]): env = dummy_env_fn() with self.EnvDataset(env) as env: env.reset() env.seed(123) for i, obs in zip(range(5), env): assert obs in env.observation_space reward = env.send(env.action_space.sample()) def test_max_episodes(self): max_episodes = 3 env = self.EnvDataset( env=gym.make("CartPole-v0"), max_episodes=max_episodes, ) env.seed(123) for episode in range(max_episodes): # This makes use of the fact that given this seed, the episode should only # last a set number of frames. for i, observation in enumerate(env): print(f"step {i} {observation}") action = 0 reward = env.send(action) if i >= 50: assert False, "The episode should never be longer than about 10 steps!" with pytest.raises(gym.error.ClosedEnvironmentError): for i, observation in enumerate(env): print(f"step {i} {observation}") env.send(env.action_space.sample()) def test_max_steps(self): epochs = 3 max_steps = 5 env = self.EnvDataset( env=gym.make("CartPole-v0"), max_steps=max_steps, ) all_rewards = [] all_observations = [] with env: # TODO: Should we could what is given back by 'reset' as an observation? all_observations.append(env.reset()) for i, batch in enumerate(env): assert i < max_steps, f"Max steps should have been respected: {i}" rewards = env.send(env.action_space.sample()) all_rewards.append(rewards) assert len(all_rewards) == max_steps with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): for i in range(10): print(i) observation = next(env) rewards = env.send(env.action_space.sample()) all_rewards.append(rewards) assert len(all_rewards) == max_steps def test_max_steps_per_episode(self): n_episodes = 4 max_steps_per_episode = 5 env = self.EnvDataset( env=gym.make("CartPole-v0"), max_steps_per_episode=max_steps_per_episode, ) all_observations = [] with env: for episode in range(n_episodes): env.reset() for i, batch in enumerate(env): assert ( i < max_steps_per_episode ), f"Max steps per episode should have been respected: {i}" rewards = env.send(env.action_space.sample()) assert i == max_steps_per_episode - 1 @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [1, 2, 5, 10]) def test_not_setting_max_steps_per_episode_with_vector_env_raises_warning( self, env_name: str, batch_size: int ): from functools import partial from gym.vector import SyncVectorEnv env = SyncVectorEnv([partial(gym.make, env_name) for i in range(batch_size)]) with pytest.warns(UserWarning): dataset = self.EnvDataset(env) env.close() @atari_py_required def test_observation_wrapper_applies_to_yielded_objects(self): """Test that when an TransformObservation wrapper (or any wrapper that changes the Observations) is applied on the env, the observations that are yielded by the GymDataLoader are also transformed, in the same way as those returned by step() or reset(). """ env_name = "ALE/Breakout-v5" batch_size = 10 num_workers = 4 max_steps_per_episode = 100 wrapper = partial(TransformObservation, f=Transforms.channels_first) vector_env = make_batched_env(env_name, batch_size=batch_size, num_workers=num_workers) env = self.EnvDataset(vector_env, max_steps_per_episode=max_steps_per_episode) assert env.observation_space == spaces.Box(0, 255, (10, 210, 160, 3), np.uint8) env = TransformObservation(env, f=Transforms.channels_first) # env = wrapper(env) assert env.observation_space == spaces.Box(0, 255, (10, 3, 210, 160), np.uint8) # env = DummyWrapper(env) # assert env.observation_space == spaces.Box(0, 255 // 2, (10, 210, 160, 3), np.uint8) print("Before reset") reset_obs = env.reset() assert reset_obs in env.observation_space print("Before step") step_obs, _, _, _ = env.step(env.action_space.sample()) assert step_obs in env.observation_space # We need to send an action before we can do this. action = env.action_space.sample() print(f"Before send") reward = env.send(action) # TODO: Perhaps going to drop this API, because if really complicates the # wrappers. print("Before __next__") next_obs = next(env) assert next_obs.shape == env.observation_space.shape assert next_obs in env.observation_space print(f"Before iterating") # TODO: This still doesn't call the right .observation() method! for i, iter_obs in zip(range(3), env): assert iter_obs.shape == env.observation_space.shape assert iter_obs in env.observation_space action = env.action_space.sample() reward = env.send(action) env.close() @atari_py_required def test_iteration_with_more_than_one_wrapper(self): """Same as above, but with more than one wrapper applied on top of the EnvDataset. """ env_name = "ALE/Breakout-v5" batch_size = 10 num_workers = 4 max_steps_per_episode = 100 vector_env = make_batched_env(env_name, batch_size=batch_size, num_workers=num_workers) env = self.EnvDataset(vector_env, max_steps_per_episode=max_steps_per_episode) assert env.observation_space == spaces.Box(0, 255, (10, 210, 160, 3), np.uint8) env = TransformObservation(env, f=Transforms.channels_first) assert env.observation_space == spaces.Box(0, 255, (10, 3, 210, 160), np.uint8) env = TransformObservation(env, f=[Transforms.to_tensor, Transforms.resize_64x64]) assert env.observation_space == spaces.Box(0, 1.0, (10, 3, 64, 64), np.float32) # env = DummyWrapper(env) # assert env.observation_space == spaces.Box(0, 255 // 2, (10, 210, 160, 3), np.uint8) print("Before reset") reset_obs = env.reset().numpy() assert reset_obs in env.observation_space print("Before step") step_obs, _, _, _ = env.step(env.action_space.sample()) assert step_obs.numpy() in env.observation_space # We need to send an action before we can do this. action = env.action_space.sample() print(f"Before send") reward = env.send(action) print("Before __next__") next_obs = next(env).numpy() assert next_obs in env.observation_space print(f"Before iterating") # TODO: This still doesn't call the right .observation() method! for i, iter_obs in zip(range(3), env): assert iter_obs.shape == env.observation_space.shape assert iter_obs.numpy() in env.observation_space action = env.action_space.sample() reward = env.send(action) env.close() ================================================ FILE: sequoia/common/gym_wrappers/episode_limit.py ================================================ # IDEA: Limit the total number of episodes, even in vectorized # environments! import warnings from typing import Sequence, Union import gym import numpy as np from gym.error import ClosedEnvironmentError from gym.utils import colorize from sequoia.utils import get_logger from .utils import IterableWrapper logger = get_logger(__name__) class EpisodeCounter(IterableWrapper): """Closes the environment when a given number of episodes is performed. NOTE: This also applies to vectorized environments, i.e. the episode counter is incremented for when every individual environment reaches the end of an episode. """ def __init__(self, env: gym.Env): super().__init__(env=env) self._episode_counter: int = 0 # -1 to account for the initial reset? self._done: Union[bool, Sequence[bool]] = False if self.is_vectorized: self._done = np.zeros(self.env.num_envs, dtype=bool) self._initial_reset: bool = False def episode_count(self) -> int: return self._episode_counter def reset(self): obs = super().reset() if self._episode_counter >= self._max_episodes: raise ClosedEnvironmentError( f"Env reached max number of episodes ({self._max_episodes})" ) if self.is_vectorized: if not self._initial_reset: self._initial_reset = True self._episode_counter = 0 else: # Resetting all envs. n_unfinished_envs: int = (self._done == False).sum() self._episode_counter += n_unfinished_envs self._done[:] = False else: # Increment every time for non-vectorized env, or just once for # VectorEnvs. self._episode_counter += 1 return obs def step(self, action): obs, reward, done, info = self.env.step(action) if self.is_vectorized: self._episode_counter += (done == True).sum() else: # NOTE: We don't increment the episode counter based on `done` here # with non-vectorized environments. Instead, we cound the number of # calls to the `reset()` method. pass # if done: # self._episode_counter += 1 return obs, reward, done, info class EpisodeLimit(EpisodeCounter): """Closes the environment when a given number of episodes is performed. NOTE: This also applies to vectorized environments, i.e. the episode counter is incremented for when every individual environment reaches the end of an episode. """ def __init__(self, env: gym.Env, max_episodes: int): super().__init__(env=env) self._max_episodes = max_episodes @property def max_episodes(self) -> int: return self._max_episodes def closed_error_message(self) -> str: """Return the error message to use when attempting to use the closed env. This can be useful for wrappers that close when a given condition is reached, e.g. a number of episodes has been performed, which could return a more relevant message here. """ return f"Env reached max number of episodes ({self.max_episodes})" def reset(self): # NOTE: MayCloseEarly.reset() will raise a ClosedEnvironmentError if # self.is_closed() is True, which will always be the case if we exceed the # limit. obs = super().reset() assert not self.is_closed() if self.is_vectorized: n_unfinished_envs: int = (~self._done).sum() if self._episode_counter != 0 and n_unfinished_envs: # Wasting some steps in unfinished environments! w = UserWarning( f"Calling .reset() on a VectorEnv resets all the envs, " f"ending episodes prematurely. This env has a limit of " f"{self._max_episodes} episodes in total, so by calling " f"reset() here, you could be wasting {n_unfinished_envs} " f"episodes from your budget!" ) warnings.warn(colorize(f"WARN: {w}", "yellow")) logger.debug(f"Starting episode {self._episode_counter}/{self._max_episodes})") if self._episode_counter == self._max_episodes: logger.warning("Beware, entering last episode") return obs def __iter__(self): return super().__iter__() def step(self, action): if self.is_closed(): if self._episode_counter >= self._max_episodes: raise ClosedEnvironmentError( f"Env reached max number of episodes ({self._max_episodes})" ) raise ClosedEnvironmentError("Can't step through closed env.") obs, reward, done, info = super().step(action) if self.is_vectorized: # BUG: This can be reached while in the last 'send' (which uses self.send) # of the previous epoch while iterating if any(done) and self._episode_counter >= self.max_episodes: logger.info(f"Closing the envs since we reached the max number of episodes.") self.close() done[:] = True else: if done and self._episode_counter == self._max_episodes: logger.info(f"Closing the env since we reached the max number of episodes.") self.close() return obs, reward, done, info ================================================ FILE: sequoia/common/gym_wrappers/episode_limit_test.py ================================================ from functools import partial import gym import numpy as np import pytest from gym.vector import SyncVectorEnv from gym.wrappers import TimeLimit from sequoia.conftest import DummyEnvironment from .env_dataset import EnvDataset from .episode_limit import EpisodeLimit def test_basics(): env = TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10) env = EnvDataset(env) env = EpisodeLimit(env, max_episodes=3) env.seed(123) for episode in range(3): obs = env.reset() done = False step = 0 while not done: print(f"step {step}") obs, reward, done, info = env.step(env.action_space.sample()) step += 1 assert env.is_closed() with pytest.raises(gym.error.ClosedEnvironmentError): _ = env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): _ = env.step(env.action_space.sample()) with pytest.raises(gym.error.ClosedEnvironmentError): for _ in env: break @pytest.mark.parametrize("env_name", ["CartPole-v0"]) def test_episode_limit_with_single_env(env_name: str): """EpisodeLimit should close the env when a given number of episodes is reached. """ env = gym.make(env_name) env = EpisodeLimit(env, max_episodes=3) env.seed(123) done = False assert env.episode_count() == 0 # First episode. obs = env.reset() while not done: obs, reward, done, info = env.step(env.action_space.sample()) assert env.episode_count() == 1 # Second episode. obs = env.reset() done = False while not done: obs, reward, done, info = env.step(env.action_space.sample()) assert env.episode_count() == 2 # Third episode. obs = env.reset() done = False while not done: obs, reward, done, info = env.step(env.action_space.sample()) assert env.episode_count() == 3 assert env.is_closed() with pytest.raises(gym.error.ClosedEnvironmentError): obs = env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): _ = env.step(env.action_space.sample()) @pytest.mark.parametrize("env_name", ["CartPole-v0"]) def test_episode_limit_with_single_env_dataset(env_name: str): """EpisodeLimit should close the env when a given number of episodes is reached when iterating through the env. """ env = gym.make(env_name) env = EpisodeLimit(env, max_episodes=2) env = EnvDataset(env) # TODO: The reverse ordering doesn't work: (EnvDataset(EpisodeLimit)) # TODO: There's a warning that doing this steps even though done = True? env.seed(123) done = False # First episode. for obs in env: print("in loop:", env.episode_count()) reward = env.send(env.action_space.sample()) print("between loops", env.episode_count()) # Second episode. for i, obs in enumerate(env): print("Second loop", env.episode_count()) reward = env.send(env.action_space.sample()) # Trying to start a third episode should fail: with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() for obs in env: assert False @pytest.mark.parametrize("batch_size", [3, 5]) def test_episode_limit_with_vectorized_env(batch_size): """Test that when adding the EpisodeLimit wrapper on top of a vectorized environment, the episode limit is with respect to each individual env rather than the batched env. """ starting_values = [0 for i in range(batch_size)] targets = [10 for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=10 * 2) for start, target in zip(starting_values, targets) ] ) env = EpisodeLimit(env, max_episodes=2 * batch_size) obs = env.reset() assert obs.tolist() == starting_values print("reset obs: ", obs) for i in range(10): print(i, obs) actions = np.ones(batch_size) obs, reward, done, info = env.step(actions) # all episodes end at step 10 assert all(done) # Because of how VectorEnvs work, the obs are the new 'reset' obs, rather # than the final obs in the episode. assert obs.tolist() == starting_values assert obs.tolist() == starting_values print("reset obs: ", obs) for i in range(10): print(i, obs) actions = np.ones(batch_size) obs, reward, done, info = env.step(actions) # all episodes end at step 10 assert all(done) assert env.is_closed assert obs.tolist() == starting_values with pytest.raises(gym.error.ClosedEnvironmentError): actions = np.ones(batch_size) obs, reward, done, info = env.step(actions) # @pytest.mark.xfail(reason="TODO: Fix the bugs in the interaction between " # "EnvDataset and EpisodeLimit.") @pytest.mark.parametrize("batch_size", [3, 5]) def test_episode_limit_with_vectorized_env_dataset(batch_size): """Test that when adding the EpisodeLimit wrapper on top of a vectorized environment, the episode limit is with respect to each individual env rather than the batched env. """ start = 0 target = 10 starting_values = [start for i in range(batch_size)] targets = [target for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=10 * 2) for start, target in zip(starting_values, targets) ] ) max_episodes = 2 # TODO: For some reason the reverse order doesn't work! env = EpisodeLimit(env, max_episodes=max_episodes * batch_size) env = EnvDataset(env) for i, obs in enumerate(env): print(i, obs) actions = np.ones(batch_size) reward = env.send(actions) assert i == max_episodes * target - 1 with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): for i, obs in enumerate(env): print(i, obs) actions = np.ones(batch_size) reward = env.send(actions) # all episodes end at step 10 # @pytest.mark.xfail(reason=f"BUG in EnvDataset, it doesn't finish ") @pytest.mark.parametrize("batch_size", [3, 5]) def test_reset_vectorenv_with_unfinished_episodes_raises_warning(batch_size): """Test that when adding the EpisodeLimit wrapper on top of a vectorized environment, the episode limit is with respect to each individual env rather than the batched env. """ start = 0 target = 10 starting_values = [start for i in range(batch_size)] targets = [target for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=10 * 2) for start, target in zip(starting_values, targets) ] ) env = EpisodeLimit(env, max_episodes=3 * batch_size) obs = env.reset() _ = env.step(env.action_space.sample()) _ = env.step(env.action_space.sample()) with pytest.warns(UserWarning) as record: env.reset() ================================================ FILE: sequoia/common/gym_wrappers/measure_performance.py ================================================ """ Abstract base class for a Wrapper that gets applied onto the environment in order to measure the online training performance. The concrete versions of this wrapper are located. """ from abc import ABC from typing import Dict, Generic, List, Optional from sequoia.common.gym_wrappers.utils import EnvType, IterableWrapper from sequoia.common.metrics import MetricsType from sequoia.settings.base import Environment class MeasurePerformanceWrapper(IterableWrapper[EnvType], Generic[EnvType, MetricsType], ABC): def __init__(self, env: Environment): super().__init__(env) self._metrics: Dict[int, MetricsType] = {} def get_online_performance(self) -> Dict[int, List[MetricsType]]: """Returns the online performance over the evaluation period. Returns ------- Dict[int, MetricsType] A dict mapping from step number to the Metrics object captured at that step. """ return dict(self._metrics.copy()) def get_average_online_performance(self) -> Optional[MetricsType]: """Returns the average online performance over the evaluation period, or None if the env was not iterated over / interacted with. Returns ------- Optional[MetricsType] Metrics """ if not self._metrics: return None return sum(self._metrics.values()) ================================================ FILE: sequoia/common/gym_wrappers/multi_task_environment.py ================================================ import bisect import dataclasses from functools import singledispatch from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, TypeVar, Union import gym import numpy as np from gym import spaces from gym.envs.classic_control import CartPoleEnv from torch import Tensor from sequoia.common.spaces.named_tuple import NamedTupleSpace from sequoia.utils.logging_utils import get_logger from .utils import MayCloseEarly task_param_names: Dict[Union[Type[gym.Env], str], List[str]] = { CartPoleEnv: ["gravity", "masscart", "masspole", "length", "force_mag", "tau"] # TODO: Add more of the classic control envs here. } logger = get_logger(__name__) X = TypeVar("X") T = TypeVar("T") K = TypeVar("K") V = TypeVar("V") def make_env_attributes_task( env: gym.Env, task_params: Union[List[str], Dict[str, Any]], seed: int = None, rng: np.random.Generator = None, noise_std: float = 0.2, ) -> Dict[str, Any]: task: Dict[str, Any] = {} rng: np.random.Generator = rng or np.random.default_rng(seed) if isinstance(task_params, list): task_params = {param: getattr(env.unwrapped, param) for param in task_params} for attribute, default_value in task_params.items(): new_value = default_value if isinstance(default_value, (int, float, np.ndarray)): new_value *= rng.normal(1.0, noise_std) # Clip the value to be in the [0.1*default, 10*default] range. new_value = max(0.1 * default_value, new_value) new_value = min(10 * default_value, new_value) if isinstance(default_value, int): new_value = round(new_value) elif isinstance(default_value, bool): new_value = rng.choice([True, False]) else: raise NotImplementedError( f"TODO: Don't yet know how to sample a random value for " f"attribute {attribute} with default value {default_value} of type " f" {type(default_value)}." ) task[attribute] = new_value return task # class ObservationsAndTaskLabels(NamedTuple): # x: Any # task_labels: Any @singledispatch def add_task_labels(observation: Any, task_labels: Any) -> Any: raise NotImplementedError(observation, task_labels) @add_task_labels.register(int) @add_task_labels.register(float) @add_task_labels.register(Tensor) @add_task_labels.register(np.ndarray) def _add_task_labels_to_single_obs(observation: X, task_labels: T) -> Tuple[X, T]: return { "x": observation, "task_labels": task_labels, } # return ObservationsAndTaskLabels(observation, task_labels) from sequoia.common.batch import Batch @add_task_labels.register(Batch) def _add_task_labels_to_batch(observation: Batch, task_labels: T) -> Batch: return dataclasses.replace(observation, task_labels=task_labels) from sequoia.common.spaces import TypedDictSpace @add_task_labels.register(spaces.Space) def _add_task_labels_to_space(observation: spaces.Space, task_labels: T) -> spaces.Dict: # TODO: Return a dict or NamedTuple at some point: return TypedDictSpace( x=observation, task_labels=task_labels, ) # return NamedTupleSpace( # x=observation, task_labels=task_labels, dtype=ObservationsAndTaskLabels, # ) @add_task_labels.register(NamedTupleSpace) def _add_task_labels_to_namedtuple( observation: NamedTupleSpace, task_labels: gym.Space ) -> NamedTupleSpace: assert "task_labels" not in observation._spaces, "space already has task labels!" return type(observation)( **observation._spaces, task_labels=task_labels, dtype=observation.dtype ) @add_task_labels.register(spaces.Tuple) @add_task_labels.register(tuple) def _add_task_labels_to_tuple(observation: Tuple, task_labels: T) -> Tuple: return type(observation)([*observation, task_labels]) @add_task_labels.register(spaces.Dict) def _add_task_labels_to_dict_space(observation: spaces.Dict, task_labels: T) -> spaces.Dict: assert "task_labels" not in observation.spaces d_spaces = observation.spaces.copy() d_spaces["task_labels"] = task_labels return type(observation)(**d_spaces) @add_task_labels.register(TypedDictSpace) def _add_task_labels_to_typed_dict_space( observation: TypedDictSpace, task_labels: T ) -> TypedDictSpace: # TODO: Raise a warning instead? # assert "task_labels" not in observation.spaces, observation d_spaces = observation.spaces.copy() d_spaces["task_labels"] = task_labels # NOTE: We assume here that the `dtype` of the typed dict space (e.g. the # `Observations` class, usually) can handle having a `task_labels` field. return type(observation)(**d_spaces, dtype=observation.dtype) @add_task_labels.register(dict) def _add_task_labels_to_dict(observation: Dict[str, V], task_labels: T) -> Dict[str, Union[V, T]]: new: Dict[str, Union[V, T]] = {key: value for key, value in observation.items()} # TODO: Raise a warning instead? # assert "task_labels" not in new new["task_labels"] = task_labels return type(observation)(**new) # type: ignore class MultiTaskEnvironment(MayCloseEarly): """Creates 'tasks' by modifying attributes or applying functions to the wrapped env. This wrapper accepts a `task_schedule` dictionary, which maps from a given step to either: - dicts of attributes that are to be set on the (unwrapped) env at that step, or - callables to apply to the wrapped environment at the given steps. For example, when wrapping the "CartPole-v0" environment, we could vary any of the "gravity", "masscart", "masspole", "length", "force_mag" or "tau" attributes like so: ``` env = gym.make("CartPole-v0") env = MultiTaskEnvironment(env, task_schedule={ # step -> attributes to set on the environment when step is reached. 10: dict(length=2.0), 20: dict(length=1.0, gravity=20.0), 30: dict(length=0.5, gravity=5.0), }) env.seed(123) env.reset() ``` During steps 0-9, the environment is unchanged (length = 0.5). At step 10, the length of the pole will be set to 2.0 At step 20, the length of the pole will be set to 1.0, and the gravity will be changed from its default value (9.8) to 20. etc. TODO: Might be more accurate to call this a `TaskIncrementalEnvironment`, rather than `MultiTaskEnvironemnt`, which is more related to the `new_random_task_on_reset` behaviour anyway. TODOs: - Copy this to a `incremental_environment.py` or something similar - Remove all references to this `new_random_task_on_reset` stuff. - Rename "smooth_environment" to "nonstationary_environment"? """ def __init__( self, env: gym.Env, task_schedule: Dict[int, Union[Dict[str, float], Callable[[gym.Env], Any]]] = None, task_params: List[str] = None, noise_std: float = 0.2, add_task_dict_to_info: bool = False, add_task_id_to_obs: bool = False, new_random_task_on_reset: bool = False, starting_step: int = 0, nb_tasks: int = None, max_steps: int = None, seed: int = None, ): """Wraps an environment, allowing it to be 'multi-task'. NOTE: Assumes that all the attributes in 'task_param_names' are floats for now. TODO: Check the case where a task boundary is reached and the episode is not done yet. Args: env (gym.Env): The environment to wrap. task_param_names (List[str], optional): The attributes of the environment that will be allowed to change. Defaults to None. task_schedule (Dict[int, Dict[str, float]], optional): Schedule mapping from a given step number to the state that will be set at that time. noise_std (float, optional): The standard deviation of the noise used to create the different tasks. """ super().__init__(env=env) self.env: gym.Env self.noise_std = noise_std if not task_params: unwrapped_type = type(env.unwrapped) if unwrapped_type in task_param_names: task_params = task_param_names[unwrapped_type] elif task_schedule: if not any(isinstance(v, dict) for v in task_schedule.values()): task_params: List[str] = None for value in task_schedule.values(): if not isinstance(value, dict): continue if task_params is None: task_params = list(value.keys()) elif not task_params == list(value.keys()): raise NotImplementedError( "All tasks need to have the same keys for now." ) else: logger.warning( UserWarning( f"You didn't pass any 'task params', and the task " f"parameters aren't known for this type of environment " f"({unwrapped_type}), so we can't make it multi-task with " f"this wrapper." ) ) self._max_steps: Optional[int] = max_steps self._starting_step: int = starting_step self._steps: int = self._starting_step self._episodes: int = 0 self._current_task: Dict = {} self._task_schedule: Dict[int, Dict[str, Any]] = task_schedule or {} self.task_params: List[str] = task_params or [] self.default_task: np.ndarray = self.current_task.copy() self.task_schedule = task_schedule or {} self.new_random_task_on_reset: bool = new_random_task_on_reset # Wether we will add a task id to the observation. self.add_task_id_to_obs = add_task_id_to_obs # Wether we will add the task dict (the values of the attributes) to the # 'info' dict. self.add_task_dict_to_info = add_task_dict_to_info if 0 not in self.task_schedule: self.task_schedule[0] = self.default_task # TODO: Need to do a major refactor of this wrapper. # Need to clean this up: passing the task schedule to the env and having it "mean" different # things depending on the value other arguments (discrete vs continuous, etc) is very ugly. nb_tasks = nb_tasks if nb_tasks is not None else len(self.task_schedule) if self.add_task_id_to_obs: self.observation_space = add_task_labels( self.env.observation_space, spaces.Discrete(n=nb_tasks), ) # self.observation_space = spaces.Tuple([ # self.env.observation_space, # spaces.Discrete(n=n_tasks) # ]) # self._closed = False self._on_task_switch_callback: Optional[Callable[[int], None]] = None self.np_random: np.random.Generator self.seed(seed) @property def current_task_id(self) -> int: """Returns the 'index' of the current task within the task schedule.""" if self.new_random_task_on_reset: # The task id is the index of the key that corresponds to the current task. return self._current_task_id current_step = self._steps assert current_step >= 0 task_steps: List[int] = sorted(self.task_schedule.keys()) assert 0 in task_steps insertion_index = bisect.bisect_right(task_steps, current_step) # The current task id is the insertion index - 1 current_task_index = insertion_index - 1 return current_task_index @current_task_id.setter def current_task_id(self, value: int) -> None: self._current_task_id = value def set_on_task_switch_callback(self, callback: Callable[[int], None]) -> None: self._on_task_switch_callback = callback def on_task_switch(self, task_id: int): if task_id != self.current_task_id: logger.debug(f"Switching from {self.current_task_id} -> {task_id}.") # TODO: We could maybe use this to call the method's 'on_task_switch' # callback? if self._on_task_switch_callback: self._on_task_switch_callback(task_id) def step(self, *args, **kwargs): # If we reach a step in the task schedule, then we change the task to # that given step. # if self._closed: # raise gym.error.ClosedEnvironmentError("Can't step in closed env.") if self.steps in self.task_schedule and not self.new_random_task_on_reset: self.current_task = self.task_schedule[self.steps] logger.debug(f"New task at step {self.steps}: {self.current_task}") # Adding this on_task_switch, since it could maybe be easier than # having to add a callback wrapper to use. task_id = sorted(self.task_schedule.keys()).index(self.steps) self.on_task_switch(task_id) # elif self.new_random_task_on_reset: # self.current_task_id observation, rewards, done, info = super().step(*args, **kwargs) if self.add_task_id_to_obs: observation = add_task_labels(observation, self.current_task_id) if self.add_task_dict_to_info: info.update(self.current_task) self.steps += 1 return observation, rewards, done, info # def close(self, **kwargs) -> None: # return super().close(**kwargs) def reset(self, new_random_task: bool = None, **kwargs): """Resets the wrapped environment. If `new_random_task` is True, this also sets a new random task as the current task. NOTE: This resets the wrapped env, but doesn't reset the number of steps taken, hence the 'task' progression according to the task_schedule doesn't change. """ if new_random_task is None: new_random_task = self.new_random_task_on_reset # if self._closed: # raise gym.error.ClosedEnvironmentError("Can't reset closed env.") if new_random_task: prev_task_id = self.current_task_id previous_task = self.current_task self.current_task = self.random_task() episode = self._episodes step = self._steps if previous_task != self.current_task: logger.debug( f"Switching tasks at step {step} (end of episode {episode}): " f"{prev_task_id} -> {self.current_task_id} {self.current_task}" ) observation = self.env.reset(**kwargs) if self.add_task_id_to_obs: observation = add_task_labels(observation, self.current_task_id) self._episodes += 1 return observation @property def steps(self) -> int: return self._steps @steps.setter def steps(self, value: int) -> None: if value < self._starting_step: value = self._starting_step if self._max_steps is not None and value > self._max_steps: # Reached the maximum number of steps, stagnate. # TODO: What exactly should we do in this case? Should we close # the env? Or just stay at the same 'step' in the task schedule # forever? # TODO: Is this the "correct" way to limit the number of steps in # an environment? value = self._max_steps self._steps = value @property def current_task(self) -> Dict[str, Any]: # NOTE: This caching mechanism assumes that we are the only source # of potential change for these attributes. # At the moment, We're not really concerned with performance, so we # could turn it off it if misbehaves or causes bugs. if not self._current_task: # NOTE: We get the attributes from the unwrapped environment, which # effectively bypasses any wrappers. Don't know if this is good # practice, but oh well. self._current_task = { name: getattr(self.env.unwrapped, name) for name in self.task_params } # Double-checking that the attributes didn't change somehow without us # knowing. # TODO: Maybe remove this when done debugging/testing this since it's a # little bit of a waste of compute. for attribute, value_in_dict in self._current_task.items(): current_env_value = getattr(self.env.unwrapped, attribute) if value_in_dict != current_env_value: raise RuntimeError( f"The value of the attribute '{attribute}' was changed from " f"somewhere else! (value in _current_task: {value_in_dict}, " f"value on env: {current_env_value})" ) return self._current_task @current_task.setter def current_task(self, task: Union[Dict[str, float], Sequence[float], Callable]): # logger.debug(f"(_step: {self.steps}): Setting the current task to {task}.") if isinstance(task, (list, np.ndarray)): assert len(task) == len(self.task_params), "lengths should match!" task_dict = {} for k, value in zip(self.task_params, task): task_dict[k] = value task = task_dict if task in self.task_schedule.values(): self._current_task_id = [ i for i, (k, v) in enumerate(self.task_schedule.items()) if v == task ][0] # assert False, f"Hey, this task is in the values at index {self._current_task_id}" if callable(task): task(self.env) elif isinstance(task, dict): self._current_task.clear() self._current_task.update(self.default_task) if isinstance(task, dict): for k, value in task.items(): assert isinstance(k, str), "The task dict should have str keys." self._current_task[k] = value # Actually change the value of the task attributes in the environment. for name, param_value in self._current_task.items(): assert hasattr( self.env.unwrapped, name ), f"the unwrapped environment doesn't have a {name} attribute!" setattr(self.env.unwrapped, name, param_value) else: raise RuntimeError( f"don't know how to set task {task}! (tasks must be " f"either callables or dicts mapping attributes to " f"values. " ) def random_task(self) -> Dict: """Samples a random 'task'. If the wrapper already has a task schedule, then one of the tasks (values of the task schedule dict) is selected at random. How the random value for an attribute is sampled depends on the type of its default value in the envionment: - `int`, `float`, or `np.ndarray` attributes are sampled by multiplying the default value by a N(mean=1., std=`self.noise_std`). `int` attributes are then rounded to the nearest value. - `bool` attributes are sampled randomly from `True` and `False`. TODO: It might be cool to give an option for passing a prior that could be used for a given attribute, but it would add a bit too much complexity and isn't really needed atm. Raises: NotImplementedError: If the default value has an unsupported type. Returns: Dict: A dict of the attribute name, and the value that would be set for that attribute. """ if self.new_random_task_on_reset: return self.np_random.choice(list(self.task_schedule.values())) return make_env_attributes_task( self, task_params=self.default_task, rng=self.np_random, noise_std=self.noise_std, ) def update_task(self, values: Dict = None, **kwargs): """Updates the current task with the params from values or kwargs. Important: Use this method to update properties of the current task, instead of trying modifying the `current_task` dictionary. For example, `env.current_task["length"] = 2.0` will NOT update the length of the pole in CartPole, whereas using `env.update_task(length=2.0)` will! NOTE: When passing a dictionary, any missing param is kept at its current value (not reset to the default value). """ current_task = self.current_task.copy() if isinstance(values, dict): current_task.update(values) elif values is not None: raise RuntimeError(f"values can only be a dict or None (received {values}).") if kwargs: current_task.update(kwargs) self.current_task = current_task def seed(self, seed: Optional[int] = None) -> List[int]: self.np_random = np.random.default_rng(seed) self.action_space.seed(seed) self.observation_space.seed(seed) return self.env.seed(seed) def task_dict(self, task_array: np.ndarray) -> Dict[str, float]: assert len(task_array) == len( self.task_params ), "Lengths should match the number of task parameters." return dict(zip(self.task_params, task_array)) @property def task_schedule(self) -> Dict: return self._task_schedule @task_schedule.setter def task_schedule(self, value: Dict[str, Any]): self._task_schedule = {} if 0 not in value: self._task_schedule[0] = self.default_task.copy() for step, task in sorted(value.items()): # Convert any numpy arrays or lists in the task schedule to dicts # mapping from attribute name to value to be set. if isinstance(task, (list, np.ndarray)): task = self.task_dict(task) if not (isinstance(task, dict) or callable(task)): raise RuntimeError( f"Task schedule can only contain dicts, lists, numpy arrays or" f"callables, but got {task}!" ) self._task_schedule[step] = task if self._steps in self._task_schedule: self.current_task = self._task_schedule[self._steps] ================================================ FILE: sequoia/common/gym_wrappers/multi_task_environment_test.py ================================================ from typing import Dict, List, Tuple import gym import matplotlib.pyplot as plt import pytest from gym import spaces from gym.envs.classic_control import CartPoleEnv from gym.vector import SyncVectorEnv from gym.wrappers import TimeLimit from sequoia.common.gym_wrappers import MultiTaskEnvironment from sequoia.conftest import atari_py_required, monsterkong_required, param_requires_monsterkong from sequoia.utils.utils import dict_union from .multi_task_environment import MultiTaskEnvironment supported_environments: List[str] = ["CartPole-v0"] def test_task_schedule(): original: CartPoleEnv = gym.make("CartPole-v0") starting_length = original.length starting_gravity = original.gravity task_schedule = { 10: dict(length=0.1), 20: dict(length=0.2, gravity=-12.0), 30: dict(gravity=0.9), } env = MultiTaskEnvironment(original, task_schedule=task_schedule) env.seed(123) env.reset() for step in range(100): _, _, done, _ = env.step(env.action_space.sample()) # env.render() if done: env.reset() if 0 <= step < 10: assert env.length == starting_length and env.gravity == starting_gravity elif 10 <= step < 20: assert env.length == 0.1 elif 20 <= step < 30: assert env.length == 0.2 and env.gravity == -12.0 elif step >= 30: assert env.length == starting_length and env.gravity == 0.9 env.close() @pytest.mark.parametrize("environment_name", supported_environments) def test_multi_task(environment_name: str): original = gym.make(environment_name) env = MultiTaskEnvironment(original) env.reset() env.seed(123) plt.ion() default_task = env.default_task for task_id in range(5): for i in range(20): observation, reward, done, info = env.step(env.action_space.sample()) # env.render() env.reset(new_random_task=True) print(f"New task: {env.current_task}") env.close() plt.ioff() plt.close() @pytest.mark.skip(reason="This generates some output, uncomment this to run it.") @pytest.mark.parametrize("environment_name", supported_environments) def test_monitor_env(environment_name): original = gym.make(environment_name) # original = CartPoleEnv() env = MultiTaskEnvironment(original) env = gym.wrappers.Monitor( env, f"recordings/multi_task_{environment_name}", force=True, write_upon_reset=False, ) env.seed(123) env.reset() plt.ion() task_param_values: List[Dict] = [] default_length: float = env.length for task_id in range(20): for i in range(100): observation, reward, done, info = env.step(env.action_space.sample()) # env.render() if done: env.reset(new_task=False) task_param_values.append(env.current_task.copy()) # env.update_task(length=(i + 1) / 100 * 2 * default_length) env.update_task() print(f"New task: {env.current_task.copy()}") env.close() plt.ioff() plt.close() def test_update_task(): """Test that using update_task changes the given values in the environment and in the current_task dict, and that when a value isn't passed to update_task, it isn't reset to its default but instead keeps its previous value. """ original = gym.make("CartPole-v0") env = MultiTaskEnvironment(original) env.reset() env.seed(123) assert env.length == original.length env.update_task(length=1.0) assert env.current_task["length"] == env.length == 1.0 env.update_task(gravity=20.0) assert env.length == 1.0 assert env.current_task["gravity"] == env.gravity == 20.0 env.close() def test_add_task_dict_to_info(): """Test that the 'info' dict contains the task dict.""" original: CartPoleEnv = gym.make("CartPole-v0") starting_length = original.length starting_gravity = original.gravity task_schedule = { 10: dict(length=0.1), 20: dict(length=0.2, gravity=-12.0), 30: dict(gravity=0.9), } env = MultiTaskEnvironment( original, task_schedule=task_schedule, add_task_dict_to_info=True, ) env.seed(123) env.reset() for step in range(100): _, _, done, info = env.step(env.action_space.sample()) # env.render() if done: env.reset() if 0 <= step < 10: assert env.length == starting_length and env.gravity == starting_gravity assert info == env.default_task elif 10 <= step < 20: assert env.length == 0.1 assert info == dict_union(env.default_task, task_schedule[10]) elif 20 <= step < 30: assert env.length == 0.2 and env.gravity == -12.0 assert info == dict_union(env.default_task, task_schedule[20]) elif step >= 30: assert env.length == starting_length and env.gravity == 0.9 assert info == dict_union(env.default_task, task_schedule[30]) env.close() def test_add_task_id_to_obs(): """Test that the 'info' dict contains the task dict.""" original: CartPoleEnv = gym.make("CartPole-v0") starting_length = original.length starting_gravity = original.gravity task_schedule = { 10: dict(length=0.1), 20: dict(length=0.2, gravity=-12.0), 30: dict(gravity=0.9), } env = MultiTaskEnvironment( original, task_schedule=task_schedule, add_task_id_to_obs=True, ) env.seed(123) env.reset() assert env.observation_space == spaces.Dict( x=original.observation_space, task_labels=spaces.Discrete(4), ) for step in range(100): obs, _, done, info = env.step(env.action_space.sample()) # env.render() x, task_id = obs["x"], obs["task_labels"] if 0 <= step < 10: assert env.length == starting_length and env.gravity == starting_gravity assert task_id == 0, step elif 10 <= step < 20: assert env.length == 0.1 assert task_id == 1, step elif 20 <= step < 30: assert env.length == 0.2 and env.gravity == -12.0 assert task_id == 2, step elif step >= 30: assert env.length == starting_length and env.gravity == 0.9 assert task_id == 3, step if done: obs = env.reset() assert isinstance(obs, dict) env.close() def test_starting_step_and_max_step(): """Test that when start_step and max_step arg given, the env stays within the [start_step, max_step] portion of the task schedule. """ original: CartPoleEnv = gym.make("CartPole-v0") starting_length = original.length starting_gravity = original.gravity task_schedule = { 10: dict(length=0.1), 20: dict(length=0.2, gravity=-12.0), 30: dict(gravity=0.9), } env = MultiTaskEnvironment( original, task_schedule=task_schedule, add_task_id_to_obs=True, starting_step=10, max_steps=19, ) env.seed(123) env.reset() assert env.observation_space == spaces.Dict( x=original.observation_space, task_labels=spaces.Discrete(4), ) # Trying to set the 'steps' to something smaller than the starting step # doesn't work. env.steps = -123 assert env.steps == 10 # Trying to set the 'steps' to something greater than the max_steps # doesn't work. env.steps = 50 assert env.steps == 19 # Here we reset the steps to 10, and also check that this works. env.steps = 10 assert env.steps == 10 for step in range(0, 100): # The environment started at an offset of 10. assert env.steps == max(min(step + 10, 19), 10) obs, _, done, info = env.step(env.action_space.sample()) # env.render() x, task_id = obs["x"], obs["task_labels"] # Check that we're always stuck between 10 and 20 assert 10 <= env.steps < 20 assert env.length == 0.1 assert task_id == 1, step if done: print(f"Resetting on step {step}") obs = env.reset() assert isinstance(obs, dict) env.close() @atari_py_required def test_task_id_is_added_even_when_no_known_task_schedule(): """Test that even when the env is unknown or there are no task params, the task_id is still added correctly and is zero at all times. """ # Breakout doesn't have default task params. original: CartPoleEnv = gym.make("ALE/Breakout-v5") env = MultiTaskEnvironment( original, add_task_id_to_obs=True, ) env.seed(123) env.reset() assert env.observation_space == spaces.Dict( x=original.observation_space, task_labels=spaces.Discrete(1), ) for step in range(0, 100): obs, _, done, info = env.step(env.action_space.sample()) # env.render() x, task_id = obs["x"], obs["task_labels"] assert task_id == 0 if done: x, task_id = env.reset() assert task_id == 0 env.close() @monsterkong_required def test_task_schedule_monsterkong(): env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1") from gym.wrappers import TimeLimit env = TimeLimit(env, max_episode_steps=10) env = MultiTaskEnvironment( env, task_schedule={ 0: {"level": 0}, 100: {"level": 1}, 200: {"level": 2}, 300: {"level": 3}, 400: {"level": 4}, }, add_task_id_to_obs=True, ) obs = env.reset() img, task_labels = obs["x"], obs["task_labels"] assert task_labels == 0 assert env.get_level() == 0 for i in range(500): obs, reward, done, info = env.step(env.action_space.sample()) assert obs["task_labels"] == i // 100 assert env.level == i // 100 env.render() assert isinstance(done, bool) if done: print(f"End of episode at step {i}") obs = env.reset() assert obs["task_labels"] == 4 assert env.level == 4 # level stays the same even after reaching that objective. for i in range(500): obs, reward, done, info = env.step(env.action_space.sample()) assert obs["task_labels"] == 4 assert env.level == 4 env.render() if done: print(f"End of episode at step {i}") obs = env.reset() env.close() @monsterkong_required def test_task_schedule_with_callables(): """Apply functions to the env at a given step.""" env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1") from gym.wrappers import TimeLimit env = TimeLimit(env, max_episode_steps=10) from operator import methodcaller env = MultiTaskEnvironment( env, task_schedule={ 0: methodcaller("set_level", 0), 100: methodcaller("set_level", 1), 200: methodcaller("set_level", 2), 300: methodcaller("set_level", 3), 400: methodcaller("set_level", 4), }, add_task_id_to_obs=True, ) obs = env.reset() # img, task_labels = obs assert obs["task_labels"] == 0 assert env.get_level() == 0 for i in range(500): obs, reward, done, info = env.step(env.action_space.sample()) assert obs["task_labels"] == i // 100 assert env.level == i // 100 env.render() assert isinstance(done, bool) if done: print(f"End of episode at step {i}") obs = env.reset() assert obs["task_labels"] == 4 assert env.level == 4 # level stays the same even after reaching that objective. for i in range(500): obs, reward, done, info = env.step(env.action_space.sample()) assert obs["task_labels"] == 4 assert env.level == 4 env.render() if done: print(f"End of episode at step {i}") obs = env.reset() @monsterkong_required def test_random_task_on_each_episode(): env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1") from gym.wrappers import TimeLimit env = TimeLimit(env, max_episode_steps=10) env = MultiTaskEnvironment( env, task_schedule={ 0: {"level": 0}, 5: {"level": 1}, 200: {"level": 2}, 300: {"level": 3}, 400: {"level": 4}, }, add_task_id_to_obs=True, new_random_task_on_reset=True, ) task_labels = [] for i in range(10): obs = env.reset() task_labels.append(obs["task_labels"]) assert len(set(task_labels)) > 1 # Episodes only last 10 steps. Tasks don't have anything to do with the task # schedule. obs = env.reset() start_task_label = obs["task_labels"] for i in range(10): obs, reward, done, info = env.step(env.action_space.sample()) assert obs["task_labels"] == start_task_label if i == 9: assert done else: assert not done env.close() from sequoia.conftest import monsterkong_required def test_random_task_on_each_episode_and_only_one_task_in_schedule(): """BUG: When the goal is to have only one task, it instead keeps sampling a new task from the 'distribution', in the case of cartpole! """ env: MetaMonsterKongEnv = gym.make("CartPole-v1") from gym.wrappers import TimeLimit env = TimeLimit(env, max_episode_steps=10) env = MultiTaskEnvironment( env, task_schedule={ 0: {"length": 0.1}, }, add_task_id_to_obs=True, new_random_task_on_reset=True, ) task_labels = [] lengths = [] for i in range(10): obs = env.reset() task_labels.append(obs["task_labels"]) lengths.append(env.length) done = False while not done: obs, reward, done, info = env.step(env.action_space.sample()) task_labels.append(obs["task_labels"]) lengths.append(env.length) assert set(task_labels) == {0} assert set(lengths) == {0.1} def env_fn_monsterkong() -> gym.Env: env = gym.make("MetaMonsterKong-v0") env = TimeLimit(env, max_episode_steps=10) env = MultiTaskEnvironment( env, task_schedule={ 0: {"level": 1}, 100: {"level": 2}, 200: {"level": 3}, 300: {"level": 4}, 400: {"level": 5}, }, add_task_id_to_obs=True, new_random_task_on_reset=True, ) return env def env_fn_cartpole() -> gym.Env: env = gym.make("CartPole-v0") env = TimeLimit(env, max_episode_steps=10) env = MultiTaskEnvironment( env, task_schedule={ 0: {"length": 0.1}, 100: {"length": 0.2}, 200: {"length": 0.3}, 300: {"length": 0.4}, 400: {"length": 0.5}, }, add_task_id_to_obs=True, new_random_task_on_reset=True, ) return env @pytest.mark.parametrize("env_id", ["cartpole", param_requires_monsterkong("monsterkong")]) def test_task_sequence_is_reproducible(env_id: str): """Test that the multi-task setup is seeded correctly, i.e. that the task sequence is reproducible given the same seed. """ if env_id == "cartpole": env_fn = env_fn_cartpole elif env_id == "monsterkong": env_fn = env_fn_monsterkong else: assert False, f"just testing on cartpole and monsterkong for now, but got env {env_id}" first_results: List[Tuple[int, int]] = [] n_runs = 5 n_episodes_per_run = 10 for run_number in range(n_runs): print(f"starting run {run_number} / {n_runs}") # For each 'run', we record the task sequence and how long each task lasted for. # Then, we want to check that each run was indentical, for a given seed. env = env_fn() env.seed(123) task_ids: List[int] = [] task_lengths: List[int] = [] for episode in range(n_episodes_per_run): print(f"Episode {episode} / {n_episodes_per_run}") obs = env.reset() task_id: int = obs["task_labels"] task_length = 0 done = False while not done: obs, _, done, _ = env.step(env.action_space.sample()) task_length += 1 task_ids.append(task_id) task_lengths.append(task_length) task_ids_and_lengths = list(zip(task_ids, task_lengths)) print(f"Task ids and length of each one: {task_ids_and_lengths}") assert len(set(task_ids)) > 1, "should have been more than just one task!" if not first_results: first_results = task_ids_and_lengths else: # Make sure that the results from this run are equivalent to the others with # the same seed: assert task_ids_and_lengths == first_results from sequoia.common.gym_wrappers import EnvDataset from sequoia.utils.utils import unique_consecutive_with_index def test_iteration(): nb_tasks = 5 steps_per_task = 10 task_schedule = task_schedule = { i * steps_per_task: dict(length=0.1 + i * 0.2) for i in range(5) } env = gym.make("CartPole-v0") env = MultiTaskEnvironment(env, task_schedule=task_schedule) env = TimeLimit(env, max_episode_steps=14) env = EnvDataset(env) lengths = [] total_steps = 0 for episode in range(10): for step, obs in enumerate(env): # print(total_steps, episode, step, obs, env.length) lengths.append(env.length) rewards = env.send(env.action_space.sample()) total_steps += 1 if total_steps > 100: break actual_task_schedule = dict(unique_consecutive_with_index(lengths)) # NOTE: The keys won't necessarily be the same, since episodes might be shorter # than `n_steps_per_task`. length_schedule = {k: v["length"] for k, v in task_schedule.items()} assert list(actual_task_schedule.values()) == list(length_schedule.values()) # assert False, actual_task_schedule ================================================ FILE: sequoia/common/gym_wrappers/observation_limit.py ================================================ """ IDEA: same as EpisodeLimit, for for the number of total observations. """ import gym from gym.error import ClosedEnvironmentError from sequoia.utils import get_logger from .utils import IterableWrapper logger = get_logger(__name__) class ObservationLimit(IterableWrapper): """Closes the env when `max_steps` steps have been performed *in total*. For vectorized environments, each step consumes up to `num_envs` from this total budget, i.e. the step counter is incremented by the batch size at each step. """ def __init__(self, env: gym.Env, max_steps: int): super().__init__(env=env) self._max_obs = max_steps self._obs_counter: int = 0 self._initial_reset = False self._is_closed: bool = False def reset(self): if self._is_closed: if self._obs_counter >= self._max_obs: raise ClosedEnvironmentError( f"Env reached max number of observations ({self._max_obs})" ) raise ClosedEnvironmentError("Can't step through closed env.") # Resetting actually gives you an observation, so we count it here. self._obs_counter += self.env.num_envs if self.is_vectorized else 1 logger.debug(f"(observation {self._obs_counter}/{self._max_obs})") obs = self.env.reset() if self._obs_counter >= self._max_obs: self.close() return obs @property def is_closed(self) -> bool: return self._is_closed def step(self, action): if self._is_closed: if self._obs_counter >= self._max_obs: raise ClosedEnvironmentError( f"Env reached max number of observations ({self._max_obs})" ) raise ClosedEnvironmentError("Can't step through closed env.") obs, reward, done, info = self.env.step(action) self._obs_counter += self.env.num_envs if self.is_vectorized else 1 logger.debug(f"(observation {self._obs_counter}/{self._max_obs})") # BUG: If we dont use >=, then iteration with EnvDataset doesn't work. if self._obs_counter >= self._max_obs: self.close() return obs, reward, done, info def close(self): self.env.close() self._is_closed = True ================================================ FILE: sequoia/common/gym_wrappers/observation_limit_test.py ================================================ from functools import partial import gym import pytest from gym.vector import SyncVectorEnv from sequoia.conftest import DummyEnvironment from .env_dataset import EnvDataset from .observation_limit import ObservationLimit @pytest.mark.parametrize("env_name", ["CartPole-v0"]) def test_step_limit_with_single_env(env_name: str): """Env should close when a given number of observations have been produced""" env = gym.make(env_name) env = ObservationLimit(env, max_steps=5) env.seed(123) done = False # First episode. obs = env.reset() obs, reward, done, info = env.step(env.action_space.sample()) obs, reward, done, info = env.step(env.action_space.sample()) obs = env.reset() obs, reward, done, info = env.step(env.action_space.sample()) assert env.is_closed with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): env.step(env.action_space.sample()) @pytest.mark.xfail( reason="TODO: Fix the bugs in the interaction between " "EnvDataset and ObservationLimit." ) @pytest.mark.parametrize("env_name", ["CartPole-v0"]) def test_step_limit_with_single_env_dataset(env_name: str): env = gym.make(env_name) start = 0 target = 10 env = DummyEnvironment(start=start, target=target, max_value=10 * 2) env = EnvDataset(env) max_steps = 5 env = ObservationLimit(env, max_steps=max_steps) env.seed(123) values = [] for i, obs in zip(range(100), env): values.append(obs) _ = env.send(1) assert values == list(range(start, max_steps)) assert env.is_closed with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): env.step(env.action_space.sample()) with pytest.raises(gym.error.ClosedEnvironmentError): for i, _ in zip(range(5), env): assert False @pytest.mark.parametrize("batch_size", [3, 5]) def test_step_limit_with_vectorized_env(batch_size): start = 0 target = 10 starting_values = [start for i in range(batch_size)] targets = [target for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=target * 2) for start, target in zip(starting_values, targets) ] ) env = ObservationLimit(env, max_steps=3 * batch_size) obs = env.reset() obs, reward, done, info = env.step(env.action_space.sample()) # obs, reward, done, info = env.step(env.action_space.sample()) obs = env.reset() assert env.is_closed with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): _ = env.step(env.action_space.sample()) @pytest.mark.parametrize("batch_size", [3, 5]) def test_step_limit_with_vectorized_env_partial_final_batch(batch_size): """In the case where the batch size isn't a multiple of the max observations, the env returns ceil(max_obs / batch_size) * batch_size observations in total. TODO: If we ever get to few-shot learning or something like that, we might have to care about this. """ start = 0 target = 10 starting_values = [start for i in range(batch_size)] targets = [target for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=target * 2) for start, target in zip(starting_values, targets) ] ) env = ObservationLimit(env, max_steps=3 * batch_size + 1) obs = env.reset() assert not env.is_closed obs, reward, done, info = env.step(env.action_space.sample()) obs, reward, done, info = env.step(env.action_space.sample()) assert not env.is_closed # obs, reward, done, info = env.step(env.action_space.sample()) obs = env.reset() assert env.is_closed with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): _ = env.step(env.action_space.sample()) ================================================ FILE: sequoia/common/gym_wrappers/pixel_observation.py ================================================ """ Fixes some of the annoying things about the PixelObservationWrapper. """ from typing import Union import gym import numpy as np from gym.wrappers.pixel_observation import PixelObservationWrapper as PixelObservationWrapper_ from sequoia.common.spaces.image import Image from .utils import IterableWrapper class PixelObservationWrapper(PixelObservationWrapper_): """Less annoying version of gym's `PixelObservationWrapper`: - Resets the environment before calling the constructor (fixes crash). - Makes the popup window non-visible when rendering with mode="rgb_array". - State is always pixels instead of dict with pixels at key 'pixels' - TODO: What if we wanted to also have access to the state? We might have to revert this change at some point. - `reset()` returns the pixels. """ def __init__(self, env: Union[str, gym.Env]): if isinstance(env, str): env = gym.make(env) env.reset() super().__init__(env) pixel_space = self.observation_space["pixels"] self.observation_space = Image.from_box(pixel_space) from gym.envs.classic_control.rendering import Viewer self.viewer: Viewer if self.env.viewer is None: self.env.render(mode="rgb_array") if self.env.viewer is not None: self.viewer: Viewer = env.viewer self.viewer.window.set_visible(False) def step(self, *args, **kwargs): state, reward, done, info = super().step(*args, **kwargs) state = state["pixels"] state = self.to_array(state) return state, reward, done, info def reset(self, *args, **kwargs): self.state = super().reset()["pixels"] self.state = self.to_array(self.state) return self.state def render(self, mode: str = "human", **kwargs): if mode == "human" and self.viewer and not self.viewer.window.visible: self.viewer.window.set_visible(True) return super().render(mode=mode, **kwargs) def to_array(self, image) -> np.ndarray: if not isinstance(image, np.ndarray): # TODO: There is something weird happening here, something to do # with the image having a negative stride dimension or something # like that. Also, ideally, we would return a numpy array (without # depending on pytorch here) from sequoia.common.transforms.to_tensor import to_tensor return to_tensor(image) return np.array(image.copy()) return image class ImageObservations(IterableWrapper): def __init__(self, env: gym.Env): super().__init__(env=env) self.observation_space = Image.wrap(self.env.observation_space) ================================================ FILE: sequoia/common/gym_wrappers/pixel_observation_test.py ================================================ import gym import numpy as np import pytest from .pixel_observation import PixelObservationWrapper pyglet = pytest.importorskip("pyglet") def test_passing_string_to_constructor(): env = PixelObservationWrapper("CartPole-v0") assert env.observation_space.shape == (400, 600, 3) def test_observation_space(): env = PixelObservationWrapper(gym.make("CartPole-v0")) assert env.observation_space.shape == (400, 600, 3) def test_reset_gives_pixels(): with PixelObservationWrapper(gym.make("CartPole-v0")) as env: start_state = env.reset() assert start_state.shape == (400, 600, 3) assert start_state.dtype == np.uint8 def test_step_obs_is_pixels(): with PixelObservationWrapper(gym.make("CartPole-v0")) as env: env.reset() obs, _, _, _ = env.step(env.action_space.sample()) assert obs.shape == (400, 600, 3) assert obs.dtype == np.uint8 def test_state_attribute_is_pixels(): with PixelObservationWrapper(gym.make("CartPole-v0")) as env: env.reset() assert env.state.shape == (400, 600, 3) assert env.state.dtype == np.uint8 def test_render_rgb_array(): with PixelObservationWrapper(gym.make("CartPole-v0")) as env: window = env.viewer.window for i in range(50): obs, _, done, _ = env.step(env.action_space.sample()) state = env.render(mode="rgb_array") assert state.shape == (400, 600, 3) assert state.dtype == np.uint8 if done: env.reset() def test_render_with_human_mode(): with PixelObservationWrapper(gym.make("CartPole-v0")) as env: window = env.viewer.window for i in range(50): obs, _, done, _ = env.step(env.action_space.sample()) env.render(mode="human") assert obs.shape == (400, 600, 3) if done: env.reset() assert env.viewer.window is window def test_render_with_human_mode_with_env_dataset(): from .env_dataset import EnvDataset with PixelObservationWrapper(gym.make("CartPole-v0")) as env: env = EnvDataset(env) window = env.viewer.window obs = env.reset() for i, batch in zip(range(500), env): obs = batch env.render(mode="human") assert obs.shape == (400, 600, 3) action = env.action_space.sample() rewards = env.send(action) assert env.viewer.window is window ================================================ FILE: sequoia/common/gym_wrappers/policy_env.py ================================================ """TODO: Idea: create a wrapper that accepts a 'policy' which will decide an action to take whenever the `action` argument to the `step` method is None. This policy should then accept the 'state' or something like that. """ from dataclasses import dataclass from typing import Any, Callable, Dict, Generic, Iterable, Iterator, Optional, Tuple, TypeVar import gym from torch.utils.data import IterableDataset from sequoia.common.batch import Batch from sequoia.utils.logging_utils import get_logger from .utils import StepResult logger = get_logger(__name__) # from sequoia.settings.base.environment import Environment # from sequoia.settings.base.objects import (ActionType, ObservationType, RewardType) ObservationType = TypeVar("ObservationType") ActionType = TypeVar("ActionType") RewardType = TypeVar("RewardType") # Just for type hinting purposes. class Environment(gym.Env, Generic[ObservationType, ActionType, RewardType]): def step(self, action: ActionType) -> Tuple[ObservationType, RewardType, bool, Dict]: raise NotImplementedError def reset(self) -> ObservationType: raise NotImplementedError DatasetItem = TypeVar("DatasetItem") # Type annotation for functions that will create the items of the # IterableDataset below, given the current 'Context', DatasetItemCreator = Callable[ [ ObservationType, # 'current' state ActionType, # actions applied on the 'current' state ObservationType, # resulting 'next' state RewardType, # rewards associated with the transition above bool, # Wether the 'next' state is final (i.e. the last in an episode) Dict, # the 'info' dict associated with the 'next' state (from Env.step) ], DatasetItem, ] @dataclass(frozen=True) class StateTransition(Batch, Generic[ObservationType, ActionType]): observation: ObservationType action: ActionType next_observation: ObservationType # IDEA: Instead of creating extra properties like this, we could have fields # like 'field(aliases="bob")', and getattr and setattr would get/set the # corresponding attribute when an alias is used instead of the actual name. @property def state(self) -> ObservationType: return self.observation @property def next_state(self) -> ObservationType: return self.next_observation # By default, the PolicyEnv will yield this kind of item: DefaultDatasetItem = Tuple[StateTransition, RewardType] def default_dataset_item_creator( observations: ObservationType, actions: ActionType, next_observations: ObservationType, rewards: RewardType, done: bool, info: Dict = None, ) -> DefaultDatasetItem: """Create an item of the IterableDataset below, given the current 'context'. Parameters ---------- observations : Observations The 'starting' observations/state. actions : Actions The actions that were taken in the 'starting' state. next_observations : Observations The resulting observations in the 'end' state. rewards : Rewards The reward associated with that state transition and action. done : bool Wether the 'end' observations/state are the last of an episode. info : Dict, optional Info dict associated with the 'next' observation, by default None. Returns ------- Tuple[StateTransition, Rewards] A Tuple of the form `Tuple[Tuple[Observations, Actions, Observations], Rewards]`. NOTE: `done` and `info` aren't used here, but you could use them in your own version of this function that you'd then pass to the PolicyEnv constructor or to the `set_policy` method. """ state_transition = StateTransition(observations, actions, next_observations) return state_transition, rewards class PolicyEnv(gym.Wrapper, IterableDataset, Iterable[DatasetItem]): """Wrapper for an environment that adds the following capabilities: 1. Makes it possible to call step(None), in which case the policy will be used to determine the action to take given the current observation and the action space. 2. Creates an 'IterableDataset' from the env, where one iteration over the dataset is equivalent to one episode/trajectory in the environment. The types of items yielded by this iterator can be customized by passing a different callable to `make_dataset_item`. The default items are of type `Tuple[StateTransition, Rewards]`, where `StateTransition` is a tuple-like object of the form `Tuple`. """ def __init__( self, env: Environment[ObservationType, ActionType, RewardType], policy: Optional[Callable[[Tuple], Any]] = None, make_dataset_item: DatasetItemCreator = default_dataset_item_creator, ): super().__init__(env) self.make_dataset_item = make_dataset_item self.policy = policy self._step_result: Optional[StepResult] = None self._closed = False self._reset = False self._n_episodes: int = 0 self._n_steps: int = 0 self._n_steps_in_episode: int = 0 self._observation: Optional[Observations] = None self._action: Optional[Actions] = None def set_policy(self, policy: Callable[[ObservationType, gym.Space], ActionType]) -> None: """Sets a new policy to be used to generate missing actions.""" self.policy = policy def step(self, action: Optional[Any] = None) -> StepResult: if action is None: if self.policy is None: raise RuntimeError("Need to have a policy set, since action is None.") if self._observation is None: raise RuntimeError("Reset should have been called before calling step") # Get the 'filler' action using the current policy. action = self.policy(self._observation, self.action_space) if action not in self.action_space: raise RuntimeError( f"The policy returned an action which isn't " f"in the action space: {action}" ) step_result = StepResult(*self.env.step(action)) self._observation = step_result[0] self._n_steps += 1 self._n_steps_in_episode += 1 return step_result def close(self) -> None: self.env.close() self._reset = False self._closed = True self._observation = None def reset(self, *args, **kwargs) -> None: self._observation = self.env.reset(*args, **kwargs) self._reset = True self._n_steps_in_episode = 0 return self._observation def __iter__(self) -> Iterator[DatasetItem]: """Iterator for an episode/trajectory in the env. This uses the policy to iteratively perform an episode in the env, and yields items at each step, which are the result of the `make_dataset_item` function. By default, these items are of the form `Tuple, rewards>`. Returns ------- Iterable[DatasetItem] Iterable for a 'trajectory' in the env. Yields ------- DatasetItem The result of `make_dataset_item(current_context)`, by default a tuple of . Raises ------ RuntimeError If no policy is set. """ if not self.policy: raise RuntimeError("Need to have a policy set in order to iterate " "on this env.") if not self._reset: # Reset the env, if needed. previous_observations = self.reset() else: # The env was just reset, so the observation was set to # self._observation. assert self._observation is not None previous_observations = self._observation logger.debug(f"Start of episode {self._n_episodes}") done = False while not done: logger.debug(f"steps (episode): {self._n_steps_in_episode}, total: {self._n_steps}") # Get the batch of actions using the policy. actions = self.policy(previous_observations, self.action_space) observations, rewards, done, info = self.step(actions) # TODO: Need to figure out what to yield here.. yield self.make_dataset_item( observations=previous_observations, actions=actions, next_observations=observations, rewards=rewards, done=done, info=info, ) # Update the 'previous' observation. previous_observations = observations if not isinstance(done, bool): if any(done): raise RuntimeError( "done should either be a bool or always false, since " "we can't do partial resets." ) done = False self._n_episodes += 1 logger.debug(f"Episode has ended.") self._reset = False ================================================ FILE: sequoia/common/gym_wrappers/policy_env_test.py ================================================ from typing import List from sequoia.conftest import DummyEnvironment from .policy_env import PolicyEnv, StateTransition def test_iterating_with_policy(): env = DummyEnvironment() env = PolicyEnv(env) env.seed(123) actions = [0, 1, 1, 2, 1, 1, 1, 1] expected_obs = [0, 0, 1, 2, 1, 2, 3, 4, 5] expected_rewards = [5, 4, 3, 4, 3, 2, 1, 0] expected_dones = [False, False, False, False, False, False, False, True] # Expect the transitions to have this form. expected_transitions = list(zip(expected_obs[0:], actions[0:], expected_obs[1:])) reset_obs = 0 # obs = env.reset() # assert obs == reset_obs n_calls = 0 def custom_policy(observations, action_space): # Deteministic policy used for testing purposes. nonlocal n_calls action = actions[n_calls] n_calls += 1 return action n_expected_transitions = len(actions) env.set_policy(custom_policy) actual_transitions: List[StateTransition] = [] i = 0 for i, batch in enumerate(env): print(f"Step {i}: batch: {batch}") state_transition, reward = batch actual_transitions.append(state_transition) observation, action, next_observation = state_transition.as_tuple() assert observation == expected_obs[i] assert next_observation == expected_obs[i + 1] assert action == actions[i] assert reward == expected_rewards[i] assert i == n_expected_transitions - 1 assert len(actual_transitions) == n_expected_transitions assert [v.as_tuple() for v in actual_transitions] == expected_transitions ================================================ FILE: sequoia/common/gym_wrappers/smooth_environment.py ================================================ """TODO: A Wrapper that creates smooth transitions between tasks. Could be based on the MultiTaskEnvironment, but with a moving average update of the task, rather than setting a brand new random task. There could also be some kind of 'task_duration' parameter, and the model does linear or smoothed-out transitions between them depending on the step number? """ from typing import Any, Callable, Dict, List, Optional, Union import gym import numpy as np from gym import spaces from sequoia.common.spaces.sparse import Sparse from sequoia.utils.logging_utils import get_logger from .multi_task_environment import MultiTaskEnvironment, add_task_labels logger = get_logger(__name__) ## TODO (@lebrice): Really cool idea!: Create a TaskSchedule class that inherits # from Dict and when you __getitem__ a missing key, returns an interpolation! class SmoothTransitions(MultiTaskEnvironment): """Extends MultiTaskEnvironment to support smooth task boudaries. Same as `MultiTaskEnvironment`, but when in between two tasks, the environment will have its values set to a linear interpolation of the attributes from the two neighbouring tasks. ``` env = gym.make("CartPole-v0") env = SmoothTransitions(env, task_schedule={ 10: dict(length=1.0), 20: dict(length=2.0), }) env.seed(123) env.reset() ``` At step 0, the length is the default value (0.5) at step 1, the length is 0.5 + (1 / 10) * (1.0-0.5) = 0.55 at step 2, the length is 0.5 + (2 / 10) * (1.0-0.5) = 0.60, etc. NOTE: This only works with float attributes at the moment. """ def __init__( self, env: gym.Env, task_schedule: Dict[int, Dict[str, float]] = None, task_params: List[str] = None, noise_std: float = 0.2, add_task_dict_to_info: bool = False, add_task_id_to_obs: bool = False, new_random_task_on_reset: bool = False, starting_step: int = 0, nb_tasks: int = None, max_steps: int = None, seed: int = None, only_update_on_episode_end: bool = False, ): """Wraps the environment, allowing for smooth task transitions. Same as `MultiTaskEnvironment`, but when in between two tasks, the environment will have its values set to a linear interpolation of the attributes from the two neighbouring tasks. TODO: Should we update the task paramers only on resets? or at each step? Might save a little bit of compute to only do it on resets, but then it's not exactly as 'smooth' as we would like it to be, especially if a single episode can be very long! NOTE: Assumes that the attributes are floats for now. Args: env (gym.Env): The gym environment to wrap. task_schedule (Dict[int, Dict[str, float]], optional) (Same as `MultiTaskEnvironment`): Dict mapping from a given step to the attributes to be set at that time. Interpolations between the two neighbouring tasks will be used between task transitions. only_update_on_episode_end (bool, optional): When `False` (default), update the attributes of the environment smoothly after each step. When `True`, only update at the end of episodes (when `reset()` is called). """ if task_schedule: if not all(isinstance(value, dict) for value in task_schedule.values()): raise RuntimeError("Task schedule values should be dicts of attributes to change.") task_params = list( set().union(*[task_dict.keys() for task_dict in task_schedule.values()]) ) elif not task_params: raise RuntimeError( "This wrapper needs either a `task_schedule` or `task_params` (the environment " "attributes to modify)" ) super().__init__( env, task_schedule=task_schedule, task_params=task_params, noise_std=noise_std, add_task_dict_to_info=add_task_dict_to_info, add_task_id_to_obs=add_task_id_to_obs, new_random_task_on_reset=new_random_task_on_reset, starting_step=starting_step, nb_tasks=nb_tasks, max_steps=max_steps, seed=seed, ) self.only_update_on_episode_end = only_update_on_episode_end if self._max_steps is None and len(self.task_schedule) > 1: # TODO: DO we want to prevent going past the 'task step' in the task schedule? pass if isinstance(self.env.unwrapped, gym.vector.VectorEnv): raise NotImplementedError( "This isn't really supposed to be applied on top of a " "vectorized environment, rather, it should be used within each" " individual env." ) if self.add_task_id_to_obs: nb_tasks = nb_tasks if nb_tasks is not None else len(self.task_schedule) self.observation_space = add_task_labels( self.env.observation_space, Sparse(spaces.Discrete(n=nb_tasks), sparsity=1.0), ) def step(self, *args, **kwargs): if not self.only_update_on_episode_end: self.smooth_update() results = super().step(*args, **kwargs) return results def reset(self, **kwargs): # TODO: test this out. if self.only_update_on_episode_end: self.smooth_update() return super().reset(**kwargs) @property def current_task_id(self) -> Optional[int]: """Returns the 'index' of the current task within the task schedule. In this case, we return None, since there aren't clear task boundaries. """ return None def task_array(self, task: Dict[str, float]) -> np.ndarray: return np.array([task.get(k, self.default_task[k]) for k in self.task_params]) def smooth_update(self) -> None: """Update the curren_task at every step, based on a smooth mix of the previous and the next task. Every time we reach a _step that is in the task schedule, we update the 'prev_task_step' and 'next_task_step' attributes. """ current_task: Dict[str, float] = {} for attr in self.task_params: steps: List[int] = [] # list of the fixed_points: List[float] = [] for step, task in sorted(self.task_schedule.items()): steps.append(step) fixed_points.append(task.get(attr, self.default_task[attr])) # logger.debug(f"{attr}: steps={steps}, fp={fixed_points}") interpolated_value: float = np.interp( x=self.steps, xp=steps, fp=fixed_points, ) current_task[attr] = interpolated_value # logger.debug(f"interpolated value of {attr} at step {self.step}: {interpolated_value}") # logger.debug(f"Updating task at step {self.step}: {current_task}") self.current_task = current_task ================================================ FILE: sequoia/common/gym_wrappers/smooth_environment_test.py ================================================ from typing import Dict import gym import matplotlib.pyplot as plt import numpy as np from .smooth_environment import SmoothTransitions def test_task_schedule(): environment_name = "CartPole-v0" # wandb.init(name="SSCL/RL_testing/smooth", monitor_gym=True) original = gym.make(environment_name) starting_length = original.length starting_gravity = original.gravity end_length = 5 * starting_length end_gravity = 5 * starting_gravity total_steps = 100 # Increase the length linearly up to 3 times the starting value. # Increase the gravity linearly up to 5 times the starting value. task_schedule: Dict[int, Dict[str, float]] = { # 0: dict(length=starting_length, gravity=starting_gravity), total_steps: dict(length=end_length, gravity=end_gravity), } env = SmoothTransitions( original, task_schedule=task_schedule, ) # env = gym.wrappers.Monitor(env, f"recordings/smooth_{environment_name}", force=True) env.seed(123) env.reset() assert env.gravity == starting_gravity assert env.length == starting_length # plt.ion() params: Dict[int, Dict[str, float]] = {} for step in range(total_steps): expected_steps = starting_length + (step / total_steps) * (end_length - starting_length) expected_gravity = starting_gravity + (step / total_steps) * ( end_gravity - starting_gravity ) _, reward, done, _ = env.step(env.action_space.sample()) assert np.isclose(env.length, expected_steps) assert np.isclose(env.gravity, expected_gravity) # env.render() # if done: # env.reset() params[step] = env.current_task.copy() # print(f"New task: {env.current_task_dict()}") # assert False, params[step] env.close() # plt.ioff() plt.close() def test_update_only_on_reset(): """Test that when using the 'only_update_on_episode_end' argument with a value of True, the smooth updates don't occur during the episodes, but only once after an episode has ended (when `reset()` is called). """ total_steps = 100 original = gym.make("CartPole-v0") start_length = original.length end_length = 10.0 task_schedule = {total_steps: dict(length=end_length)} env = SmoothTransitions( original, task_schedule=task_schedule, only_update_on_episode_end=True, ) env.reset() env.seed(123) expected_length = start_length for i in range(total_steps): assert env.steps == i _, _, done, _ = env.step(env.action_space.sample()) assert env.steps == i + 1 if done: _ = env.reset() expected_length = start_length + ((i + 1) / total_steps) * (end_length - start_length) assert np.isclose(env.length, expected_length) def test_task_id_is_always_None(): total_steps = 100 original = gym.make("CartPole-v0") start_length = original.length end_length = 10.0 task_schedule = {total_steps: dict(length=end_length)} env = SmoothTransitions( original, task_schedule=task_schedule, only_update_on_episode_end=True, add_task_id_to_obs=True, add_task_dict_to_info=True, ) for observation in (env.observation_space.sample() for i in range(100)): x, task_id = observation["x"], observation["task_labels"] assert task_id is None env.reset() env.seed(123) expected_length = start_length for i in range(total_steps): assert env.steps == i obs, _, done, _ = env.step(env.action_space.sample()) x, task_id = obs["x"], obs["task_labels"] assert task_id is None assert env.steps == i + 1 if done: obs = env.reset() x, task_id = obs["x"], obs["task_labels"] assert task_id is None expected_length = start_length + ((i + 1) / total_steps) * (end_length - start_length) assert np.isclose(env.length, expected_length) ================================================ FILE: sequoia/common/gym_wrappers/step_callback_wrapper.py ================================================ """TODO: Make a wrapper that calls a given function/callback when a given step is reached. """ from abc import ABC, abstractmethod from typing import Callable, List, Tuple, Union import gym from .utils import IterableWrapper class Callback(Callable[[int, gym.Env], None], ABC): @abstractmethod def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None: raise NotImplementedError() class StepCallback(Callback, ABC): def __init__(self, step: int, func: Callable[[int, gym.Env, Tuple], None] = None): self.step = step self.func = func def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None: if self.func: return self.func(step, env, step_results) raise NotImplementedError("Create your own callback or pass a func to use.") class PeriodicCallback(Callback): def __init__(self, period: int, offset: int = 0, func: Callable[[int, gym.Env], None] = None): self.period = period self.offset = offset self.func = func def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None: if self.func: return self.func(step, env, step_results) raise NotImplementedError("Create your own callback or pass a func to use.") class StepCallbackWrapper(IterableWrapper): """Wrapper that will execute some callbacks when certain steps are reached.""" def __init__( self, env: gym.Env, callbacks: List[Callback] = None, ): super().__init__(env) self._steps = 0 self.callbacks = callbacks or [] def add_callback(self, callback: Union[Callback]) -> None: self.callbacks.append(callback) def add_step_callback(self, step: int, callback: Callable[[int, gym.Env], None]): if isinstance(callback, StepCallback): assert step == callback.step else: callback = StepCallback(step=step, func=callback) self.add_callback(callback) def add_periodic_callback(self, period: int, callback: StepCallback, offset: int = 0): if isinstance(callback, PeriodicCallback): assert period == callback.period assert offset == callback.offset else: callback = PeriodicCallback(period=period, offset=offset, func=callback) self.add_callback(callback) def step(self, action): step_results = super().step(action) for callback in self.callbacks: if isinstance(callback, StepCallback): if callback.step == self._steps: callback(self._steps, self, step_results) elif isinstance(callback, PeriodicCallback): if ( self._steps >= callback.offset and (self._steps - callback.offset) % callback.period == 0 ): callback(self._steps, self, step_results) else: # if it's a callable, just call it all the time, assuming that # it will use some condition in it's __call__ to check wether # it should be executed or not. callback(self._steps, self, step_results) self._steps += 1 return step_results ================================================ FILE: sequoia/common/gym_wrappers/step_callback_wrapper_test.py ================================================ from typing import Tuple import gym from .step_callback_wrapper import PeriodicCallback, StepCallback, StepCallbackWrapper i: int = 0 def increment_i(step: int, env: gym.Env, step_results: Tuple): global i print(f"Incrementing i at step {step}: ({i} -> {i+1})") i += 1 def decrement_i(step: int, env: gym.Env, step_results: Tuple): global i print(f"Decrementing i at step {step}: ({i} -> {i-1})") i -= 1 def test_step_callback(): callback = StepCallback(step=7, func=increment_i) env = StepCallbackWrapper(gym.make("CartPole-v0"), callbacks=[callback]) env.reset() global i i = 0 for step in range(10): obs, reward, done, info = env.step(env.action_space.sample()) if step < 7: assert i == 0 else: assert i == 1 if done: env.reset() env.close() def test_periodic_callback(): global i i = 0 inc_callback = PeriodicCallback(period=5, func=increment_i) dec_callback = PeriodicCallback(period=5, func=decrement_i, offset=2) env = StepCallbackWrapper(gym.make("CartPole-v0"), callbacks=[inc_callback, dec_callback]) env.reset() def _next(env) -> int: obs, reward, done, info = env.step(env.action_space.sample()) if done: env.reset() return i assert _next(env) == 1 assert _next(env) == 1 assert _next(env) == 0 assert _next(env) == 0 assert _next(env) == 0 assert _next(env) == 1 assert _next(env) == 1 assert _next(env) == 0 assert _next(env) == 0 assert _next(env) == 0 env.close() ================================================ FILE: sequoia/common/gym_wrappers/transform_wrappers.py ================================================ from typing import Callable, Union import typing import gym from gym import Space, spaces from gym.wrappers import TransformObservation as TransformObservation_ from gym.wrappers import TransformReward as TransformReward_ from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support, has_tensor_support from sequoia.common.transforms.compose import Compose from sequoia.common.transforms.transform import Transform # if typing.TYPE_CHECKING: # from sequoia.common.transforms.transform import Transform from sequoia.utils.logging_utils import get_logger from .utils import IterableWrapper logger = get_logger(__name__) class TransformObservation(TransformObservation_, IterableWrapper): def __init__(self, env: gym.Env, f: Union[Callable, Compose]): if isinstance(f, list) and not callable(f): f = Compose(f) super().__init__(env, f=f) self.f: "Transform" # try: self.observation_space = self(self.env.observation_space) if has_tensor_support(self.env.observation_space): self.observation_space = add_tensor_support(self.observation_space) # except Exception as e: # logger.warning(UserWarning( # f"Don't know how the transform {self.f} will impact the " # f"observation space! (Exception: {e})" # )) def __call__(self, *args, **kwargs): return self.f(*args, **kwargs) def __iter__(self): if self.wrapping_passive_env: # TODO: For now, we assume that the passive environment has already # split stuff correctly for us to use. for obs, rewards in self.env: yield self(obs), rewards else: return super().__iter__() class TransformReward(TransformReward_, IterableWrapper): def __init__(self, env: gym.Env, f: Union[Callable, Compose]): if isinstance(f, list) and not callable(f): f = Compose(f) super().__init__(env, f=f) self.f: Compose # Modify the reward space, if it exists. if hasattr(self.env, "reward_space"): self.reward_space = self.env.reward_space else: self.reward_space = spaces.Box( low=self.env.reward_range[0], high=self.env.reward_range[1], shape=(), ) try: self.reward_space = self.f(self.reward_space) logger.debug(f"New reward space after transform: {self.reward_space}") except Exception as e: logger.warning( UserWarning( f"Don't know how the transform {self.f} will impact the " f"reward space! (Exception: {e})" ) ) class TransformAction(IterableWrapper): def __init__(self, env: gym.Env, f: Callable[[Union[gym.Env, Space]], Union[gym.Env, Space]]): if isinstance(f, list) and not callable(f): f = Compose(f) super().__init__(env) self.f: Compose = f # Modify the action space by applying the transform onto it. self.action_space = self.env.action_space if isinstance(self.f, Transform): self.action_space = self.f(self.env.action_space) # logger.debug(f"New action space after transform: {self.observation_space}") def step(self, action): return self.env.step(self.action(action)) def action(self, action): return self.f(action) ================================================ FILE: sequoia/common/gym_wrappers/transform_wrappers_test.py ================================================ import gym import numpy as np from sequoia.common.spaces import Image from sequoia.common.transforms import Compose, Transforms from sequoia.conftest import monsterkong_required from .transform_wrappers import TransformObservation @monsterkong_required def test_compose_on_image_space(): in_space = Image(0, 255, shape=(64, 64, 3), dtype=np.uint8) transform = Compose([Transforms.to_tensor, Transforms.three_channels]) expected = Image(0, 1.0, shape=(3, 64, 64), dtype=np.float32) actual = transform(in_space) assert actual == expected env = gym.make("MetaMonsterKong-v0") assert env.observation_space == gym.spaces.Box(0, 255, (64, 64, 3), np.uint8) assert env.observation_space == in_space wrapped_env = TransformObservation(env, transform) assert wrapped_env.observation_space == expected import pytest import torch from torchvision.datasets import MNIST from sequoia.common.transforms import Compose @pytest.mark.skipif(not torch.cuda.is_available(), reason="Need cuda for this test.") def test_move_wrapper_and_iteration(): batch_size = 1 transforms = Compose([Transforms.to_tensor]) dataset = MNIST("data", transform=transforms) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) from sequoia.settings.sl.environment import PassiveEnvironment env = PassiveEnvironment( dataset, batch_size=batch_size, n_classes=10, observation_space=obs_space, ) from functools import partial from sequoia.utils.generic_functions import move from .transform_wrappers import TransformReward env = TransformObservation(env, partial(move, device="cuda")) env = TransformReward(env, partial(move, device="cuda")) obs, rewards_next = next(iter(env)) rewards_send = env.send(env.action_space.sample()) assert obs.device.type == "cuda" assert rewards_next.device.type == "cuda" assert rewards_send.device.type == "cuda" ================================================ FILE: sequoia/common/gym_wrappers/utils.py ================================================ import inspect from abc import ABC from functools import partial from typing import ( Any, Callable, Dict, Generic, Iterator, NamedTuple, Optional, Sequence, Tuple, Type, TypeVar, Union, ) import warnings import gym import numpy as np from gym.envs import registry from gym.envs.classic_control import ( AcrobotEnv, CartPoleEnv, Continuous_MountainCarEnv, MountainCarEnv, PendulumEnv, ) from gym.envs.registration import load from gym.vector import VectorEnv from torch.utils.data import IterableDataset from sequoia.utils.logging_utils import get_logger classic_control_envs = ( AcrobotEnv, CartPoleEnv, PendulumEnv, MountainCarEnv, Continuous_MountainCarEnv, ) classic_control_env_prefixes: Tuple[str, ...] = ( "CartPole", "Pendulum", "Acrobot", "MountainCar", "MountainCarContinuous", ) logger = get_logger(__name__) def is_classic_control_env(env: Union[str, gym.Env, Type[gym.Env]]) -> bool: """Returns `True` if the given env id, env class, or env instance is a classic-control env. Parameters ---------- env : Union[str, gym.Env] Env id, or env class, or env instance. Returns ------- bool Wether the given env is a classic-control env from Gym. Examples: >>> import gym >>> is_classic_control_env("CartPole-v0") True >>> is_classic_control_env("Breakout-v1") False >>> is_classic_control_env("bob") False >>> from gym.envs.classic_control import CartPoleEnv >>> is_classic_control_env(CartPoleEnv) True """ if isinstance(env, partial): if env.func is gym.make and isinstance(env.args[0], str): logger.warning( RuntimeWarning( "Don't pass partial(gym.make, 'some_env'), just use the env string instead." ) ) env = env.args[0] if isinstance(env, str): try: spec = registry.spec(env) if isinstance(spec.entry_point, str): return "gym.envs.classic_control" in spec.entry_point if inspect.isclass(spec.entry_point): env = spec.entry_point except gym.error.Error as e: # malformed env id, for instance. logger.debug(f"can't tell if env id {env} is a classic-control env! ({e})") return False if inspect.isclass(env): return issubclass(env, classic_control_envs) if isinstance(env, gym.Env): return isinstance(env.unwrapped, classic_control_envs) return False def is_proxy_to(env, env_type_or_types: Union[Type[gym.Env], Tuple[Type[gym.Env], ...]]) -> bool: """Returns wether `env` is a proxy to an env of the given type or types.""" from sequoia.client.env_proxy import EnvironmentProxy return isinstance(env.unwrapped, EnvironmentProxy) and issubclass( env.unwrapped._environment_type, env_type_or_types ) def is_atari_env(env: Union[str, gym.Env]) -> bool: """Returns `True` if the given env id, env class, or env instance is a Atari environment. Parameters ---------- env : Union[str, gym.Env] Env id, or env class, or env instance. Returns ------- bool Wether the given env is an Atari env from Gym. Examples: >>> import gym >>> is_atari_env("CartPole-v0") False >>> is_atari_env("bob") False >>> # is_atari_env("ALE/Breakout-v5") # True >>> # is_atari_env("Breakout-v0") # True NOTE: Removing this doctest, since recent changes to gym have changed this a bit. >>> #from gym.envs import atari >>> #is_atari_env(atari.AtariEnv) # requires atari_py to be installed # True """ from sequoia.settings.rl.envs import ATARI_PY_INSTALLED if not isinstance(env, (str, gym.Env)): raise RuntimeError(f"`env` needs to be either a str or gym env, not {env}") if isinstance(env, str): try: spec = registry.spec(env) except gym.error.NameNotFound: return False except gym.error.NamespaceNotFound: return False if spec.namespace is None: return False return spec.namespace is "ALE" if not ATARI_PY_INSTALLED: return False raise NotImplementedError(f"TODO: Check if isinstance(env.unwrapped, AtariEnv)") if isinstance(env, partial): if env.func is gym.make and isinstance(env.args[0], str): logger.warning( RuntimeWarning( "Don't pass partial(gym.make, 'some_env'), just use the env string instead." ) ) env = env.args[0] # assert False, [env_spec for env_spec in registry.all()] if isinstance(env, str): # and env.startswith("Breakout"): try: spec = registry.spec(env) if isinstance(spec.entry_point, str): return "gym.envs.atari" in spec.entry_point or "ale_py" in spec.entry_point if inspect.isclass(spec.entry_point): env = spec.entry_point except gym.error.Error as e: # malformed env id, for instance. logger.debug(f"can't tell if env id {env} is an atari env! ({e})") return False try: from gym.envs import atari AtariEnv = atari.AtariEnv if inspect.isclass(env) and issubclass(env, AtariEnv): return True return isinstance(env, gym.Env) and isinstance(env.unwrapped, AtariEnv) except (ImportError, gym.error.DependencyNotInstalled): return False return False def get_env_class(env: Union[str, gym.Env, Type[gym.Env], Callable[[], gym.Env]]) -> Type[gym.Env]: if isinstance(env, partial): if env.func is gym.make and isinstance(env.args[0], str): return get_env_class(env.args[0]) return get_env_class(env.func) if isinstance(env, str): return load(env) if isinstance(env, gym.Wrapper): return type(env.unwrapped) if isinstance(env, gym.Env): return type(env) if inspect.isclass(env) and issubclass(env, gym.Env): return env raise NotImplementedError(f"Don't know how to get the class of env being used by {env}!") def is_monsterkong_env(env: Union[str, gym.Env, Callable[[], gym.Env]]) -> bool: if isinstance(env, str): return env.lower().startswith(("metamonsterkong", "monsterkong")) try: from meta_monsterkong.make_env import MetaMonsterKongEnv if inspect.isclass(env): return issubclass(env, MetaMonsterKongEnv) if isinstance(env, gym.Env): return isinstance(env, MetaMonsterKongEnv) return False except ImportError: return False logger = get_logger(__name__) EnvType = TypeVar("EnvType", bound=gym.Env) ObservationType = TypeVar("ObservationType") ActionType = TypeVar("ActionType") RewardType = TypeVar("RewardType") class StepResult(NamedTuple): observation: ObservationType reward: RewardType done: Union[bool, Sequence[bool]] info: Union[Dict, Sequence[Dict]] def has_wrapper( env: gym.Wrapper, wrapper_type_or_types: Union[Type[gym.Wrapper], Tuple[Type[gym.Wrapper], ...]], ) -> bool: """Returns wether the given `env` has a wrapper of type `wrapper_type`. Args: env (gym.Wrapper): a gym.Wrapper or a gym environment. wrapper_type (Type[gym.Wrapper]): A type of Wrapper to check for. Returns: bool: Wether there is a wrapper of that type wrapping `env`. """ # avoid cycles, although that would be very weird to encounter. while hasattr(env, "env") and env.env is not env: if isinstance(env, wrapper_type_or_types): return True env = env.env return isinstance(env, wrapper_type_or_types) class MayCloseEarly(gym.Wrapper, ABC): """ABC for Wrappers that may close an environment early depending on some conditions. WIP: Also prevents calling `step` and `reset` on a closed env. """ def __init__(self, env: gym.Env): super().__init__(env) self._is_closed: bool = False def is_closed(self) -> bool: # First, make sure that we're not 'overriding' the 'is_closed' of the # wrapped environment. if hasattr(self.env, "is_closed"): assert callable(self.env.is_closed) self._is_closed = self.env.is_closed() return self._is_closed def closed_error_message(self) -> str: """Return the error message to use when attempting to use the closed env. This can be useful for wrappers that close when a given condition is reached, e.g. a number of episodes has been performed, which could return a more relevant message here. """ return "Env is closed" def reset(self, **kwargs): if self.is_closed(): raise gym.error.ClosedEnvironmentError( f"Can't call `reset()`: {self.closed_error_message()}" ) return super().reset(**kwargs) def step(self, action): if self.is_closed(): raise gym.error.ClosedEnvironmentError( f"Can't call `step()`: {self.closed_error_message()}" ) return super().step(action) def close(self) -> None: if self.is_closed(): # TODO: Prevent closing an environment twice? return # raise gym.error.ClosedEnvironmentError(self.closed_error_message()) self.env.close() self._is_closed = True from .env_dataset import EnvDataset class IterableWrapper(MayCloseEarly, IterableDataset, Generic[EnvType], ABC): """ABC for a gym Wrapper that supports iterating over the environment. This allows us to wrap dataloader-based Environments and still use the gym wrapper conventions, as well as iterate over a gym environment as in the Active-dataloader case. NOTE: We have IterableDataset as a base class here so that we can pass a wrapped env to the DataLoader function. This wrapper however doesn't perform the actual iteration, and instead depends on the wrapped environment already supporting iteration. """ def __init__(self, env: gym.Env): super().__init__(env) from sequoia.settings.sl import PassiveEnvironment self.wrapping_passive_env = isinstance(self.unwrapped, PassiveEnvironment) @property def is_vectorized(self) -> bool: """Returns wether this wrapper is wrapping a vectorized environment.""" return isinstance(self.unwrapped, VectorEnv) def __next__(self): # TODO: This is tricky. We want the wrapped env to use *our* step, # reset(), action(), observation(), reward() methods, instead of its own! # Otherwise if we are transforming observations for example, those won't # be affected. # logger.debug(f"Wrapped env {self.env} isnt a PolicyEnv or an EnvDataset") # return type(self.env).__next__(self) from sequoia.settings.rl.environment import ActiveDataLoader # from sequoia.settings.sl.environment import PassiveEnvironment if has_wrapper(self.env, EnvDataset) or is_proxy_to( self.env, (EnvDataset, ActiveDataLoader) ): obs, reward, done, info = self.step(self.unwrapped.action_) return obs # raise RuntimeError(f"WIP: Dropping this '__next__' API in RL.") # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.__iter__.") # return EnvDataset.__next__(self) # return EnvDataset.__next__(self) return self.env.__next__() # return self.observation(obs) def observation(self, observation): # logger.debug(f"Observation won't be transformed.") return observation def action(self, action): return action def reward(self, reward): return reward # def __len__(self): # return self.env.__len__() def get_length(self) -> Optional[int]: """Attempts to return the "length" (in number of steps/batches) of this env. When not possible, returns None. NOTE: This is a bit ugly, but the idea seems alright. """ try: # Try to call self.__len__() without recursing into the wrapped env: return len(self) except TypeError: pass try: # Try to call self.env.__len__() without recursing into the wrapped^2 env: return len(self.env) except TypeError: pass try: # Try to call self.env.__len__(), allowing recursing down the chain: return self.env.__len__() except TypeError: pass try: # If all else fails, delegate to the wrapped env's length() method, if any: return self.env.get_length() except AttributeError: pass # In the worst case, return None, meaning that we don't have a length. return None def send(self, action): # TODO: Make `send` use `self.step`, that way wrappers can apply the same way to # RL and SL environments. if self.wrapping_passive_env: action = self.action(action) reward = self.env.send(action) reward = self.reward(reward) return reward self.unwrapped.action_ = action ( self.unwrapped.observation_, self.unwrapped.reward_, self.unwrapped.done_, self.unwrapped.info_, ) = self.step(action) return self.unwrapped.reward_ # (Option 1 below) # return self.env.send(action) # (Option 2 below) # return self.env.send(self.action(action)) # (Option 3 below) # return type(self.env).send(self, action) # (Following option 4 below) # if has_wrapper(self.env, EnvDataset): # # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.send.") # return EnvDataset.send(self, action) # if hasattr(self.env, "send"): # action = self.action(action) # reward = self.env.send(action) # reward = self.reward(reward) # return reward def __iter__(self) -> Iterator: # TODO: Pretty sure this could be greatly simplified by just always using the loop from EnvDataset. if self.wrapping_passive_env: # NOTE: Also applies the `self.observation` `self.reward` methods while # iterating. for obs, rewards in self.env: obs = self.observation(obs) if rewards is not None: rewards = self.reward(rewards) yield obs, rewards else: self.unwrapped.observation_ = self.reset() self.unwrapped.done_ = False self.unwrapped.action_ = None self.unwrapped.reward_ = None # Yield the first observation_. yield self.unwrapped.observation_ if self.unwrapped.action_ is None: raise RuntimeError( f"You have to send an action using send() between every " f"observation. (env = {self})" ) def done_is_true(done: Union[bool, np.ndarray, Sequence[bool]]) -> bool: return done if isinstance(done, bool) or not done.shape else all(done) while not any([done_is_true(self.unwrapped.done_), self.is_closed()]): # logger.debug(f"step {self.n_steps_}/{self.max_steps}, (episode {self.n_episodes_})") # Set those to None to force the user to call .send() self.unwrapped.action_ = None self.unwrapped.reward_ = None yield self.unwrapped.observation_ if self.unwrapped.action_ is None: raise RuntimeError( f"You have to send an action using send() between every " f"observation. (env = {self})" ) # assert False, "WIP" # Option 1: Return the iterator from the wrapped env. This ignores # everything in the wrapper. # return self.env.__iter__() # Option 2: apply the transformations on the items yielded by the # iterator of the wrapped env (this doesn't use the self.observaion(), self.action()) # from .transform_wrappers import TransformObservation, TransformAction, TransformReward # return map(self.observation, self.env.__iter__()) # Option 3: Calling the method on the wrapped env, but with `self` being # the wrapper, rather than the wrapped env: # return type(self.env).__iter__(self) # Option 4: Slight variation on option 3: We cut straight to the # EnvDataset iterator. # from sequoia.settings.rl.environment import ActiveDataLoader # from sequoia.settings.sl.environment import PassiveEnvironment # if has_wrapper(self.env, EnvDataset) or is_proxy_to( # self.env, (EnvDataset, ActiveDataLoader) # ): # # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.__iter__ with the wrapper as `self`.") # return EnvDataset.__iter__(self) # # TODO: Should probably remove this since we don't actually use this 'PolicyEnv'. # if has_wrapper(self.env, PolicyEnv) or is_proxy_to(self.env, PolicyEnv): # # logger.debug(f"Wrapped env is a PolicyEnv, will use PolicyEnv.__iter__ with the wrapper as `self`.") # return PolicyEnv.__iter__(self) # # NOTE: This works even though IterableDataset isn't a gym.Wrapper. # if not has_wrapper(self.env, IterableDataset) and not isinstance( # self.env, DataLoader # ): # logger.warning( # UserWarning( # f"Will try to iterate on a wrapper for env {self.env} which " # f"doesn't have the EnvDataset or PolicyEnv wrappers and isn't " # f"an IterableDataset." # ) # ) # # if isinstance(self.env, DataLoader): # # return self.env.__iter__() # # raise NotImplementedError(f"Wrapper {self} doesn't know how to iterate on {self.env}.") # return self.env.__iter__() # @property # def wrapping_passive_env(self) -> bool: # """ Returns wether this wrapper is applied over a 'passive' env, in which case # iterating over the env will yield (up to) 2 items, rather than just 1. # """ # from sequoia.settings.sl.environment import PassiveEnvironment # return isinstance(self.unwrapped, PassiveEnvironment) or is_proxy_to( # self, PassiveEnvironment # ) # def __setattr__(self, attr, value): # """ # TODO: Remove/replace this: # Redirect the __setattr__ of attributes 'owned' by the EnvDataset to # the EnvDataset. # We need to do this because we change the value of `self` and call # EnvDataset.__iter__(self), which might get and set attributes to/from # `self`, which is what you'd expect normally. However when `self` is a # wrapper over the env, rather than the env itself, then when attributes # are set on `self` inside __iter__ or __next__ or send, etc, they are # actually set on the wrapper, rather than on the env. # We solve this by detecting when an attribute with a name ending with "_" # and part of a given list of attributes is set. # """ # if attr.endswith("_") and has_wrapper(self.env, EnvDataset): # if attr in { # "observation_", # "action_", # "reward_", # "done_", # "info_", # "n_sends_", # }: # # logger.debug(f"Attribute {attr} will be set on the wrapped env rather than on the wrapper itself.") # env = self.env # while not isinstance(env, EnvDataset) and env.env is not env: # env = env.env # assert isinstance(env, EnvDataset) # setattr(env, attr, value) # else: # object.__setattr__(self, attr, value) class RenderEnvWrapper(IterableWrapper): """Simple Wrapper that renders the env at each step.""" def __init__(self, env: gym.Env, display: Any = None): super().__init__(env) # TODO: Maybe use the given display? def step(self, action): self.env.render("human") return self.env.step(action) def tile_images(img_nhwc): """ TAKEN FROM https://github.com/openai/gym/pull/1624/files Tile N images into one big PxQ image (P,Q) are chosen to be as close as possible, and if N is square, then P=Q. input: img_nhwc, list or array of images, ndim=4 once turned into array n = batch index, h = height, w = width, c = channel returns: bigim_HWc, ndarray with ndim=3 """ img_nhwc = np.asarray(img_nhwc) N, h, w, c = img_nhwc.shape if c not in {1, 3}: img_nhwc = img_nhwc.transpose([0, 2, 3, 1]) N, h, w, c = img_nhwc.shape assert c in {1, 3} H = int(np.ceil(np.sqrt(N))) W = int(np.ceil(float(N) / H)) img_nhwc = np.array(list(img_nhwc) + [img_nhwc[0] * 0 for _ in range(N, H * W)]) img_HWhwc = img_nhwc.reshape(H, W, h, w, c) img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4) img_Hh_Ww_c = img_HhWwc.reshape(H * h, W * w, c) return img_Hh_Ww_c if __name__ == "__main__": import doctest doctest.testmod() ================================================ FILE: sequoia/common/gym_wrappers/utils_test.py ================================================ import gym import pytest from gym.wrappers import ClipAction from gym.wrappers.pixel_observation import PixelObservationWrapper from sequoia.conftest import param_requires_pyglet from .pixel_observation import PixelObservationWrapper from .utils import has_wrapper @pytest.mark.parametrize( "env,wrapper_type,result", [ param_requires_pyglet( lambda: PixelObservationWrapper(gym.make("CartPole-v0")), ClipAction, False ), param_requires_pyglet( lambda: PixelObservationWrapper(gym.make("CartPole-v0")), PixelObservationWrapper, True ), param_requires_pyglet( lambda: PixelObservationWrapper(gym.make("CartPole-v0")), PixelObservationWrapper, True ), # param_requires_atari_py(AtariPreprocessing(gym.make("ALE/Breakout-v5")), ClipAction, True), ], ) def test_has_wrapper(env, wrapper_type, result): assert has_wrapper(env(), wrapper_type) == result ================================================ FILE: sequoia/common/hparams/__init__.py ================================================ """ Utilities for creating hyper-parameter dataclasses and their fields. """ from simple_parsing.helpers.hparams import categorical, log_uniform, loguniform, uniform from simple_parsing.helpers.hparams.hyperparameters import HyperParameters, Point ================================================ FILE: sequoia/common/layers.py ================================================ import math from typing import Callable, List, Optional, Tuple, Union import numpy as np import torch from gym import spaces from torch import Tensor, nn from sequoia.common.spaces.image import Image from sequoia.utils.generic_functions import singledispatchmethod from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class Lambda(nn.Module): def __init__(self, func: Callable): super().__init__() self.func = func def forward(self, x): return self.func(x) class Reshape(nn.Module): def __init__(self, target_shape: Union[List[int], Tuple[int, ...]]): self.target_shape = target_shape super().__init__() def forward(self, inputs): return inputs.reshape([inputs.shape[0], *self.target_shape]) class ConvBlock(nn.Module): def __init__( self, in_channels: int, out_channels: int, kernel_size: int = 3, padding: int = 1, **kwargs ): super().__init__() self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = kernel_size self.conv = nn.Conv2d( in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, padding=padding, **kwargs, ) self.norm = nn.BatchNorm2d(out_channels) self.relu = nn.ReLU() self.pool = nn.MaxPool2d(2) def forward(self, x): x = self.conv(x) x = self.norm(x) x = self.relu(x) return self.pool(x) class DeConvBlock(nn.Module): """Block that performs: Upsample (2x) Conv BatchNorm2D Relu Conv BatchNorm2D Relu (optional) """ def __init__( self, in_channels: int, out_channels: int, hidden_channels: Optional[int] = None, kernel_size: int = 3, padding: int = 1, last_relu: bool = True, **kwargs, ): self.in_channels = in_channels self.out_channels = out_channels self.hidden_channels = hidden_channels or out_channels self.kernel_size = kernel_size self.last_relu = last_relu super().__init__() self.upsample = nn.Upsample(scale_factor=2) self.conv1 = nn.Conv2d( in_channels=in_channels, out_channels=self.hidden_channels, kernel_size=kernel_size, padding=padding, **kwargs, ) self.norm1 = nn.BatchNorm2d(self.hidden_channels) self.conv2 = nn.Conv2d( in_channels=self.hidden_channels, out_channels=out_channels, kernel_size=kernel_size, padding=padding, **kwargs, ) self.norm2 = nn.BatchNorm2d(self.hidden_channels) self.relu = nn.ReLU() def forward(self, x): x = self.upsample(x) x = self.conv1(x) x = self.norm1(x) x = self.relu(x) x = self.conv2(x) x = self.norm2(x) if self.last_relu: x = self.relu(x) return x def n_output_features( in_features: int, padding: int = 1, kernel_size: int = 3, stride: int = 1 ) -> int: """Calculates the number of output features of a conv2d layer given its parameters.""" return math.floor((in_features + 2 * padding - kernel_size) / stride) + 1 class Conv2d(nn.Conv2d): @singledispatchmethod def forward(self, input: Union[Image, Tensor]) -> Union[Tensor, Image]: return super().forward(input) @forward.register(Image) def _(self, input: Image) -> Image: assert input.channels_first, f"Need channels first inputs for conv2d: {input}" # NOTE: Not strictly necessary for computing the output space, but it would be # better for the input space to already have a batch size, since conv2d only # accepts 4-dimensional inputs. # assert input.batch_size, ( # f"Image space should be batched, since conv2d only accepts 4-dimensional " # f"inputs. (input={input})" # ) assert input.channels == self.in_channels, ( f"Input space doesn't have the right number of channels: " f"input.channels: {input.channels} != self.in_channels: {self.in_channels}" ) new_height = n_output_features( input.height, padding=self.padding[0], kernel_size=self.kernel_size[0], stride=self.stride[0], ) new_width = n_output_features( input.width, padding=self.padding[1], kernel_size=self.kernel_size[1], stride=self.stride[1], ) new_channels = self.out_channels new_shape = [new_channels, new_height, new_width] if input.batch_size: new_shape.insert(0, input.batch_size) output_space: Image = type(input)(low=-np.inf, high=np.inf, shape=new_shape) output_space.channels_first = True return output_space class MaxPool2d(nn.MaxPool2d): @singledispatchmethod def forward(self, input: Union[Image, Tensor]) -> Union[Tensor, Image]: return super().forward(input) @forward.register(Image) def _(self, input: Image) -> Image: assert input.channels_first, f"Need channels first inputs: {input}" # assert not self.padding, "assuming no padding for now." padding = [self.padding] * 2 if isinstance(self.padding, int) else self.padding kernel_size = ( [self.kernel_size] * 2 if isinstance(self.kernel_size, int) else self.kernel_size ) stride = [self.stride] * 2 if isinstance(self.stride, int) else self.stride new_height = n_output_features( input.height, padding=padding[0], kernel_size=kernel_size[0], stride=stride[0], ) new_width = n_output_features( input.width, padding=padding[1], kernel_size=kernel_size[1], stride=stride[1], ) new_channels = input.channels new_shape = [new_channels, new_height, new_width] if input.batch_size: new_shape.insert(0, input.batch_size) output_space: Image = type(input)(low=-np.inf, high=np.inf, shape=new_shape) output_space.channels_first = True # assert False, (self.forward(torch.as_tensor([input.sample()])).shape, output_space) return output_space class Sequential(nn.Sequential): # NB: We can't really type check this function as the type of input # may change dynamically (as is tested in # TestScript.test_sequential_intermediary_types). Cannot annotate # with Any as TorchScript expects a more precise type def forward(self, input): if isinstance(input, spaces.Space): space = input for module in self: try: space = module(space) except: if isinstance(space, (spaces.Box, Image)): # Apply the module to a sample from the space, and create an # output space of the same shape. space = Image.from_box(space) in_sample: Tensor = torch.as_tensor(space.sample()) if not space.batch_size: in_sample = in_sample.unsqueeze(0) out_sample = module(in_sample) out_space = type(space)(low=-np.inf, high=np.inf, shape=out_sample.shape) space = out_space else: logger.debug( f"Unable to apply module {module} on space {space}: assuming that it doesn't change the space." ) return space return super().forward(input) ================================================ FILE: sequoia/common/loss.py ================================================ """ Module that defines a `Loss` class that holds losses and associated metrics. This Loss object is used to bundle together the Loss and the Metrics. Loss objects are used to simplify training with multiple "loss signals" (e.g. in Self-Supervised Learning) by keeping track of the contribution of each individual 'task' to the total loss, as well as their corresponding metrics. For example: >>> from pprint import pprint >>> loss = Loss("total") >>> loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95}) >>> loss += Loss("task_b", loss=torch.Tensor([2.10])) >>> loss += Loss("task_c", loss=3.00) >>> log_dict = loss.to_log_dict() >>> pprint(log_dict) {'total/loss': tensor([6.3300]), 'total/task_a/accuracy': 0.95, 'total/task_a/loss': 1.23, 'total/task_b/loss': tensor([2.1000]), 'total/task_c/loss': 3.0} Another feature of Loss objects is that they can automatically generate relevant metrics when the associated tensors are passed. For example, consider a classification problem: >>> # some fake classification logits. >>> y_pred = torch.Tensor([ ... [.8, .1, .1], ... [.0, .9, .1], ... [.0, .1, .9], ... ]) >>> y = [0, 1, 1] >>> loss = Loss("test", y_pred=y_pred, y=y) >>> loss.metric ClassificationMetrics(n_samples=3, accuracy=0.666667) Or, consider a regression problem: >>> y_true = [0.0, 1.0, 2.0, 3.0] >>> y_pred = [0.0, 1.0, 2.0, 5.0] # mse = 1/4 * (5-3)**2 == 1.0 >>> reg_loss = Loss("test", y_pred=y_pred, y=y_true) >>> reg_loss.metric RegressionMetrics(n_samples=4, mse=tensor(1.), l1_error=tensor(0.5000)) See the `Loss` constructor for more info on which tensors are accepted. """ from collections.abc import Mapping as MappingABC from dataclasses import InitVar, dataclass, fields from typing import Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Union import torch from simple_parsing import field from simple_parsing.helpers import dict_field from torch import Tensor from sequoia.utils.logging_utils import cleanup, get_logger from sequoia.utils.serialization import Serializable from sequoia.utils.utils import add_dicts, add_prefix from .metrics import ClassificationMetrics, Metrics, RegressionMetrics, get_metrics logger = get_logger(__name__) @dataclass class Loss(Serializable, MappingABC): """Object used to store the losses and metrics. Used to simplify the return type of the different `get_loss` functions and also to help in debugging models that use a combination of different loss signals. TODO: Add some kind of histogram plot to show the relative contribution of each loss signal? TODO: Maybe create a `make_plots()` method to create wandb plots? """ name: str loss: Tensor = 0.0 # type: ignore losses: Dict[str, "Loss"] = dict_field() # NOTE: By setting to_dict=False below, we don't include the tensors when # serializing the attributes. # TODO: Does that also mean that the tensors can't be pickled (moved) by # pytorch-lightning during training? Is there a case where that would be # useful? tensors: Dict[str, Tensor] = dict_field(repr=False, to_dict=False) # Dictionary of metrics related to this loss. For example, could be the Accuracy. # TODO: Test out using this with metrics from `torchmetrics`. metrics: Dict[str, Union[Metrics, Tensor]] = dict_field() # When multiplying the Loss by a value, this keep track of the coefficients # used, so that if we wanted to we could recover the 'unscaled' loss. _coefficient: Union[float, Tensor] = field(1.0, repr=False) x: InitVar[Optional[Tensor]] = None h_x: InitVar[Optional[Tensor]] = None y_pred: InitVar[Optional[Tensor]] = None y: InitVar[Optional[Tensor]] = None _field_names: ClassVar[Tuple[str, ...]] def __post_init__( self, x: Tensor = None, h_x: Tensor = None, y_pred: Tensor = None, y: Tensor = None ): if isinstance(self.name, dict): # TODO: ugly-ish 'hack', we need to do this because of the infamous # 'apply_to_collection' function, which does a Loss({k: v for k, v in loss.items()}) # Check that all other fields are empty, so we're not overwriting anything. assert (isinstance(self.loss, float) or not self.loss.shape) and self.loss == 0.0 assert not self.metrics assert not self.losses assert not self.tensors assert self._coefficient == 1.0 field_values = self.name self.name = field_values.pop("name") for k, v in field_values.items(): setattr(self, k, v) assert self.name, "Loss objects should be given a name!" if self.name not in self.metrics: # Create a Metrics object if given the necessary tensors. metrics = get_metrics(x=x, h_x=h_x, y_pred=y_pred, y=y) if metrics: self.metrics[self.name] = metrics self._device: torch.device = None for name in list(self.tensors.keys()): tensor = self.tensors[name] if not isinstance(tensor, Tensor): self.tensors[name] = torch.as_tensor(tensor) elif self._device is None: self._device = tensor.device if "_field_names" not in type(self).__dict__: type(self)._field_names = tuple(f.name for f in fields(self)) def __contains__(self, key: str) -> bool: if isinstance(key, str): return key in type(self)._field_names return NotImplemented def __getitem__(self, key: str) -> Any: if key not in self: raise KeyError(key) return getattr(self, key) def __iter__(self) -> Iterable[str]: return type(self)._field_names def __len__(self) -> int: return len(type(self)._field_names) @property def total_loss(self) -> Tensor: return self.loss @property def requires_grad(self) -> bool: """Returns wether the loss tensor in this object requires grad.""" return isinstance(self.loss, Tensor) and self.loss.requires_grad def backward(self, *args, **kwargs): """Calls `self.loss.backward(*args, **kwargs)`.""" return self.loss.backward(*args, **kwargs) @property def metric(self) -> Optional[Metrics]: """Shortcut for `self.metrics[self.name]`. Returns: Optional[Metrics]: The main metrics associated with this Loss. """ return self.metrics.get(self.name) @metric.setter def metric(self, value: Metrics) -> None: """Shortcut for `self.metrics[self.name] = value`. Parameters ---------- value : Metrics The main metrics associated with this Loss. """ assert self.name not in self.metrics, "There's already be a metric?" self.metrics[self.name] = value @property def accuracy(self) -> float: if isinstance(self.metric, ClassificationMetrics): return self.metric.accuracy @property def mse(self) -> Tensor: assert isinstance(self.metric, RegressionMetrics), self return self.metric.mse def __add__(self, other: Union["Loss", Any]) -> "Loss": """Adds two Loss instances together. Adds the losses, total loss and metrics. Overwrites the tensors. Keeps the name of the first one. This is useful when doing something like: ``` loss = Loss("Test") for x, y in dataloader: loss += model.get_loss(x=x, y=y) ``` Returns ------- Loss The merged/summed up Loss. """ if other == 0: return self if not isinstance(other, Loss): return NotImplemented name = self.name loss = self.loss + other.loss if self.name == other.name: losses = add_dicts(self.losses, other.losses) metrics = add_dicts(self.metrics, other.metrics) else: # IDEA: when the names don't match, store the entire Loss # object into the 'losses' dict, rather than a single loss tensor. losses = add_dicts(self.losses, {other.name: other}) # TODO: setting in the 'metrics' dict, we are duplicating the # metrics, since they now reside in the `self.metrics[other.name]` # and `self.losses[other.name].metrics` attributes. metrics = self.metrics # metrics = add_dicts(self.metrics, {other.name: other.metrics}) tensors = add_dicts(self.tensors, other.tensors, add_values=False) return Loss( name=name, loss=loss, losses=losses, tensors=tensors, metrics=metrics, _coefficient=self._coefficient, ) def __iadd__(self, other: Union["Loss", Any]) -> "Loss": """Adds Loss to `self` in-place. Adds the losses, total loss and metrics. Overwrites the tensors. Keeps the name of the first one. This is useful when doing something like: ``` loss = Loss("Test") for x, y in dataloader: loss += model.get_loss(x=x, y=y) ``` Returns ------- Loss `self`: The merged/summed up Loss. """ self.loss = self.loss + other.loss if self.name == other.name: self.losses = add_dicts(self.losses, other.losses) self.metrics = add_dicts(self.metrics, other.metrics) else: # IDEA: when the names don't match, store the entire Loss # object into the 'losses' dict, rather than a single loss tensor. self.losses = add_dicts(self.losses, {other.name: other}) self.tensors = add_dicts(self.tensors, other.tensors, add_values=False) return self def __radd__(self, other: Any): """Addition operator for when forward addition returned `NotImplemented`. For example, doing something like `None + Loss()` will use __radd__, whereas doing `Loss() + None` will use __add__. """ if other is None: return self elif other == 0: return self if isinstance(other, Tensor): # TODO: Other could be a loss tensor, maybe create a Loss object for it? pass return NotImplemented def __mul__(self, factor: Union[float, Tensor]) -> "Loss": """Scale each loss tensor by `coefficient`. Returns ------- Loss returns a scaled Loss instance. """ result = Loss( name=self.name, loss=self.loss * factor, losses={k: value * factor for k, value in self.losses.items()}, metrics=self.metrics, tensors=self.tensors, _coefficient=self._coefficient * factor, ) return result def __rmul__(self, factor: Union[float, Tensor]) -> "Loss": # assert False, f"rmul: {factor}" return self.__mul__(factor) def __truediv__(self, coefficient: Union[float, Tensor]) -> "Loss": return self * (1 / coefficient) @property def unscaled_losses(self): """Recovers the 'unscaled' version of this loss. TODO: This isn't used anywhere. We could probably remove it. """ return {k: value / self._coefficient for k, value in self.losses.items()} def to_log_dict(self, verbose: bool = False) -> Dict[str, Union[str, float, Dict]]: """Creates a dictionary to be logged (e.g. by `wandb.log`). Args: verbose (bool, optional): Wether to include a lot of information, or to only log the 'essential' stuff. See the `cleanup` function for more info. Defaults to False. Returns: Dict: A dict containing the things to be logged. """ # TODO: Could also produce some wandb plots and stuff here when verbose? log_dict: Dict[str, Union[str, float, Dict, Tensor]] = {} # log_dict["loss"] = round(float(self.loss), 6) # Preserving the Torch Dtype, if present. log_dict["loss"] = self.loss for name, metric in self.metrics.items(): if isinstance(metric, Serializable): log_dict[name] = metric.to_log_dict(verbose=verbose) else: log_dict[name] = metric for name, loss in self.losses.items(): if isinstance(loss, Serializable): log_dict[name] = loss.to_log_dict(verbose=verbose) else: log_dict[name] = loss log_dict = add_prefix(log_dict, prefix=self.name, sep="/") keys_to_remove: List[str] = [] if not verbose: # when NOT verbose, remove any entries with this matching key. # TODO: add/remove keys here if you want to customize what doesn't get logged to wandb. # TODO: Could maybe make this a class variable so that it could be # extended/overwritten, but that sounds like a bit too much rn. keys_to_remove = [ "n_samples", "name", "confusion_matrix", "class_accuracy", "_coefficient", ] result = cleanup(log_dict, keys_to_remove=keys_to_remove, sep="/") return result def to_pbar_message(self) -> Dict[str, float]: """Smaller, less-detailed version of `to_log_dict()` for progress bars.""" # NOTE: PL actually doesn't seem to accept strings as values message: Dict[str, Union[str, float]] = {} message["Loss"] = float(self.loss) for name, metric in self.metrics.items(): if isinstance(metric, Metrics): message[name] = metric.to_pbar_message() else: message[name] = metric for name, loss_info in self.losses.items(): message[name] = loss_info.to_pbar_message() message = add_prefix(message, prefix=self.name, sep=" ") return cleanup(message, sep=" ") def clear_tensors(self) -> None: """Clears the `tensors` attribute of `self` and of sublosses. NOTE: This could be useful if you want to save some space/compute, but it isn't being used atm, and there's no issue. You might want to call this if you are storing big tensors (or passing them to the constructor) """ self.tensors.clear() for _, loss in self.losses.items(): loss.clear_tensors() return self def absorb(self, other: "Loss") -> None: """Absorbs `other` into `self`, merging the losses and metrics. Args: other (Loss): Another loss to 'merge' into this one. """ new_name = self.name old_name = other.name # Here we create a new 'other' and use __iadd__ to merge the attributes. new_other = Loss(name=new_name) new_other.loss = other.loss # We also replace the name in the keys, if present. new_other.metrics = {k.replace(old_name, new_name): v for k, v in other.metrics.items()} new_other.losses = {k.replace(old_name, new_name): v for k, v in other.losses.items()} self += new_other def all_metrics(self) -> Dict[str, Metrics]: """Returns a 'cleaned up' dictionary of all the Metrics objects.""" assert self.name result: Dict[str, Metrics] = {} result.update(self.metrics) for name, loss in self.losses.items(): # TODO: Aren't we potentially colliding with 'self.metrics' here? subloss_metrics = loss.all_metrics() for key, metric in subloss_metrics.items(): assert key not in result, ( f"Collision in metric keys of subloss {name}: key={key}, " f"result={result}" ) result[key] = metric result = add_prefix(result, prefix=self.name, sep="/") return result if __name__ == "__main__": import doctest doctest.testmod() ================================================ FILE: sequoia/common/loss_test.py ================================================ """ TODO: Write some tests that also help illustrate how the Loss class works. """ from .loss import Loss def test_demo(): """Simple test to demonstrate addition of Loss objects.""" loss = Loss("total") loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95}) loss += Loss("task_b", loss=2.10) loss += Loss("task_c", loss=3.00) # Get a dict to be logged, for example with wandb. loss_dict = loss.to_log_dict() assert loss_dict == { "total/loss": 6.33, "total/task_a/loss": 1.23, "total/task_a/accuracy": 0.95, "total/task_b/loss": 2.1, "total/task_c/loss": 3.0, } def test_all_metrics(): """Using `all_metrics()` gives a dict of all the metrics in the Loss.""" loss = Loss("total") loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95}) loss += Loss("task_b", loss=2.10) loss += Loss("task_c", loss=3.00) assert loss.all_metrics() == { "total/task_a/accuracy": 0.95, } def test_to_log_dict_order(): """Simple test to demonstrate addition of Loss objects.""" task_a_loss = Loss("task_a", loss=1.23, metrics={"accuracy": 0.95}) task_b_loss = Loss("task_b", loss=2.10) task_c_loss = Loss("task_c", loss=3.00) total_loss = Loss("total") + task_a_loss + task_b_loss + task_c_loss loss_dict = total_loss.to_log_dict() assert loss_dict == { "total/loss": 6.33, "total/task_a/loss": 1.23, "total/task_a/accuracy": 0.95, "total/task_b/loss": 2.1, "total/task_c/loss": 3.0, } ================================================ FILE: sequoia/common/metrics/__init__.py ================================================ from .classification import ClassificationMetrics from .get_metrics import get_metrics from .metrics import Metrics, MetricsType from .metrics_utils import accuracy, class_accuracy, get_class_accuracy, get_confusion_matrix from .regression import RegressionMetrics from .rl_metrics import EpisodeMetrics, GradientUsageMetric ================================================ FILE: sequoia/common/metrics/classification.py ================================================ """ Metrics class for classification. Gives the accuracy, the class accuracy, and the confusion matrix for a given set of (raw/pre-activation) logits Tensor `y_pred` and the class labels `y`. """ from dataclasses import InitVar, dataclass from typing import Dict, Optional, Union import numpy as np import torch from simple_parsing import field from torch import Tensor from sequoia.utils.serialization import detach, move from .metrics import Metrics from .metrics_utils import get_accuracy, get_class_accuracy, get_confusion_matrix # TODO: Might be a good idea to add a `task` attribute to Metrics or # Loss objects, in order to check that we aren't adding the class # accuracies or confusion matrices from different tasks by accident. # We could also maybe add them but fuse them properly, for instance by # merging the class accuracies and confusion matrices? # # For example, if a first metric has class accuracy [0.1, 0.5] # (n_samples=100) and from a task with classes [0, 1] is added to a # second Metrics with class accuracy [0.9, 0.8] (n_samples=100) for task # with classes [0,3], the resulting Metrics object would have a # class_accuracy of [0.5 (from (0.1+0.9)/2 = 0.5), 0.5, 0 (no data), 0.8] # n_samples would then also have to be split on a per-class basis. # n_samples could maybe be just the sum of the confusion matrix entries? # # As for the confusion matrices, they could be first expanded to fit the # range of both by adding empty columns/rows to each and then be added # together. @dataclass class ClassificationMetrics(Metrics): # fields we generate from the confusion matrix (if provided) or from the # forward pass tensors. accuracy: float = 0.0 confusion_matrix: Optional[Union[Tensor, np.ndarray]] = field( default=None, repr=False, compare=False ) class_accuracy: Optional[Union[Tensor, np.ndarray]] = field( default=None, repr=False, compare=False ) # Optional arguments used to create the attributes of the metrics above. # NOTE: These wont become attributes on the object, just args to postinit. x: InitVar[Optional[Tensor]] = None h_x: InitVar[Optional[Tensor]] = None logits: InitVar[Optional[Tensor]] = None y_pred: InitVar[Optional[Tensor]] = None y: InitVar[Optional[Tensor]] = None num_classes: InitVar[Optional[int]] = None def __post_init__( self, x: Tensor = None, h_x: Tensor = None, logits: Tensor = None, y_pred: Tensor = None, y: Tensor = None, num_classes: int = None, ): super().__post_init__(x=x, h_x=h_x, logits=logits, y_pred=y_pred, y=y) if ( self.confusion_matrix is None and (y_pred is not None or logits is not None) and y is not None ): self.confusion_matrix = get_confusion_matrix( y_pred=logits if logits is not None else y_pred, y=y, num_classes=num_classes ) # TODO: add other useful metrics (potentially ones using x or h_x?) if self.confusion_matrix is not None: self.accuracy = get_accuracy(self.confusion_matrix) self.accuracy = round(self.accuracy, 6) self.class_accuracy = get_class_accuracy(self.confusion_matrix) @property def objective_name(self) -> str: return "Accuracy" def __add__(self, other: "ClassificationMetrics") -> "ClassificationMetrics": confusion_matrix: Optional[Tensor] = None if self.n_samples == 0: return other if not isinstance(other, ClassificationMetrics): return NotImplemented # Create the 'sum' confusion matrix: confusion_matrix: Optional[np.ndarray] = None if self.confusion_matrix is None and other.confusion_matrix is not None: confusion_matrix = other.confusion_matrix.clone() elif other.confusion_matrix is None: confusion_matrix = self.confusion_matrix.clone() else: confusion_matrix = self.confusion_matrix + other.confusion_matrix result = ClassificationMetrics( n_samples=self.n_samples + other.n_samples, confusion_matrix=confusion_matrix, num_classes=self.num_classes, ) return result def to_log_dict(self, verbose=False): log_dict = super().to_log_dict(verbose=verbose) log_dict["accuracy"] = self.accuracy if verbose: # Maybe add those as plots, rather than tensors? log_dict["class_accuracy"] = self.class_accuracy log_dict["confusion_matrix"] = self.confusion_matrix return log_dict # def __str__(self): # s = super().__str__() # s = s.replace(f"accuracy={self.accuracy}", f"accuracy={self.accuracy:.3%}") # return s def to_pbar_message(self) -> Dict[str, Union[str, float]]: message = super().to_pbar_message() message["acc"] = float(self.accuracy) return message def detach(self) -> "ClassificationMetrics": return ClassificationMetrics( n_samples=detach(self.n_samples), accuracy=float(self.accuracy), class_accuracy=detach(self.class_accuracy), confusion_matrix=detach(self.confusion_matrix), ) def to(self, device: Union[str, torch.device]) -> "ClassificationMetrics": """Returns a new Metrics with all the attributes 'moved' to `device`.""" return ClassificationMetrics( n_samples=move(self.n_samples, device), accuracy=move(self.accuracy, device), class_accuracy=move(self.class_accuracy, device), confusion_matrix=move(self.confusion_matrix, device), ) @property def objective(self) -> float: return float(self.accuracy) # def __lt__(self, other: Union["ClassificationMetrics", Any]) -> bool: # if isinstance(other, ClassificationMetrics): # return self.accuracy < other.accuracy # return NotImplemented # def __ge__(self, other: Union["ClassificationMetrics", Any]) -> bool: # if isinstance(other, ClassificationMetrics): # return self.accuracy >= other.accuracy # return NotImplemented # def __eq__(self, other: Union["ClassificationMetrics", Any]) -> bool: # if isinstance(other, ClassificationMetrics): # return self.accuracy == other.accuracy and self.n_samples == other.n_samples # return NotImplemented ================================================ FILE: sequoia/common/metrics/classification_test.py ================================================ import numpy as np import torch from .classification import ClassificationMetrics from .get_metrics import get_metrics def test_classification_metrics_add_properly(): y_pred = torch.as_tensor( [ [0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99], ] ) y = torch.as_tensor( [ 1, 2, 0, ] ) m1 = ClassificationMetrics(y_pred=y_pred, y=y) assert m1.n_samples == 3 assert np.isclose(m1.accuracy, 2 / 3) y_pred = torch.as_tensor( [ [0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99], [0.01, 0, 0.99], [0.01, 0, 0.99], ] ) y = torch.as_tensor( [ 1, 2, 2, 0, 0, ] ) m2 = ClassificationMetrics(y_pred=y_pred, y=y) assert m2.n_samples == 5 assert np.isclose(m2.accuracy, 3 / 5) assert all(np.isclose(m2.class_accuracy, [0, 1, 1])) m3 = m1 + m2 assert m3.n_samples == 8 assert np.isclose(m3.accuracy, 5 / 8) def test_metrics_from_tensors(): y_pred = torch.as_tensor( [ [0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99], ] ) y = torch.as_tensor( [ 1, 2, 0, ] ) m = get_metrics(y_pred=y_pred, y=y) assert m.n_samples == 3 assert np.isclose(m.accuracy, 2 / 3) ================================================ FILE: sequoia/common/metrics/get_metrics.py ================================================ """ Defines the get_metrics function with gives back appropriate metrics for the given tensors. TODO: Add more metrics! Maybe even fancy things that are based on the hidden vectors like wasserstein distance, etc? """ from typing import List, Optional, Union import numpy as np import torch from torch import Tensor from sequoia.utils.logging_utils import get_logger from .classification import ClassificationMetrics from .metrics import Metrics from .regression import RegressionMetrics logger = get_logger(__name__) def to_optional_tensor(x: Optional[Union[Tensor, np.ndarray, List]]) -> Optional[Tensor]: """Converts `x` into a Tensor if `x` is not None, else None.""" return x if x is None else torch.as_tensor(x) @torch.no_grad() def get_metrics( y_pred: Union[Tensor, np.ndarray], y: Union[Tensor, np.ndarray], x: Union[Tensor, np.ndarray] = None, h_x: Union[Tensor, np.ndarray] = None, ) -> Optional[Metrics]: y = to_optional_tensor(y) y_pred = to_optional_tensor(y_pred) x = to_optional_tensor(x) h_x = to_optional_tensor(h_x) if y is not None and y_pred is not None: if y.shape != y_pred.shape or not torch.is_floating_point(y): # TODO: I think this condition also works for binary classification, # at least when the logits have a shape[-1] == 2, but I don't know if it # would cause some trouble if there is a single logit, rather than 2. return ClassificationMetrics(x=x, h_x=h_x, y_pred=y_pred, y=y) return RegressionMetrics(x=x, h_x=h_x, y_pred=y_pred, y=y) return None ================================================ FILE: sequoia/common/metrics/metrics.py ================================================ """ Cute little dataclass that is used to describe a given type of Metrics. This is a bit like the Metrics from pytorch-lightning, but seems easier to use, as far as I know. Also totally transferable between gpus etc. (Haven't used the metrics from PL much yet, to be honest). """ from dataclasses import dataclass, field, fields from typing import Any, Dict, TypeVar, Union import numpy as np from torch import Tensor from sequoia.utils.serialization import Serializable MetricsType = TypeVar("MetricsType", bound="Metrics") @dataclass class Metrics(Serializable): # This field isn't used in comparisons between Metrics. n_samples: int = field(default=0, compare=False) # TODO: Refactor this to take any kwargs, and then let each metric type # specify its own InitVars. def __post_init__(self, **tensors): """Creates metrics given `y_pred` and `y`. NOTE: Doesn't use `x` and `h_x` for now. Args: x (Tensor, optional): The input Tensor. Defaults to None. h_x (Tensor, optional): The hidden representation for x. Defaults to None. y_pred (Tensor, optional): The predicted label. Defaults to None. y (Tensor, optional): The true label. Defaults to None. """ # get the batch size: for tensor in tensors.values(): if isinstance(tensor, (np.ndarray, Tensor)) and tensor.shape: self.n_samples = tensor.shape[0] break def __add__(self, other): # Instances of the Metrics base class shouldn't be added together, as # the subclasses should implement the method. We just return the other. return other def __radd__(self, other): # Instances of the Metrics base class shouldn't be added together, as # the subclasses should implement the method. We just return the other. if isinstance(other, (int, float)) and other == 0.0: return self if isinstance(other, Metrics) and type(self) is Metrics: assert self.n_samples == 0 return other return NotImplemented def __mul__(self, factor: Union[float, Tensor]) -> "Loss": # By default, multiplying or dividing a Metrics object doesn't change # anything about it. return self def __rmul__(self, factor: Union[float, Tensor]) -> "Loss": # Reverse-order multiply, used to do b * a when a * b returns # NotImplemented. return self.__mul__(factor) def __truediv__(self, coefficient: Union[float, Tensor]) -> "Metrics": # By default, multiplying or dividing a Metrics object doesn't change # anything about it. return self def to_log_dict(self, verbose: bool = False) -> Dict: """Creates a dictionary to be logged (e.g. by `wandb.log`). Args: verbose (bool, optional): Wether to include a lot of information, or to only log the 'essential' metrics. See the `cleanup` function for more info. Defaults to False. Returns: Dict: A dict containing the things to be logged. TODO: Maybe create a `make_plots()` method to get wandb plots from the metric? """ log_dict = {} for field in fields(self): if not (field.repr or verbose): continue # skip field. value = getattr(self, field.name) if isinstance(value, Metrics): log_dict[field.name] = value.to_log_dict(verbose=verbose) else: log_dict[field.name] = value return log_dict return {f.name: getattr(self, f.name) for f in fields(self) if f.repr or verbose} if verbose: return {"n_samples": self.n_samples} return {} def to_pbar_message(self) -> Dict[str, Union[str, float]]: return {} def numpy(self): """Returns a new object with all the tensor fields converted to numpy arrays.""" def to_numpy(val: Any): if isinstance(val, Tensor): return val.detach().cpu().numpy() if isinstance(val, (list, tuple)): return np.array(val) return val return type(self)(**{name: to_numpy(val) for name, val in self.items()}) @property def objective(self) -> float: """Returns the 'main' metric from this object, as a float. Returns ------- float The most important metric from this object, as a float. """ return 0 # raise NotImplementedError(f"TODO: Add the 'objective' property to class {type(self)}") @property def objective_name(self) -> str: """Returns the name to be associated with the objective of this class. Returns ------- float The name associated with the objective. """ raise NotImplementedError(f"TODO: Add the 'objective_name' property to class {type(self)}") ================================================ FILE: sequoia/common/metrics/metrics_utils.py ================================================ """ Utility functions for calculating metrics. """ from typing import Union import numpy as np import torch from torch import Tensor @torch.no_grad() def get_confusion_matrix( y_pred: Union[np.ndarray, Tensor], y: Union[np.ndarray, Tensor], num_classes: int = None ) -> Union[Tensor, np.ndarray]: """Taken from https://discuss.pytorch.org/t/how-to-find-individual-class-accuracy/6348 NOTE: `y_pred` is assumed to be the logits with shape [B, C], while the labels `y` is assumed to have shape either `[B]` or `[B, 1]`, unless `num_classes` is given, in which case y_pred can be the predicted labels. """ if isinstance(y_pred, Tensor): y_pred = y_pred.detach().cpu().numpy() if isinstance(y, Tensor): y = y.detach().cpu().numpy() # FIXME: How do we properly check if something is an integer type in np? if len(y_pred.shape) == 1 and y_pred.dtype not in {np.float32, np.float64}: # y_pred is already the predicted labels. y_preds = y_pred if num_classes is None: raise NotImplementedError( f"Can't determine the number of classes. Pass logits rather than predicted labels." ) n_classes = num_classes elif y_pred.shape[-1] == 1: n_classes = 2 # y_pred is the logit for binary classification. y_preds = y_pred.round() else: # y_pred is assumed to be the logits. n_classes = y_pred.shape[-1] y_preds = y_pred.argmax(-1) y = y.flatten().astype(int) y_preds = y_preds.flatten().astype(int) # BUG: This is failing on the last batch. assert y.shape == y_preds.shape, (y.shape, y_preds.shape) # assert y.dtype == y_preds.dtype == np.int, (y.dtype, y_preds.dtype) confusion_matrix = np.zeros([n_classes, n_classes]) assert 0 <= y.min() and y.max() < n_classes, (y, n_classes) assert 0 <= y_preds.min() and y_preds.max() < n_classes, (y_preds, n_classes) for y_t, y_p in zip(y, y_preds): confusion_matrix[y_t, y_p] += 1 return confusion_matrix @torch.no_grad() def accuracy(y_pred: Union[Tensor, np.ndarray], y: Union[Tensor, np.ndarray]) -> float: confusion_mat = get_confusion_matrix(y_pred=y_pred, y=y) batch_size = y_pred.shape[0] _, predicted = y_pred.max(-1) acc = (predicted == y).sum(dtype=float) / batch_size return acc.item() @torch.no_grad() def get_accuracy(confusion_matrix: Union[Tensor, np.ndarray]) -> float: if isinstance(confusion_matrix, Tensor): diagonal = confusion_matrix.diag() else: diagonal = np.diag(confusion_matrix) return (diagonal.sum() / confusion_matrix.sum()).item() @torch.no_grad() def class_accuracy(y_pred: Tensor, y: Tensor) -> Tensor: confusion_mat = get_confusion_matrix(y_pred=y_pred, y=y) return get_class_accuracy(confusion_mat) @torch.no_grad() def get_class_accuracy(confusion_matrix: Tensor) -> Tensor: if isinstance(confusion_matrix, Tensor): diagonal = confusion_matrix.diag() else: diagonal = np.diag(confusion_matrix) sum_of_columns = confusion_matrix.sum(1) if isinstance(confusion_matrix, Tensor): sum_of_columns.clamp_(min=1e-10) else: sum_of_columns = sum_of_columns.clip(min=1e-10) return diagonal / sum_of_columns ================================================ FILE: sequoia/common/metrics/metrics_utils_test.py ================================================ import numpy as np import torch from .metrics_utils import accuracy, class_accuracy, get_confusion_matrix def test_accuracy(): y_pred = torch.as_tensor( [ [0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99], ] ) y = torch.as_tensor( [ 1, 2, 0, ] ) assert np.isclose(accuracy(y_pred, y), 2 / 3) def test_per_class_accuracy_perfect(): y_pred = torch.as_tensor( [ [0.1, 0.9, 0.0], [0.1, 0.0, 0.9], [0.1, 0.4, 0.5], [0.9, 0.1, 0.0], ] ) y = torch.as_tensor( [ 1, 2, 2, 0, ] ) expected = [1, 1, 1] class_acc = class_accuracy(y_pred, y).tolist() assert class_acc == expected def test_per_class_accuracy_zero(): y_pred = torch.as_tensor( [ [0.1, 0.9, 0.0], [0.1, 0.9, 0.0], [0.1, 0.9, 0.0], [0.1, 0.9, 0.0], ] ) y = torch.as_tensor( [ 0, 0, 0, 0, ] ) expected = [0, 0, 0] class_acc = class_accuracy(y_pred, y).tolist() assert class_acc == expected def test_confusion_matrix(): y_pred = torch.as_tensor( [ [0.1, 0.9, 0.0], [0.1, 0.4, 0.5], [0.1, 0.9, 0.0], [0.9, 0.0, 0.1], ] ) y = torch.as_tensor( [ 0, 0, 1, 0, ] ) expected = [ [1, 1, 1], [0, 1, 0], [0, 0, 0], ] confusion_mat = get_confusion_matrix(y_pred=y_pred, y=y).tolist() assert confusion_mat == expected def test_per_class_accuracy_realistic(): y_pred = torch.as_tensor( [ [0.9, 0.0, 0.0], # correct for class 0 [0.1, 0.5, 0.4], # correct for class 1 [0.1, 0.0, 0.9], # correct for class 2 [0.1, 0.8, 0.1], # wrong, should be 1 [0.1, 0.0, 0.9], # wrong, should be 0 [0.9, 0.0, 0.0], # wrong, should be 1 [0.1, 0.5, 0.4], # wrong, should be 2 [0.1, 0.4, 0.5], # correct for class 2 ] ) y = torch.as_tensor( [ 0, 1, 2, 0, 0, 1, 2, 2, ] ) expected = [1 / 3, 1 / 2, 2 / 3] class_acc = class_accuracy(y_pred, y).tolist() assert all(np.isclose(class_acc, expected)) ================================================ FILE: sequoia/common/metrics/regression.py ================================================ """ Metrics class for regression. Gives the mean squared error between a prediction Tensor `y_pred` and the target tensor `y`. """ from dataclasses import InitVar, dataclass from functools import total_ordering from typing import Any, Dict, Optional, Union import torch import torch.nn.functional as functional from torch import Tensor from sequoia.utils.logging_utils import get_logger from .metrics import Metrics logger = get_logger(__name__) @total_ordering @dataclass class RegressionMetrics(Metrics): """TODO: Use this in the RL settings!""" mse: Tensor = 0.0 # type: ignore l1_error: Tensor = 0.0 # type: ignore x: InitVar[Optional[Tensor]] = None h_x: InitVar[Optional[Tensor]] = None y_pred: InitVar[Optional[Tensor]] = None y: InitVar[Optional[Tensor]] = None def __post_init__( self, x: Tensor = None, h_x: Tensor = None, y_pred: Tensor = None, y: Tensor = None ): super().__post_init__(x=x, h_x=h_x, y_pred=y_pred, y=y) if y_pred is not None and y is not None: if y.shape != y_pred.shape: logger.warning( UserWarning( f"Shapes aren't the same! (y_pred.shape={y_pred.shape}, " f"y.shape={y.shape}" ) ) else: self.mse = functional.mse_loss(y_pred, y) self.l1_error = functional.l1_loss(y_pred, y) self.mse = torch.as_tensor(self.mse) self.l1_error = torch.as_tensor(self.l1_error) @property def objective(self) -> float: return float(self.mse) def __add__(self, other: "RegressionMetrics") -> "RegressionMetrics": # NOTE: Creates new tensors, and links them to the previous ones by # addition so the grads are linked. if self.mse is not None: mse = self.mse.clone() if other.mse is not None: mse = other.mse.clone() else: mse = torch.zeros(1) if self.l1_error is not None: l1_error = self.l1_error.clone() if other.l1_error is not None: l1_error = other.l1_error.clone() else: l1_error = torch.zeros(1) return RegressionMetrics( n_samples=self.n_samples + other.n_samples, mse=mse, l1_error=l1_error, ) def to_pbar_message(self) -> Dict[str, Union[str, float]]: message = super().to_pbar_message() message["mse"] = float(self.mse.item()) message["l1_error"] = float(self.l1_error.item()) return message def to_log_dict(self, verbose=False): log_dict = super().to_log_dict(verbose=verbose) log_dict["mse"] = self.mse log_dict["l1_error"] = self.l1_error return log_dict def __mul__(self, factor: Union[float, Tensor]) -> "Loss": # Multiplying a 'RegressionMetrics' object multiplies its 'mse'. return RegressionMetrics( n_samples=self.n_samples, mse=self.mse * factor, l1_error=self.l1_error * factor, ) def __rmul__(self, factor: Union[float, Tensor]) -> "Loss": # Reverse-order multiply, used to do b * a when a * b returns # NotImplemented. return self.__mul__(factor) def __truediv__(self, coefficient: Union[float, Tensor]) -> "RegressionMetrics": # Dividing a RegressionMetrics object divides its mean squared error. return RegressionMetrics( n_samples=self.n_samples, mse=self.mse / coefficient, l1_error=self.l1_error / coefficient, ) def __lt__(self, other: Union["RegressionMetrics", Any]) -> bool: if isinstance(other, RegressionMetrics): return self.mse < other.mse return NotImplemented def __ge__(self, other: Union["RegressionMetrics", Any]) -> bool: if isinstance(other, RegressionMetrics): return self.mse >= other.mse return NotImplemented ================================================ FILE: sequoia/common/metrics/rl_metrics.py ================================================ from dataclasses import dataclass, field from typing import Any, Dict, Union from .metrics import Metrics @dataclass class EpisodeMetrics(Metrics): """Metrics for Episodes in RL. n_samples is the number of stored episodes. """ n_samples: int = field(default=1, compare=False) # The average reward per episode. mean_episode_reward: float = 0.0 # The average length of each episode. mean_episode_length: float = 0 @property def n_episodes(self) -> int: return self.n_samples @property def objective_name(self) -> str: """Returns the name to be associated with the objective of this class. Returns ------- str The name associated with the objective. """ return "Mean Reward per Episode" @property def mean_reward_per_step(self) -> float: return self.mean_episode_reward / self.mean_episode_length def __add__(self, other: Union["EpisodeMetrics", Any]): if isinstance(other, (int, float)) and other == 0: # This makes `sum(list_of_metrics)` work!. return self if isinstance(other, Metrics) and other == Metrics(): return self if not isinstance(other, EpisodeMetrics): return NotImplemented other: EpisodeMetrics other_total_reward = other.mean_episode_reward * other.n_samples other_total_length = other.mean_episode_length * other.n_samples self_total_reward = self.mean_episode_reward * self.n_samples self_total_length = self.mean_episode_length * self.n_samples new_n_samples = self.n_samples + other.n_samples new_mean_reward = (self_total_reward + other_total_reward) / new_n_samples new_mean_length = (self_total_length + other_total_length) / new_n_samples return EpisodeMetrics( n_samples=new_n_samples, mean_episode_reward=new_mean_reward, mean_episode_length=new_mean_length, ) @property def total_reward(self) -> float: return self.n_episodes * self.mean_episode_reward @property def total_steps(self) -> int: return round(self.n_episodes * self.mean_episode_length) def to_pbar_message(self) -> Dict[str, Union[str, float]]: return self.to_log_dict() @property def objective(self) -> float: return self.mean_episode_reward def to_log_dict(self, verbose: bool = False): log_dict = { "Episodes": self.n_episodes, "Mean reward per episode": self.mean_episode_reward, "Mean reward per step": self.mean_reward_per_step, } if verbose: log_dict.update( { "Total steps": int(self.total_steps), "Total reward": int(self.total_reward), "Mean episode length": float(self.mean_episode_length), } ) return log_dict @property def episodes(self) -> int: return self.n_samples @property def mean_reward_per_episode(self) -> float: return self.mean_episode_reward # @dataclass # class RLMetrics(Metrics): # episodes: List[EpisodeMetrics] = field(default_factory=list, repr=False) # average_episode_length: int = field(default=0) # average_episode_reward: float = field(default=0.) # def __post_init__(self): # if self.episodes: # self.n_samples = len(self.episodes) # self.average_episode_length = sum(ep.episode_length for ep in self.episodes) / self.n_samples # self.average_episode_reward = sum(ep.total_reward for ep in self.episodes) / self.n_samples # def __add__(self, other: Union["RLMetrics", EpisodeMetrics, Any]) -> "RLMetrics": # if isinstance(other, RLMetrics): # return RLMetrics( # episodes = self.episodes + other.episodes, # ) # if isinstance(other, EpisodeMetrics): # self.episodes.append(other) # return self # return NotImplemented # def to_pbar_message(self) -> Dict[str, Union[str, float]]: # log_dict = self.to_log_dict() # # Rename "n_samples" to "episodes": # log_dict["episodes"] = log_dict.pop("n_samples") # return log_dict @dataclass class GradientUsageMetric(Metrics): """Small Metrics to report the fraction of gradients that were used vs 'wasted', when using batch_size > 1. """ used_gradients: int = 0 wasted_gradients: int = 0 used_gradients_fraction: float = 0.0 def __post_init__(self): self.n_samples = self.used_gradients + self.wasted_gradients if self.n_samples: self.used_gradients_fraction = self.used_gradients / self.n_samples def __add__(self, other: Union["GradientUsageMetric", Any]) -> "GradientUsageMetric": if not isinstance(other, GradientUsageMetric): return NotImplemented return GradientUsageMetric( used_gradients=self.used_gradients + other.used_gradients, wasted_gradients=self.wasted_gradients + other.wasted_gradients, ) def to_pbar_message(self) -> Dict[str, Union[str, float]]: return {"used_fraction": self.used_gradients_fraction} ================================================ FILE: sequoia/common/replay.py ================================================ """ Labeled, Unlabeled and Semi-supervised Replay buffer objects. TODO: Unused for now, but could be used in a LightningModule. """ import random from collections import Counter, deque from dataclasses import dataclass from typing import * import torch from simple_parsing import field from torch import Tensor from torch.utils.data import TensorDataset from sequoia.utils.logging_utils import get_logger from sequoia.utils.serialization import Pickleable, Serializable logger = get_logger(__name__) T = TypeVar("T") class ReplayBuffer(deque, Deque[T], Pickleable): """Simple implementation of a replay buffer. Uses a doubly-ended Queue, which unfortunately isn't registered as a buffer for pytorch. """ def __init__(self, capacity: int): super().__init__(maxlen=capacity) # self.extend("ABC") self.capacity: int = capacity # TODO: figure out how to persist the buffer with state_dict maybe? # self.register_buffer("memory", torch.zeros(1)) self.labeled: Optional[bool] = None self.current_size: int = 0 def as_dataset(self) -> TensorDataset: contents = zip(*self) return TensorDataset(*map(torch.stack, contents)) def _push_and_sample(self, *values: T, size: int) -> List[T]: """Pushes `values` into the buffer and samples `size` samples from it. NOTE: In contrast to `push`, allows sampling more than `len(self)` samples from the buffer (up to `len(self) + len(values)`) Args: *values (T): An iterable of items to push. size (int): Number of samples to take. """ extended = list(self) extended.extend(values) # NOTE: Type hints indicate that random.shuffle expects a list, not # a deque. Seems to work just fine though. random.shuffle(extended) # type: ignore assert size <= len( extended ), f"Asked to sample {size} values, while there are only {len(extended)} in the batch + buffer!" self.extend(extended) return extended[:size] def _sample(self, size: int) -> List[T]: assert size <= len( self ), f"Asked to sample {size} values while there are only {len(self)} in the buffer!" return random.sample(self, size) @property def full(self) -> bool: return len(self) == self.capacity class UnlabeledReplayBuffer(ReplayBuffer[Tensor]): def sample_batch(self, size: int) -> Tensor: batch = super()._sample(size) return torch.stack(batch) def push(self, x_batch: Tensor, y_batch: Tensor = None) -> None: super().extend(x_batch) def push_and_sample(self, x_batch: Tensor, y_batch: Tensor = None, size: int = None) -> Tensor: size = x_batch.shape[0] if size is None else size return torch.stack(super()._push_and_sample(x_batch, size=size)) class LabeledReplayBuffer(ReplayBuffer[Tuple[Tensor, Tensor]]): def sample(self, size: int) -> Tuple[Tensor, Tensor]: list_of_pairs = super()._sample(size) data_list, target_list = zip(*list_of_pairs) return torch.stack(data_list), torch.stack(target_list) def push(self, x_batch: Tensor, y_batch: Tensor) -> None: super().extend(zip(x_batch, y_batch)) def push_and_sample( self, x_batch: Tensor, y_batch: Tensor, size: int = None ) -> Tuple[Tensor, Tensor]: size = x_batch.shape[0] if size is None else size list_of_pairs = super()._push_and_sample(*zip(x_batch, y_batch), size=size) data_list, target_list = zip(*list_of_pairs) return torch.stack(data_list), torch.stack(target_list) def samples_per_class(self) -> Dict[int, int]: """Returns a Counter showing how many samples there are per class.""" # TODO: Idea, could use the None key for unlabeled replay buffer. return Counter(int(y) for x, y in self) class SemiSupervisedReplayBuffer(object): def __init__(self, labeled_capacity: int, unlabeled_capacity: int = 0): """Semi-Supervised (ish) version of a replay buffer. With the default parameters, acts just like a regular replay buffer. When passed `unlabeled_capacity`, allows for storing unlabeled samples as well as labeled samples. Unlabeled samples are stored in a different buffer than labeled samples. Allows sampling both labeled and unlabeled samples. Args: labeled_capacity (int): [description] unlabeled_capacity (int, optional): [description]. Defaults to 0. """ super().__init__() self.labeled_capacity = labeled_capacity self.unlabeled_capacity = unlabeled_capacity self.labeled = LabeledReplayBuffer(labeled_capacity) self.unlabeled = UnlabeledReplayBuffer(unlabeled_capacity) def sample(self, size: int) -> Tuple[Tensor, Tensor]: """Takes `size` (labeled) samples from the buffer. Args: size (int): Number of samples to return. Returns: Tuple[Tensor, Tensor]: batched data and label tensors. """ assert size <= len(self.labeled), ( f"Asked to sample {size} values while there are only " f"{len(self.labeled)} labeled samples in the buffer! " ) return self.labeled.sample(size) def sample_unlabeled(self, size: int, take_from_labeled_buffer_first: bool = None) -> Tensor: """Samples `size` unlabeled samples. Can also use samples from the labeled replay buffer (while discarding the labels) if there is no unlabeled replay buffer. Args: size (int): Number of x's to sample take_from_labeled_buffer_first (bool, optional): When `None` (default), doesn't take any samples from the labeled buffer. When `True`, prioritizes taking samples from the labeled replay buffer. When `False`, prioritizes taking samples from the unlabeled replay buffer, but take the remaining samples from the labeled buffer. Returns: Tensor: A batch of X's. """ total = len(self.unlabeled) if take_from_labeled_buffer_first is not None: total += len(self.labeled) assert size <= total, ( f"Asked to sample {size} values while there are only " f"{total} unlabeled samples in total in the buffer! " ) # Number of x's we still have to sample. samples_left = size tensors: List[Tensor] = [] if take_from_labeled_buffer_first: # Take labeled samples and drop the label. n_samples_from_labeled = min(len(self.labeled), samples_left) if n_samples_from_labeled > 0: data, _ = self.labeled.sample(size) samples_left -= data.shape[0] tensors.append(data) # Take the rest of the samples from the unlabeled buffer. n_samples_from_labeled = min(len(self.labeled), samples_left) data = self.unlabeled.sample_batch(samples_left) tensors.append(data) samples_left -= data.shape[0] if take_from_labeled_buffer_first is False: # Take the rest of the labeled samples and drop the label. n_samples_from_labeled = min(len(self.labeled), samples_left) if n_samples_from_labeled > 0: data, _ = self.labeled.sample(size) samples_left -= data.shape[0] tensors.append(data) data = torch.cat(tensors) return data def push_and_sample(self, x: Tensor, y: Tensor, size: int = None) -> Tuple[Tensor, Tensor]: size = x.shape[0] if size is None else size self.unlabeled.push(x) return self.labeled.push_and_sample(x, y, size=size) def push_and_sample_unlabeled(self, x: Tensor, y: Tensor = None, size: int = None) -> Tensor: size = x.shape[0] if size is None else size if y is not None: self.labeled.push(x, y) return self.unlabeled.push_and_sample(x, size=size) def clear(self): self.labeled.clear() self.unlabeled.clear() @dataclass class ReplayOptions(Serializable): """Options related to Replay.""" # Size of the labeled replay buffer. labeled_buffer_size: int = field(0, alias="replay_buffer_size") # Size of the unlabeled replay buffer. unlabeled_buffer_size: int = 0 # Always use the replay buffer to help "smooth" out the data stream. always_use_replay: bool = False # Sampling size, when used as described above to smooth out the data stream. # If not given, will use the same value as the batch size. sampled_batch_size: Optional[int] = None @property def enabled(self) -> bool: return self.labeled_buffer_size > 0 or self.unlabeled_buffer_size > 0 ================================================ FILE: sequoia/common/spaces/__init__.py ================================================ """ Custom `gym.spaces.Space` subclasses used by Sequoia. """ from .image import Image, ImageTensorSpace from .named_tuple import NamedTuple, NamedTupleSpace from .space import Space from .sparse import Sparse from .tensor_spaces import TensorBox, TensorDiscrete, TensorMultiDiscrete, TensorSpace from .typed_dict import TypedDictSpace ================================================ FILE: sequoia/common/spaces/image.py ================================================ """ IDEA: Create a subclass of spaces.Box for images. """ from typing import Optional, Tuple, Union import numpy as np import torch from gym import spaces from gym.vector.utils import batch_space from .space import Space, T from .tensor_spaces import TensorBox def could_become_image(space: spaces.Space) -> bool: if not isinstance(space, spaces.Box): return False shape = space.shape return len(shape) == 3 and ( shape[0] == shape[1] and shape[2] in {1, 3} or shape[1] == shape[2] and shape[0] in {1, 3} ) class Image(spaces.Box, Space[T]): """Subclass of `gym.spaces.Box` for images. Comes with a few useful attributes, like `h`, `w`, `c`, `channels_first`, `channels_last`, etc. """ def __init__( self, low: Union[float, np.ndarray], high: Union[float, np.ndarray], shape: Tuple[int, ...] = None, dtype: np.dtype = None, **kwargs, ): if dtype is None: if isinstance(low, int) and isinstance(high, int) and low == 0 and high == 255: dtype = np.uint8 else: dtype = np.float32 super().__init__(low=low, high=high, shape=shape, dtype=dtype, **kwargs) self.channels_first: bool = False # Optional batch dimension self.b: Optional[int] = None self.h: int self.w: int self.c: int assert len(self.shape) in {3, 4}, "Need three or four dimensions." if len(self.shape) == 3: self.b = None if self.shape[0] in {1, 3}: self.c, self.h, self.w = self.shape self.channels_first = True elif self.shape[-1] in {1, 3}: self.h, self.w, self.c = self.shape else: # NOTE: will assume that in channels_first for now, but won't set # `channels_first` property. self.c, self.h, self.w = self.shape elif len(self.shape) == 4: if self.shape[1] in {1, 3}: self.b, self.c, self.h, self.w = self.shape self.channels_first = True elif self.shape[-1] in {1, 3}: self.b, self.h, self.w, self.c = self.shape else: # NOTE: will assume that in channels_first for now: self.b, self.c, self.h, self.w = self.shape if any(v is None for v in [self.h, self.w, self.c]): raise RuntimeError( f"Shouldn't be using an Image space, since the shape " f"doesn't appear to be an image: {self.shape}" ) @property def channels(self) -> int: return self.c @property def height(self) -> int: return self.h @property def width(self) -> int: return self.w @property def batch_size(self) -> Optional[int]: return self.b @classmethod def from_box(cls, box_space: spaces.Box): return cls(box_space.low, box_space.high, dtype=box_space.dtype) @classmethod def wrap(cls, space: Union["Image", spaces.Box]): if isinstance(space, Image): return space if isinstance(space, spaces.Box): return cls.from_box(space) raise NotImplementedError(space) @property def channels_last(self) -> bool: return not self.channels_first def __repr__(self): return f"{type(self).__name__}({self.low.min()}, {self.high.max()}, {self.shape}, {self.dtype})" def sample(self) -> T: return super().sample() class ImageTensorSpace(Image, TensorBox): @classmethod def from_box(cls, box_space: TensorBox, device: torch.device = None): device = device or box_space.device return cls(box_space.low, box_space.high, dtype=box_space.dtype, device=device) def __repr__(self): return f"{type(self).__name__}({self.low.min()}, {self.high.max()}, {self.shape}, {self.dtype}, device={self.device})" def sample(self): self.dtype = self._numpy_dtype s = super().sample() self.dtype = self._torch_dtype return torch.as_tensor(s, dtype=self._torch_dtype, device=self.device) # @to_tensor.register # def _(space: Image, # sample: Union[np.ndarray, Tensor], # device: torch.device = None) -> Union[Tensor]: # """ Converts a sample from the given space into a Tensor. """ # return torch.as_tensor(sample, device=device) @batch_space.register def _batch_image_space(space: Image, n: int = 1) -> Union[Image, spaces.Box]: if space.b is not None: # This might happen in BatchedVectorEnv, when creating env_a and env_b, # which have an extra batch/chunk dimension. if space.b == 1: if n == 1: return space repeats = [n, 1, 1, 1] else: # instead maybe we should just fall back to a Box Space? repeats = [n] + [1] * space.low.ndim low, high = np.tile(space.low, repeats), np.tile(space.high, repeats) return spaces.Box(low=low, high=high, dtype=space.dtype) raise RuntimeError(f"can't batch an already batched image space {space}, n={n}") else: repeats = [n, 1, 1, 1] low, high = np.tile(space.low, repeats), np.tile(space.high, repeats) img = type(space)(low=low, high=high, dtype=space.dtype) return img ================================================ FILE: sequoia/common/spaces/named_tuple.py ================================================ """ IDEA: Subclass of `gym.spaces.Tuple` that yields namedtuples, as a bit of a hybrid between `gym.spaces.Dict` and `gym.spaces.Tuple`. """ from collections import namedtuple from collections.abc import Mapping as MappingABC from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Type, Union import numpy as np from gym import Space, spaces from sequoia.utils.generic_functions._namedtuple import NamedTuple class NamedTupleSpace(spaces.Tuple): """ A tuple (i.e., product) of simpler (named) spaces. Samples are namedtuples. Example usage: ```python self.observation_space = NamedTupleSpace(x=spaces.Discrete(2), t=spaces.Discrete(3)) ``` Note: here the dtype is actually the type of namedtuple to use, not a numpy dtype. """ def __init__( self, spaces: Union[Mapping[str, Space], Sequence[Space]] = None, names: Sequence[str] = None, dtype: Type[NamedTuple] = None, **kwargs, ): self._spaces: Dict[str, Space] = {} if isinstance(spaces, MappingABC): assert names is None self._spaces = dict(spaces.items()) elif kwargs: assert all(isinstance(k, str) and isinstance(v, Space) for k, v in kwargs.items()) self._spaces = kwargs else: # if not names: # try: # names = [getattr(space, "__name") for space in spaces] # except AttributeError: # pass assert names is not None, "need to pass names when spaces isn't a mapping." assert spaces and len(names) == len(spaces), "need to pass a name for each space" self._spaces = dict(zip(names, spaces)) # NOTE: dict.values() is ordered since python 3.7. spaces = tuple(self._spaces.values()) super().__init__(spaces) self.names: Sequence[str] = tuple(self._spaces.keys()) self.dtype: Type[Tuple] = dtype or namedtuple("NamedTuple", self.names) # idea: could use this _name attribute to change the __repr__ first part self._name = self.dtype.__name__ assert all(name == key for name, key in zip(self.names, self._spaces.keys())) def __getitem__(self, index: Union[int, str]) -> Space: if isinstance(index, str): return self._spaces[index] return super().__getitem__(index) def __getattr__(self, attr: str) -> Space: if attr == "_spaces": raise AttributeError(attr) if attr in self._spaces: return self._spaces[attr] raise AttributeError(attr) def __repr__(self): # TODO: Tricky: decide what name to show for the space class: cls_name = type(self).__name__ # cls_name = self._name or type(self).__name__ return ( f"{cls_name}(" + ", ".join([str(k) + "=" + str(s) for k, s in self._spaces.items()]) + ")" ) def _replace(self, **kwargs): """replaces the given subspaces with newer ones, maintaining the current ordering. """ spaces = self._spaces.copy() assert all(k in spaces for k in kwargs), "no new keys allowed" spaces.update(kwargs) return type(self)(**spaces) def __eq__(self, other: Union["NamedTupleSpace", Any]) -> bool: return isinstance(other, spaces.Tuple) and tuple(self.spaces) == tuple(other.spaces) def sample(self): return self.dtype(*super().sample()) def contains(self, x) -> bool: if isinstance(x, MappingABC): # TODO: If a namedtuple/dataclass has more items than those required # by this space, should we consider it valid if all its items are # contained in their respective spaces in `self`? x = tuple(x[k] for k in self.names) # x = tuple(x.values()) return super().contains(x) def keys(self) -> List[str]: return self._spaces.keys() def values(self) -> List[Space]: return self._spaces.values() def items(self) -> Iterable[Tuple[str, Space]]: yield from self._spaces.items() # See https://github.com/openai/gym/issues/2140 : Fix __eq__ of gym.spaces.Tuple def __eq__(self, other: Union["NamedTupleSpace", Any]) -> bool: # BUG in openai gym: spaces passed to the spaces.Tuple constructor could # be a list of spaces, rather than a tuple, and so this might return # False when it shouldn't. return isinstance(other, spaces.Tuple) and tuple(self.spaces) == tuple(other.spaces) spaces.Tuple.__eq__ = __eq__ from gym.spaces.utils import flatten from gym.vector.utils import batch_space @batch_space.register(NamedTupleSpace) def batch_namedtuple_space(space: NamedTupleSpace, n: int = 1): return NamedTupleSpace( **{key: batch_space(space[key], n) for key in space.names}, dtype=space.dtype ) @flatten.register def flatten_namedtuple_space_sample(space: NamedTupleSpace, x: NamedTuple): assert not isinstance(x, Batch), f"NamedTupleSpace, shouldn't have Batch samples: {space} {x}" return np.concatenate([flatten(s, x_part) for x_part, s in zip(x, space.spaces)]) ================================================ FILE: sequoia/common/spaces/named_tuple_test.py ================================================ import numpy as np import pytest from gym import spaces from gym.spaces import Box, Discrete from gym.vector.utils import batch_space from .named_tuple import NamedTuple, NamedTupleSpace pytestmark = pytest.mark.skip( reason="Removing the NamedTuple space and NamedTuple class in favour of TypedDict.", ) def test_basic(): named_tuple_space = NamedTupleSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ) v = named_tuple_space.sample() print(v) assert v in named_tuple_space # TODO: Maybe re-use all the tests for gym.spaces.Tuple in the gym repo # somehow? normal_tuple_space = spaces.Tuple( [ Box(0, 1, (2, 2)), Discrete(2), Box(0, 1, (2, 2)), ] ) assert normal_tuple_space.sample() in named_tuple_space assert named_tuple_space.sample() in normal_tuple_space class StateTransition(NamedTuple): current_state: np.ndarray action: int next_state: np.ndarray def test_basic_with_dtype(): named_tuple_space = NamedTupleSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) v = named_tuple_space.sample() assert v in named_tuple_space assert isinstance(v, StateTransition) normal_tuple_space = spaces.Tuple( [ Box(0, 1, (2, 2)), Discrete(2), Box(0, 1, (2, 2)), ] ) assert normal_tuple_space.sample() in named_tuple_space assert named_tuple_space.sample() in normal_tuple_space @pytest.mark.xfail() def test_isinstance_namedtuple(): named_tuple_space = NamedTupleSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) assert isinstance(named_tuple_space, NamedTupleSpace) assert isinstance(named_tuple_space.sample(), NamedTuple) def test_equals_tuple_space_with_same_items(): """Test that a NamedTupleSpace is considered equal to a Tuple space if the spaces are in the same order and all equal (regardless of the names). """ named_tuple_space = NamedTupleSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) tuple_space = spaces.Tuple( [ Box(0, 1, (2, 2)), Discrete(2), Box(0, 1, (2, 2)), ] ) assert named_tuple_space == tuple_space assert tuple_space == named_tuple_space def test_batch_objets_considered_valid_samples(): from dataclasses import dataclass import numpy as np from sequoia.common.batch import Batch @dataclass(frozen=True) class StateTransitionDataclass(Batch): current_state: np.ndarray action: int next_state: np.ndarray named_tuple_space = NamedTupleSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransitionDataclass, ) obs = StateTransitionDataclass( current_state=np.ones([2, 2]) / 2, action=1, next_state=np.zeros([2, 2]), ) assert obs in named_tuple_space assert named_tuple_space.sample() in named_tuple_space assert isinstance(named_tuple_space.sample(), StateTransitionDataclass) def test_batch_space(): named_tuple_space = NamedTupleSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) assert batch_space(named_tuple_space, n=5) == NamedTupleSpace( current_state=Box(0, 1, (5, 2, 2)), action=spaces.MultiDiscrete([2, 2, 2, 2, 2]), next_state=Box(0, 1, (5, 2, 2)), dtype=StateTransition, ) ## IDEA: Creating a space like this, using the same syntax as with NamedTuple # class StateTransitionSpace(NamedTupleSpace): # current_state: Box = Box(0, 1, (2,2)) # action: Discrete = Discrete(2) # current_state: Box = Box(0, 1, (2,2)) # space = StateTransitionSpace() # space.sample() ================================================ FILE: sequoia/common/spaces/space.py ================================================ """ Small typing improvements to the `gym.spaces.Space` class. """ from typing import Any, Generic, TypeVar, Union from gym.spaces import Space as _Space T = TypeVar("T") class Space(_Space, Generic[T]): def sample(self) -> T: return super().sample() def __contains__(self, x: Union[T, Any]) -> bool: return super().__contains__(x) def contains(self, v: Union[T, Any]) -> bool: return super().contains(v) ================================================ FILE: sequoia/common/spaces/sparse.py ================================================ """ 'wrapper' around a gym.Space that adds has a probability of sampling `None` instead of a sample from the 'base' space. As a result, `None` is always a valid sample from any Sparse space. """ import multiprocessing as mp from ctypes import c_bool # from gym.spaces.utils import flatdim, flatten from functools import singledispatch from multiprocessing.context import BaseContext from typing import Any, Dict, Optional, Sequence, Tuple, Union import gym import gym.spaces.utils import gym.vector.utils.numpy_utils import gym.vector.utils.shared_memory import numpy as np import torch from gym import spaces from gym.vector.utils import batch_space, concatenate from gym.vector.utils.numpy_utils import concatenate from torch import Tensor from .space import Space, T class Sparse(Space[Optional[T]]): """Space which returns a value of `None` `sparsity`% of the time when sampled. `None` is also a valid sample of this space in addition to those of the wrapped space. TODO: Maybe refactor this into a mixin class, a bit like `TensorSpace`? If so, then make sure that we don't suddenly need to create SparseTensorBox and the like. """ def __init__(self, base: Space[T], sparsity: float = 0.0): self.base = base assert 0 <= sparsity <= 1, "invalid spasity, needs to be in [0, 1]" self._sparsity = sparsity # Would it ever cause a problem to have different dtypes for different # instances of the same space? # dtype = self.base.dtype if sparsity == 0. else np.object_ super().__init__(shape=self.base.shape, dtype=np.object_) @property def sparsity(self) -> float: return self._sparsity # def __getattr__(self, attr: str): # return getattr(self.base, attr) def seed(self, seed=None): super().seed(seed) return self.base.seed(seed=seed) def sample(self) -> Optional[T]: if self.sparsity == 0: return self.base.sample() if self.sparsity == 1.0: return None p = self.np_random.random() if p <= self.sparsity: return None else: return self.base.sample() def contains(self, x: Union[Optional[T], Any]) -> bool: """ Return boolean specifying if x is a valid member of this space """ return x is None or self.base.contains(x) def __repr__(self): return f"Sparse({self.base}, sparsity={self.sparsity})" def __eq__(self, other: Any): if not isinstance(other, Sparse): return NotImplemented return other.base == self.base and other.sparsity == self.sparsity def to_jsonable(self, sample_n): assert False, "TODO: This isn't really ever used anywhere, even in Gym, is it?" super().to_jsonable # serialize as dict-repr of vectors return { key: space.to_jsonable([sample[key] for sample in sample_n]) for key, space in self.spaces.items() } def from_jsonable(self, sample_n): assert False, "TODO: This isn't really ever used anywhere, even in Gym, is it?" dict_of_list = {} for key, space in self.spaces.items(): dict_of_list[key] = space.from_jsonable(sample_n[key]) ret = [] for i, _ in enumerate(dict_of_list[key]): entry = {} for key, value in dict_of_list.items(): entry[key] = value[i] ret.append(entry) return ret # Customize how these functions handle `Sparse` spaces by making them # singledispatch callables and registering a new callable. def _is_singledispatch(module_function): return hasattr(module_function, "registry") def register_sparse_variant(module, module_fn_name: str): """Converts a function from the given module to a singledispatch callable, and registers the wrapped function as the callable to use for Sparse spaces. The module function must have the space as the first argument for this to work. """ module_function = getattr(module, module_fn_name) # Convert the function to a singledispatch callable. if not _is_singledispatch(module_function): module_function = singledispatch(module_function) setattr(module, module_fn_name, module_function) # Register the function as the callable to use when the first arg is a # Sparse object. def wrapper(function): module_function.register(Sparse, function) return function return wrapper @register_sparse_variant(gym.spaces.utils, "flatdim") def flatdim_sparse(space: Sparse) -> int: return gym.spaces.utils.flatdim(space.base) @register_sparse_variant(gym.spaces.utils, "flatten") def flatten_sparse(space: Sparse[T], x: Optional[T]) -> Optional[np.ndarray]: return np.array([None]) if x is None else gym.spaces.utils.flatten(space.base, x) @register_sparse_variant(gym.spaces.utils, "flatten_space") def flatten_sparse_space(space: Sparse[T]) -> Optional[np.ndarray]: space = gym.spaces.utils.flatten_space(space.base) space.dtype = np.object_ return space @register_sparse_variant(gym.spaces.utils, "unflatten") def unflatten_sparse(space: Sparse[T], x: np.ndarray) -> Optional[T]: if len(x) == 1 and x[0] is None: return None else: return gym.spaces.utils.unflatten(space.base, x) @register_sparse_variant(gym.vector.utils, "create_empty_array") def create_empty_array_sparse(space: Sparse, n=1, fn=np.zeros) -> np.ndarray: return fn([n], dtype=np.object_) @register_sparse_variant(gym.vector.utils.shared_memory, "create_shared_memory") def create_shared_memory_for_sparse_space(space: Sparse, n: int = 1, ctx: BaseContext = mp): # The shared memory should be something that can accomodate either 'None' # or a sample from the space. Therefore we should probably just create the # array for the base space, but then how would store a 'None' value in that # space? # What if we return a tuple or something, in which we actually add an 'is-none' print(f"Creating shared memory for {n} entries from space {space}") return { "is_none": ctx.Array(c_bool, np.zeros(n, dtype=np.bool)), "value": gym.vector.utils.shared_memory.create_shared_memory(space.base, n, ctx), } @register_sparse_variant(gym.vector.utils.shared_memory, "write_to_shared_memory") def write_to_shared_memory( index: int, value: Optional[T], shared_memory: Union[Dict, Tuple, BaseContext.Array], space: Union[Sparse[T], gym.Space], ): print(f"Writing entry from space {space} at index {index} in shared memory") if isinstance(space, Sparse): assert isinstance(shared_memory, dict) is_none_array = shared_memory["is_none"] value_array = shared_memory["value"] raise NotImplementedError(f"Still debugging this") # assert False, index # assert False, is_none_array is_none_array[index] = value is None if value is not None: return write_to_shared_memory(index, value, value_array, space.base) else: # TODO: Would this cause a problem, say in the case where we have a # regular space like Tuple that contains some Sparse spaces, then would # calling this "old" function here prevent this "new" function from # being used on the children? return gym.vector.utils.shared_memory(index, value, shared_memory, space) from gym.vector.utils.shared_memory import read_from_shared_memory as read_from_shared_memory_ @register_sparse_variant(gym.vector.utils.shared_memory, "read_from_shared_memory") def read_from_shared_memory( shared_memory: Union[Dict, Tuple, BaseContext.Array], space: Sparse, n: int = 1 ): print(f"Reading {n} entries from space {space} from shared memory") if isinstance(space, Sparse): assert isinstance(shared_memory, dict) is_none_array = list(shared_memory["is_none"]) value_array = shared_memory["value"] assert len(is_none_array) == len(value_array) == n # This might include some garbage (or default) values, which weren't # set. read_values = read_from_shared_memory(value_array, space.base, n) print(f"Read values from space: {read_values}") print(f"is_none array: {list(is_none_array)}") # assert False, (list(is_none_array), read_values, space) values = [None if is_none_array[index] else read_values[index] for index in range(n)] print(f"resulting values: {values}") return values return read_from_shared_memory_(shared_memory, space.base, n) return read_from_shared_memory_(shared_memory, space, n) @register_sparse_variant(gym.vector.utils, "batch_space") def batch_sparse_space(space: Sparse, n: int = 1) -> gym.Space: """Batch this sparse space. NOTE: The sparsity of `space` currently has an important impact on the kind of space returned! Taking a base space of type `Discrete` as an example: - If `space.sparsity == 0 or space.sparsity == 1`, then the result is a Sparse[MultiDiscrete], - *However*, if `0 < sparsity < 1`, then the result is a `Tuple[Sparse[Discrete], ...]`. """ # NOTE: This means we do something different depending on the sparsity. # Could that become an issue? # assert _is_singledispatch(batch_space) sparsity = space.sparsity # NOTE: It is tempting to just make this more consistent by always returning the same kind of # result, because it's nice to avoid dealing with arrays like `np.array([None, 1, ])` # or, even worse, `np.array([None, None])` which are not fun. # *HOWEVER*, it's not a good idea! As an example, when using VectorEnvs, the spaces are just to # represent what the observations of the VectorEnv will look like. Since each env has 'its own' # Sparse[Discrete] space, and they are "sampled" independantly, then if 0 < sparsity < 1 we WILL # have some entries be None and other not. Therefore, it's better in that case to just return # the tuple of sparse spaces. # return Sparse(batch_space(space.base, n), sparsity=sparsity) # TODO: Use something like this eventually. There are still problem with to_tensor. # return SparseMultiDiscrete( # np.full((n,), space.n, dtype=space.base.dtype), sparsity=space.sparsity # ) if sparsity in {0, 1}: # If the space has 0 sparsity, then batch it just like you would its # base space. # TODO: This is convenient, but not very consistent, as the length of # the batches changes depending on the sparsity of the space.. return Sparse(batch_space(space.base, n), sparsity=sparsity) # Sticking to the default behaviour from gym for now, which is to just # return a tuple of length n with n copies of the space. return spaces.Tuple(tuple(space for _ in range(n))) # We could also do this, where we make the sub-spaces sparse: # batch_space(Sparse>) -> Tuple), batch_space(Sparse)> if isinstance(space.base, spaces.Tuple): return spaces.Tuple( [ spaces.Tuple([Sparse(sub_space, sparsity) for _ in range(n)]) for sub_space in space.base.spaces ] ) if isinstance(space.base, spaces.Dict): return spaces.Dict( { name: Sparse(batch_space(sub_space, n), sparsity) for name, sub_space in space.base.spaces.items() } ) return batch_space(space.base, n) @register_sparse_variant(gym.vector.utils.numpy_utils, "concatenate") def concatenate_sparse_items( space: Sparse, items: Sequence[Optional[T]], out: Union[tuple, dict, np.ndarray] ) -> Optional[Sequence[T]]: if space.sparsity == 0: if not all(item is not None for item in items): raise ValueError("Space has sparsity of 0, there shouldn't be any `None` items!") # Assume that the items are samples of the individual spaces. # In most cases this means they shouldn't be None, but there's the special case where the # individual spaces are also Sparse, and then it's fine for them to be None. return concatenate(space.base, items=items, out=out) if space.sparsity == 1: if not all(item is None for item in items): raise ValueError("Space has sparsity of 1, all items should be None!") # Assume that the items are samples of the individual spaces. # In most cases this means they shouldn't be None, but there's the special case where the # individual spaces are also Sparse, and then it's fine for them to be None. return None return tuple(items) # NOTE: Avoiding returning this np.array of type `object`, simply because `np.array([None])` is # not fun to have to deal with. # return np.array([None if v == None else v for v in items], dtype=object) return np.array(items) # for i, item in enumerate(items): # out[i] = items # return out from sequoia.utils.generic_functions.to_from_tensor import to_tensor @to_tensor.register(Sparse) def sparse_sample_to_tensor( space: Sparse, sample: Union[Optional[Any], np.ndarray], device: torch.device = None ) -> Optional[Union[Tensor, np.ndarray]]: if space.sparsity == 1.0: if isinstance(space.base, spaces.MultiDiscrete): assert all(v == None for v in sample) return np.array([None if v == None else v for v in sample]) if sample is not None: assert isinstance(sample, np.ndarray) and sample.dtype == np.object assert not sample.shape return None if space.sparsity == 0.0: # Do we need to convert dtypes here though? return to_tensor(space.base, sample, device) # 0 < sparsity < 1 if isinstance(sample, np.ndarray) and sample.dtype == np.object: return np.array([None if v == None else v for v in sample]) assert False, (space, sample) ================================================ FILE: sequoia/common/spaces/sparse_test.py ================================================ from typing import Iterable import gym import numpy as np import pytest from gym import spaces from .sparse import Sparse base_spaces = [ spaces.Discrete(n=10), spaces.Box(0, 1, [3, 32, 32], dtype=np.float32), spaces.Tuple( [ spaces.Discrete(n=10), spaces.Box(0, 1, [3, 32, 32], dtype=np.float32), ] ), spaces.Dict( { "x": spaces.Tuple( [ spaces.Discrete(n=10), spaces.Box(0, 1, [3, 32, 32], dtype=np.float32), ] ), "t": spaces.Discrete(1), } ), ] def equals(value, expected) -> bool: assert type(value) == type(expected) if isinstance(value, (int, float, bool)): return value == expected if isinstance(value, np.ndarray): return value.tolist() == expected.tolist() if isinstance(value, (tuple, list)): assert len(value) == len(expected) return all(equals(a_v, e_v) for a_v, e_v in zip(value, expected)) if isinstance(value, dict): assert len(value) == len(expected) for k in expected.keys(): if k not in value: return False if not equals(value[k], expected[k]): return False return True return value == expected def is_sparse(iterable: Iterable[bool]) -> bool: """Returns wether some (but not all) values in the iterable are None.""" none_values: int = 0 non_none_values: int = 0 for value in iterable: if value is None: none_values += 1 if non_none_values: return True else: non_none_values += 1 if none_values: return True return False # Equivalent, but with a copy: values = list(values) return any(v is None for v in values) and not all(v is None for v in values) @pytest.mark.parametrize("base_space", base_spaces) def test_sample(base_space: gym.Space): space = Sparse(base_space, sparsity=0.0) samples = [space.sample() for i in range(100)] assert all(sample is not None for sample in samples) assert all(sample in base_space for sample in samples) space = Sparse(base_space, sparsity=0.5) samples = [space.sample() for i in range(100)] assert is_sparse(samples) assert all([sample in base_space for sample in samples if sample is not None]) space = Sparse(base_space, sparsity=1.0) samples = [space.sample() for i in range(100)] assert all(sample is None for sample in samples) @pytest.mark.parametrize("sparsity", [0.0, 0.5, 1.0]) @pytest.mark.parametrize("base_space", base_spaces) def test_contains(base_space: gym.Space, sparsity: float): space = Sparse(base_space, sparsity=sparsity) samples = [space.sample() for i in range(100)] assert all(sample in space for sample in samples) from gym.vector.utils import batch_space @pytest.mark.parametrize("base_space", base_spaces) def test_batching_works(base_space: gym.Space, n: int = 3): batched_base_space = batch_space(base_space, n) sparse_space = Sparse(base_space) batched_sparse_space = batch_space(sparse_space, n) base_batch = batched_base_space.sample() sparse_batch = batched_sparse_space.sample() assert len(base_batch) == len(sparse_batch) # @pytest.mark.xfail(reason="TODO: Need to decide how we want the sparsity to " # "affect the batching of Tuple or Dict spaces.") @pytest.mark.parametrize("base_space", base_spaces) @pytest.mark.parametrize("sparsity", [0.0, 0.5, 1.0]) def test_batching_works(base_space: gym.Space, sparsity: float, n: int = 10): batched_base_space = batch_space(base_space, n) sparse_space = Sparse(base_space, sparsity=sparsity) batched_sparse_space = batch_space(sparse_space, n) batched_base_space.seed(123) base_batch = batched_base_space.sample() batched_sparse_space.seed(123) sparse_batch = batched_sparse_space.sample() if sparsity == 0: # When there is no sparsity, the batching is the same as batching the # same space. assert equals(base_batch, sparse_batch) elif sparsity == 1: assert sparse_batch is None # assert len(sparse_batch) == n # assert sparse_batch == tuple([None] * n) else: assert len(sparse_batch) == n assert isinstance(sparse_batch, tuple) for i, value in enumerate(sparse_batch): if value is not None: assert value in base_space # There should be some sparsity. assert any(v is None for v in sparse_batch) and not all( v is None for v in sparse_batch ), sparse_batch from gym.spaces.utils import flatdim, flatten @pytest.mark.xfail( reason="When using the normal gym repo rather than the " "fork, the change doesn't persist through an import." ) def test_change_doesnt_persist_after_import(): """When re-importing the `concatenate` function from `gym.vector.utils`, the changes aren't preserved. """ assert hasattr(gym.vector.utils.numpy_utils.concatenate, "registry") assert hasattr(gym.vector.utils.batch_space, "registry") def test_change_persists_after_full_import(): """When re-importing the `concatenate` function from `gym.vector.utils.numpy_utils`, the changes are preserved. """ assert hasattr(gym.vector.utils.numpy_utils.concatenate, "registry") assert hasattr(gym.vector.utils.batch_space, "registry") @pytest.mark.parametrize("base_space", base_spaces) def test_flatdim(base_space: gym.Space): sparse_space = Sparse(base_space, sparsity=0.0) base_flat_dims = flatdim(base_space) sparse_flat_dims = flatdim(sparse_space) assert base_flat_dims == sparse_flat_dims @pytest.mark.parametrize("base_space", base_spaces) def test_flatdim(base_space: gym.Space): sparse_space = Sparse(base_space, sparsity=0.0) base_flat_dims = flatdim(base_space) sparse_flat_dims = flatdim(sparse_space) assert base_flat_dims == sparse_flat_dims # The flattened dimensions shouldn't depend on the sparsity. sparse_space = Sparse(base_space, sparsity=1.0) sparse_flat_dims = flatdim(sparse_space) assert base_flat_dims == sparse_flat_dims @pytest.mark.parametrize("base_space", base_spaces) def test_seeding_works(base_space: gym.Space): sparse_space = Sparse(base_space, sparsity=0.0) base_space.seed(123) base_sample = base_space.sample() sparse_space.seed(123) sparse_sample = sparse_space.sample() assert equals(base_sample, sparse_sample) @pytest.mark.parametrize("base_space", base_spaces) def test_flatten(base_space: gym.Space): sparse_space = Sparse(base_space, sparsity=0.0) base_space.seed(123) base_sample = base_space.sample() flattened_base_sample = flatten(base_space, base_sample) sparse_space.seed(123) sparse_sample = sparse_space.sample() flattened_sparse_sample = flatten(sparse_space, sparse_sample) assert equals(flattened_base_sample, flattened_sparse_sample) @pytest.mark.parametrize("base_space", base_spaces) def test_equality(base_space: gym.Space): sparse_space = Sparse(base_space, sparsity=0.0) other_space = Sparse(base_space, sparsity=0.0) assert sparse_space == other_space sparse_space = Sparse(base_space, sparsity=0.2) assert sparse_space != other_space sparse_space = Sparse(spaces.Tuple([base_space, base_space]), sparsity=0.0) assert sparse_space != other_space ================================================ FILE: sequoia/common/spaces/tensor_spaces.py ================================================ """ TODO: Maybe create a typed version of 'add_tensor_support' of gym_wrappers.convert_tensors """ from typing import Optional, Union import gym import numpy as np import torch from gym import spaces from torch import Tensor # Dict of NumPy dtype -> torch dtype (when the correspondence exists) numpy_to_torch_dtypes = { bool: torch.bool, np.uint8: torch.uint8, np.int8: torch.int8, np.int16: torch.int16, np.int32: torch.int32, np.int64: torch.int64, np.float16: torch.float16, np.float32: torch.float32, np.float64: torch.float64, np.complex64: torch.complex64, np.complex128: torch.complex128, } # Dict of torch dtype -> NumPy dtype torch_to_numpy_dtypes = {value: key for (key, value) in numpy_to_torch_dtypes.items()} def get_numpy_dtype_equivalent_to(torch_dtype: torch.dtype) -> np.dtype: """TODO: Gets the numpy dtype equivalent to the given torch dtype.""" def dtypes_equal(a: torch.dtype, b: torch.dtype) -> bool: return a == b # simple for now. matching_dtypes = [v for k, v in torch_to_numpy_dtypes.items() if dtypes_equal(k, torch_dtype)] if len(matching_dtypes) == 0: raise RuntimeError(f"Unable to find a numpy dtype equivalent to {torch_dtype}") if len(matching_dtypes) > 1: raise RuntimeError(f"Found more than one match for dtype {torch_dtype}: {matching_dtypes}") return np.dtype(matching_dtypes[0]) def get_torch_dtype_equivalent_to(numpy_dtype: np.dtype) -> torch.dtype: """TODO: Gets the torch dtype equivalent to the given np dtype.""" def dtypes_equal(a: torch.dtype, b: torch.dtype) -> bool: return a == b # simple for now. matching_dtypes = [v for k, v in numpy_to_torch_dtypes.items() if dtypes_equal(k, numpy_dtype)] if len(matching_dtypes) == 0: raise RuntimeError(f"Unable to find a torch dtype equivalent to {numpy_dtype}") if len(matching_dtypes) > 1: raise RuntimeError(f"Found more than one match for dtype {numpy_dtype}: {matching_dtypes}") return matching_dtypes[0] from inspect import isclass from typing import Any def is_numpy_dtype(dtype: Any) -> bool: return isinstance(dtype, np.dtype) or isclass(dtype) and issubclass(dtype, np.generic) def is_torch_dtype(dtype: Any) -> bool: return isinstance(dtype, torch.dtype) from abc import ABC def supports_tensors(space: gym.Space) -> bool: raise NotImplementedError(f"TODO: Create a generic function for this.") return isinstance(space, TensorSpace) class TensorSpace(gym.Space, ABC): """Mixin class that makes a Space's `contains` and `sample` methods accept and produce tensors, respectively. """ def __init__(self, *args, device: torch.device = None, **kwargs): # super().__init__(*args, **kwargs) self.device: Optional[torch.device] = torch.device(device) if device else None # Depending on the value passed to `dtype` dtype = kwargs.get("dtype") if dtype is None: if isinstance(self, (spaces.Discrete, spaces.MultiDiscrete)): # NOTE: They dont actually give a 'dtype' argument for these. self._numpy_dtype = np.dtype(np.int64) self._torch_dtype = torch.int64 else: raise NotImplementedError(f"Space {self} doesn't have a `dtype`?") elif is_numpy_dtype(dtype): self._numpy_dtype = np.dtype(dtype) self._torch_dtype = get_torch_dtype_equivalent_to(dtype) elif is_torch_dtype(dtype): self._numpy_dtype = get_numpy_dtype_equivalent_to(dtype) self._torch_dtype = dtype elif str(dtype) == "float32": self._numpy_dtype = np.dtype(np.float32) self._torch_dtype = torch.float32 else: assert not any(dtype == k for k in numpy_to_torch_dtypes) assert not any(dtype == k for k in torch_to_numpy_dtypes) raise NotImplementedError(f"Unsupported dtype {dtype} (of type {type(dtype)})") if "dtype" in kwargs: kwargs["dtype"] = self._numpy_dtype super().__init__(*args, **kwargs) self.dtype: torch.dtype = self._torch_dtype class TensorBox(TensorSpace, spaces.Box): """Box space that accepts both Tensor and ndarrays.""" def __init__(self, low, high, shape=None, dtype=np.float32, device: torch.device = None): super().__init__(low, high, shape=shape, dtype=dtype, device=device) self.low_tensor = torch.as_tensor(self.low, device=self.device) self.high_tensor = torch.as_tensor(self.high, device=self.device) self.dtype = self._torch_dtype def sample(self): self.dtype = self._numpy_dtype sample = super().sample() self.dtype = self._torch_dtype return torch.as_tensor(sample, dtype=self._torch_dtype, device=self.device) def contains(self, x: Union[list, np.ndarray, Tensor]) -> bool: if isinstance(x, list): x = np.array(x) # Promote list to array for contains check if isinstance(x, Tensor): if not (x.device == self.low_tensor.device == self.high_tensor.device): raise RuntimeError( f"Values aren't on the same device: {x.device}, {self.device}, {self.low_tensor.device}" ) return ( x.shape == self.shape and (x >= self.low_tensor).all() and (x <= self.high_tensor).all() ) return x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) def __repr__(self): return ( f"{type(self).__name__}({self.low.min()}, {self.high.max()}, " f"{self.shape}, {self.dtype}" + (f", device={self.device}" if self.device is not None else "") + ")" ) @classmethod def from_box(cls, box: spaces.Box, device: torch.device = None): return cls( low=box.low.flat[0], high=box.high.flat[0], shape=box.shape, dtype=box.dtype, # NOTE: Gets converted in TensorSpace constructor. device=device, ) class TensorDiscrete(TensorSpace, spaces.Discrete): def contains(self, v: Union[int, Tensor]) -> bool: if isinstance(v, Tensor): v = v.detach().cpu().numpy() return super().contains(v) def sample(self): self.dtype = self._numpy_dtype s = super().sample() self.dtype = self._torch_dtype return torch.as_tensor(s, dtype=self.dtype, device=self.device) class TensorMultiDiscrete(TensorSpace, spaces.MultiDiscrete): def contains(self, v: Tensor) -> bool: try: return super().contains(v) except: v_numpy = v.detach().cpu().numpy() return super().contains(v_numpy) def sample(self): self.dtype = self._numpy_dtype s = super().sample() self.dtype = self._torch_dtype return torch.as_tensor(s, dtype=self.dtype, device=self.device) from gym.vector.utils.spaces import batch_space @batch_space.register(TensorDiscrete) def _batch_discrete_space(space: TensorDiscrete, n: int = 1) -> TensorMultiDiscrete: return TensorMultiDiscrete(torch.full((n,), space.n, dtype=space.dtype)) ================================================ FILE: sequoia/common/spaces/tensor_spaces_test.py ================================================ import numpy as np import pytest from gym import spaces from torch import Tensor from .tensor_spaces import TensorBox, numpy_to_torch_dtypes @pytest.mark.parametrize("np_dtype", [np.uint8, np.float32]) def test_tensor_box(np_dtype: np.dtype): torch_dtype = numpy_to_torch_dtypes[np_dtype] space = spaces.Box(0, 1, (28, 28), dtype=np_dtype) new_space = TensorBox.from_box(space) sample = new_space.sample() assert isinstance(sample, Tensor) assert sample in new_space assert sample.cpu().numpy().astype(np_dtype) in space assert sample.dtype == torch_dtype ================================================ FILE: sequoia/common/spaces/typed_dict.py ================================================ """ Subclass of `spaces.Dict` that allows custom dtypes and uses type annotations. """ import dataclasses from collections import OrderedDict from collections.abc import Mapping as MappingABC from copy import deepcopy from dataclasses import fields, is_dataclass from inspect import isclass from typing import ( Any, ClassVar, Dict, Iterable, List, Mapping, Sequence, Tuple, Type, TypeVar, Union, get_type_hints, ) import gym import numpy as np from gym import Space, spaces from gym.vector.utils import batch_space, concatenate from .sparse import batch_space, concatenate try: from typing import get_origin except ImportError: # Python 3.7's typing module doesn't have this `get_origin` function, so get it from # `typing_inspect`. from typing_inspect import get_origin M = TypeVar("M", bound=Mapping[str, Any]) S = TypeVar("S") Dataclass = TypeVar("Dataclass") class TypedDictSpace(spaces.Dict, Space[M]): """Subclass of `spaces.Dict` that allows custom dtypes and uses type annotations. ## Examples: - Using it just like a regular spaces.Dict: >>> from gym.spaces import Box >>> s = TypedDictSpace(x=Box(0, 1, (4,), dtype=np.float64)) >>> s TypedDictSpace(x:Box(0.0, 1.0, (4,), float64)) >>> _ = s.seed(123) >>> s.sample() {'x': array([0.06132501, 0.48141959, 0.41703335, 0.34899889])} - Using it like a TypedDict: (This equivalent to the above) >>> class VisionSpace(TypedDictSpace): ... x: Box = Box(0, 1, (4,), dtype=np.float64) >>> s = VisionSpace() >>> s VisionSpace(x:Box(0.0, 1.0, (4,), float64)) >>> _ = s.seed(123) >>> s.sample() {'x': array([0.06132501, 0.48141959, 0.41703335, 0.34899889])} - You can also overwrite the values from the type annotations by passing them to the constructor: >>> s = VisionSpace(x=spaces.Box(0, 2, (3,), dtype=np.int64)) >>> s VisionSpace(x:Box(0, 2, (3,), int64)) >>> _ = s.seed(123) >>> s.sample() {'x': array([0, 1, 1])} ### Using custom dtypes Can use any type here, as long as it can receive the samples from each space as keyword arguments. One good example of this is to use a `dataclass` as the custom dtype. You are strongly encouraged to use a dtype that inherits from the `Mapping` class from `collections.abc`, so that samples form your space can be handled similarly to regular dictionaries. >>> from collections import OrderedDict >>> s = TypedDictSpace(x=spaces.Box(0, 1, (4,), dtype=float), dtype=OrderedDict) >>> s TypedDictSpace(x:Box(0.0, 1.0, (4,), float64), dtype=) >>> _ = s.seed(123) >>> s.sample() OrderedDict([('x', array([0.06132501, 0.48141959, 0.41703335, 0.34899889]))]) ### Required items: If an annotation on the class doesn't have a default value, then it is treated as a required argument: >>> class FooSpace(TypedDictSpace): ... a: spaces.Box = spaces.Box(0, 1, (4,), float) ... b: spaces.Discrete >>> s = FooSpace() # doesn't work! Traceback (most recent call last): ... TypeError: Space of type requires a 'b' item! >>> s = FooSpace(b=spaces.Discrete(5)) >>> s FooSpace(a:Box(0.0, 1.0, (4,), float64), b:Discrete(5)) NOTE: spaces can also inherit from each other! >>> class ImageSegmentationSpace(VisionSpace): ... bounding_box: Box ... >>> s = ImageSegmentationSpace( ... x=spaces.Box(0, 1, (2, 2), dtype=float), ... bounding_box=spaces.Box(0, 4, (4, 2), dtype=int), ... ) >>> s ImageSegmentationSpace(x:Box(0.0, 1.0, (2, 2), float64), bounding_box:Box(0, 4, (4, 2), int64)) """ def __init__(self, spaces: Mapping[str, Space] = None, dtype: Type[M] = dict, **spaces_kwargs): """Creates the TypedDict space. Can either pass a dict of spaces, or pass the spaces as keyword arguments. Parameters ---------- spaces : Mapping[str, Space], optional Dictionary mapping from strings to spaces, by default None dtype : Type[M], optional Type of outputs to return. By default `dict`, but this can also use any other dtype which will accept the values from each space as a keyword argument. NOTE: This `dtype` is usually set to some dataclass type in Sequoia, such as `Observation`, `Rewards`, etc. (subclasses of `Batch`). By default, `dtype` is just `dict`, and `space.sample()` will return simple dictionaries. Raises ------ RuntimeError If both `spaces` and **kwargs are used. TypeError If the class has a type annotation for a space, and the required space isn't passed as an argument (emulating a required argument, in a way). """ if spaces and spaces_kwargs: raise RuntimeError("Can only use one of `spaces` or **kwargs, not both.") spaces_from_args = spaces or spaces_kwargs # have to use OrderedDict just in case python <= 3.6.x spaces_from_annotations: Dict[str, gym.Space] = OrderedDict() cls = type(self) class_typed_attributes: Dict[str, Type] = get_type_hints(cls) # NOTE: This is only needed when using `__future__ import annotations` in a # client file: # Get the `globals` of the caller when checking type annotations: # NOTE: Might actually need to get the globals of where that class is defined! # caller_globals = inspect.stack()[1][0].f_globals # class_typed_attributes: Dict[str, Type] = get_type_hints(cls, globalns=caller_globals) if class_typed_attributes: for attribute, type_annotation in class_typed_attributes.items(): if getattr(type_annotation, "__origin__", "") is ClassVar: continue is_space = False if isclass(type_annotation) and issubclass(type_annotation, gym.Space): is_space = True else: origin = get_origin(type_annotation) is_space = ( origin is not None and isclass(origin) and issubclass(origin, gym.Space) ) # NOTE: emulate a 'required argument' when there is a type # annotation, but no value. # Note: How about a None value, is that ok? if is_space: _missing = object() value = getattr(cls, attribute, _missing) if value is _missing and attribute not in spaces_from_args: raise TypeError( f"Space of type {type(self)} requires a '{attribute}' item!" ) if isinstance(value, gym.Space): # Shouldn't be able to have two annotations with the same name. assert attribute not in spaces_from_annotations # TODO: Should copy the space, so that modifying the class # attribute doesn't affect the instances of that space. spaces_from_annotations[attribute] = deepcopy(value) # Avoid the annoying sorting of keys that `spaces.Dict` does if we pass a # regular dict. spaces = OrderedDict() # Need to use this for 3.6.x spaces.update(spaces_from_annotations) spaces.update(spaces_from_args) # Arguments overwrite the spaces from the annotations. if not spaces: raise TypeError( "Need to either have type annotations on the class, or pass some " "arguments to the constructor!" ) assert all(isinstance(s, gym.Space) for s in spaces.values()), spaces super().__init__(spaces=spaces) self.spaces = dict(self.spaces) # Get rid of the OrderedDict. # Sequoia-specific check. if "x" in self.spaces: assert list(self.spaces.keys()).index("x") == 0, self.spaces self.dtype = dtype # Optional: But just to make sure this works: if dataclasses.is_dataclass(self.dtype): dtype_fields: List[str] = [f.name for f in dataclasses.fields(self.dtype)] # Check that the dtype can handle all the entries of `self.spaces`, so that # we won't get any issues when calling `self.dtype(**super().sample())`. for space_name, space in self.spaces.items(): if space_name not in dtype_fields: raise RuntimeError( f"dtype {self.dtype} doesn't have a field for space " f"'{space_name}' ({space})!" ) def keys(self) -> Sequence[str]: return self.spaces.keys() def items(self) -> Iterable[Tuple[str, Space]]: return self.spaces.items() def values(self) -> Sequence[Space]: return self.spaces.values() def sample(self) -> M: dict_sample: dict = super().sample() # Gets rid of OrderedDict. return self.dtype(**dict_sample) def __getattr__(self, attr: str) -> Space: if attr != "spaces": if attr in self.spaces: return self.spaces[attr] raise AttributeError(f"Space doesn't have attribute {attr}") def __getitem__(self, key: Union[str, int]) -> Space: if key not in self.spaces: if isinstance(key, int): # IDEA: Try to get the item at given index in the keys? a bit like a # tuple space? # return self[list(self.spaces.keys())[key]] pass return super().__getitem__(key) def __len__(self) -> int: return len(self.spaces) # def __setitem__(self, key, value): # return super().__setitem__(key, value) def contains(self, x: Union[M, Mapping[str, Space]]) -> bool: if is_dataclass(x): if is_dataclass(self.dtype): if not isinstance(x, self.dtype): # NOTE: This could be a bit controversial, since it departs a bit how Dict # does things. return False # NOTE: We don't use dataclasses.asdict as it doesn't work with Tensor # items with grad attributes. x = {f.name: getattr(x, f.name) for f in fields(x)} # NOTE: Modifying this so that we allow samples with more values, as long as it # has all the required keys. if not isinstance(x, (dict, MappingABC)) or not all(k in x for k in self.spaces): return False for k, space in self.spaces.items(): if k not in x: return False if not space.contains(x[k]): return False return True # return super().contains(x) def __repr__(self) -> str: return ( f"{str(type(self).__name__)}(" + ", ".join([f"{k}:{s}" for k, s in self.spaces.items()]) + (f", dtype={self.dtype}" if self.dtype is not dict else "") + ")" ) def __eq__(self, other): if isinstance(other, TypedDictSpace) and self.dtype != other.dtype: return False return super().__eq__(other) @batch_space.register(TypedDictSpace) def _batch_typed_dict_space(space: TypedDictSpace, n: int = 1) -> spaces.Dict: return type(space)( {key: batch_space(subspace, n=n) for (key, subspace) in space.spaces.items()}, dtype=space.dtype, ) @concatenate.register(TypedDictSpace) def _concatenate_typed_dicts( space: TypedDictSpace, items: Union[list, tuple], out: Union[tuple, dict, np.ndarray], ) -> Dict: return space.dtype( **{ key: concatenate(subspace, [item[key] for item in items], out=out[key]) for (key, subspace) in space.spaces.items() } ) from sequoia.utils.generic_functions.to_from_tensor import from_tensor, to_tensor T = TypeVar("T") @from_tensor.register(TypedDictSpace) def _(space: TypedDictSpace, sample: Union[T, Mapping]) -> T: return space.dtype( **{key: from_tensor(sub_space, sample[key]) for key, sub_space in space.spaces.items()} ) import torch @to_tensor.register(TypedDictSpace) def _( space: TypedDictSpace[T], sample: Dict[str, Union[np.ndarray, Any]], device: torch.device = None, ) -> T: return space.dtype( **{ key: to_tensor(subspace, sample=sample[key], device=device) for key, subspace in space.items() } ) ================================================ FILE: sequoia/common/spaces/typed_dict_test.py ================================================ from dataclasses import Field, dataclass, fields from typing import Dict, Iterable, Mapping, Tuple, TypeVar import gym import numpy as np from gym import spaces from gym.spaces import Box, Discrete from gym.vector.utils import batch_space from .typed_dict import TypedDictSpace T = TypeVar("T") def test_basic(): space = TypedDictSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ) v = space.sample() print(v) assert v in space # TODO: Maybe re-use all the tests for gym.spaces.Tuple in the gym repo # somehow? vanilla_space = spaces.Dict( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ) assert vanilla_space.sample() in space assert space.sample() in vanilla_space def test_supports_dataclasses(): # IDEA: Wrapper that makes the 'default factory' of each field actually use # the 'sample' method from a space associated with each class. @dataclass class Sample: a: np.ndarray b: bool c: Tuple[int, int] space = spaces.Dict( a=spaces.Box(0, 1, [2, 2], dtype=np.float64), b=spaces.Box(False, True, (), np.bool), c=spaces.MultiDiscrete([2, 2]), ) wrapped_space: TypedDictSpace = TypedDictSpace(spaces=space.spaces, dtype=Sample) assert isinstance(wrapped_space, spaces.Dict) s = Sample( a=np.ones([2, 2]), b=np.array(False), c=np.array([0, 1]), ) assert s in wrapped_space assert isinstance(wrapped_space.sample(), Sample) @dataclass class StateTransition(Mapping[str, T]): current_state: T action: int next_state: T def __post_init__(self): self._fields: Dict[str, Field] = {f.name: f for f in fields(self)} def __len__(self) -> int: return len(self._fields) def __getitem__(self, attr: str) -> T: if attr not in self._fields: raise KeyError(attr) return getattr(self, attr) def __iter__(self) -> Iterable[str]: return iter(self._fields) def test_basic_with_dtype(): space = TypedDictSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) v = space.sample() assert v in space assert isinstance(v, StateTransition) normal_space = spaces.Dict( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ) assert normal_space.sample() in space # NOTE: this doesn't work when using a dtype that isn't a subclass of dict! if issubclass(space.dtype, dict): assert space.sample() in normal_space def test_isinstance(): space = TypedDictSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) assert isinstance(space, spaces.Dict) assert isinstance(space.sample(), StateTransition) def test_equals_dict_space_with_same_items(): """Test that a TypedDictSpace is considered equal to aDict space if the spaces are in the same order and all equal. """ space = TypedDictSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) dict_space = spaces.Dict( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ) assert space == dict_space assert dict_space == space def test_batch_objets_considered_valid_samples(): from dataclasses import dataclass import numpy as np from sequoia.common.batch import Batch @dataclass(frozen=True) class StateTransitionDataclass(Batch): current_state: np.ndarray action: int next_state: np.ndarray space = TypedDictSpace( current_state=Box(0, 1, (2, 2), dtype=np.float64), action=Discrete(2), next_state=Box(0, 1, (2, 2), dtype=np.float64), dtype=StateTransitionDataclass, ) obs = StateTransitionDataclass( current_state=np.ones([2, 2]) / 2, action=1, next_state=np.zeros([2, 2]), ) assert obs in space assert space.sample() in space assert isinstance(space.sample(), StateTransitionDataclass) def test_batch_space(): space = TypedDictSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) assert batch_space(space, n=5) == TypedDictSpace( current_state=Box(0, 1, (5, 2, 2)), action=spaces.MultiDiscrete([2, 2, 2, 2, 2]), next_state=Box(0, 1, (5, 2, 2)), dtype=StateTransition, ) def test_batch_space_preserves_dtype(): space = TypedDictSpace( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), dtype=StateTransition, ) batched_space = batch_space(space, n=5) assert isinstance(batched_space, TypedDictSpace) assert list(batched_space.spaces.keys()) == list(batched_space.spaces.keys()) assert list(batched_space.spaces.keys()) == [ "current_state", "action", "next_state", ] assert batched_space.dtype is StateTransition space = TypedDictSpace( dict( current_state=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ), dtype=StateTransition, ) batched_space = batch_space(space, n=5) assert isinstance(batched_space, TypedDictSpace) assert list(batched_space.spaces.keys()) == list(batched_space.spaces.keys()) assert list(batched_space.spaces.keys()) == [ "current_state", "action", "next_state", ] assert list(batched_space.sample().keys()) == [ "current_state", "action", "next_state", ] assert list(v[0] for v in space.spaces.items()) == [ "current_state", "action", "next_state", ] assert batched_space.dtype is StateTransition space = TypedDictSpace( dict( x=Box(0, 1, (2, 2)), action=Discrete(2), next_state=Box(0, 1, (2, 2)), ), ) batched_space = batch_space(space, n=5) assert batched_space.x == Box(0, 1, (5, 2, 2)) assert isinstance(batched_space, TypedDictSpace) assert list(batched_space.spaces.keys()) == list(batched_space.spaces.keys()) assert list(batched_space.spaces.keys()) == ["x", "action", "next_state"] assert list(batched_space.sample().keys()) == ["x", "action", "next_state"] assert list(v[0] for v in space.spaces.items()) == ["x", "action", "next_state"] class DummyDictEnv(gym.Env): def __init__(self): super().__init__() self.observation_space = TypedDictSpace( x=Box(0, 1, (2, 2)), t=Discrete(2), done=Box(False, True, (1,), bool), ) self.action_space = spaces.Discrete(10) self.reward_space = spaces.Box(-10, 10, shape=(1,), dtype=np.float32) def reset(self): return self.observation_space.sample() def step(self, action): return self.observation_space.sample(), self.reward_space.sample(), False, {} def seed(self, seed=None): seeds = [] seeds += self.observation_space.seed(seed) seeds += self.action_space.seed(seed) seeds += self.reward_space.seed(seed) return seeds def test_vector_env(): env = DummyDictEnv() from gym.envs.registration import register from gym.vector import make register("dummy_foo-v0", entry_point=DummyDictEnv) env = make("dummy_foo-v0", num_envs=10) from typing import Optional from numpy.typing import ArrayLike from sequoia.common.batch import Batch def test_object_with_extra_keys_fits(): @dataclass(frozen=True) class Observation(Batch): x: np.ndarray t: ArrayLike done: Optional[ArrayLike] = None space = TypedDictSpace( x=spaces.Box(0, 10, (10,), dtype=np.float64), t=spaces.Box(0, 1, (1,), dtype=np.int32) ) obs = Observation( x=np.arange(10, dtype=np.float64), t=np.array([1], dtype=np.int32), done=False, ) assert obs.x in space.x assert obs.t in space.t assert obs in space def test_order_of_keys_is_same_in_samples(): space = TypedDictSpace(x=spaces.Box(0, 10, (10,), dtype=np.int32), t=spaces.Discrete(10)) expected = ["x", "t"] assert list(space.keys()) == expected assert list(k for k, v in space.items()) == expected assert list(space.sample().keys()) == expected assert list(k for k, v in space.sample().items()) == expected space.seed(123) s = space.sample() assert str(s) == f"{{'x': {repr(s['x'])}, 't': {repr(s['t'])}}}" def test_debugging(): assert { "task_labels": 0, "x": np.array([-0.25162117, -0.43992427, 0.42706016, 1.47862901]), } in TypedDictSpace( x=spaces.Box(-3.4028234663852886e38, 3.4028234663852886e38, (4,), np.float64), task_labels=spaces.Discrete(5), dtype=dict, ) def test_equality(): s1 = TypedDictSpace( x=spaces.Box(-np.inf, np.inf, (39,), np.float32), task_labels=spaces.Discrete(10), dtype=dict, ) s2 = TypedDictSpace( x=spaces.Box(-np.inf, np.inf, (39,), np.float32), task_labels=spaces.Discrete(10), dtype=dict, ) assert s1 == s2 ## IDEA: Creating a space like this, using the same syntax as with TypedDict # class StateTransitionSpace(TypedDict): # current_state: Box = Box(0, 1, (2,2)) # action: Discrete = Discrete(2) # current_state: Box = Box(0, 1, (2,2)) # space = StateTransitionSpace() # space.sample() ================================================ FILE: sequoia/common/task.py ================================================ """ NOTE: Unused at the moment. This defines a `Task` object that is just used to represent the information about a 'Task'. """ from dataclasses import dataclass, field from typing import List from simple_parsing import list_field from sequoia.utils.serialization import Serializable @dataclass class Task(Serializable): """Dataclass that represents a task. TODO (@lebrice): This isn't being used anymore, but we could probably use it / add it to the Continuum package, if it doesn't already have something like it. TODO: Maybe the this could also specify from which dataset(s) it is sampled. """ # The index of this task (the order in which it was encountered) index: int = field(default=-1, repr=False) # All the unique classes present within this task. (order matters) classes: List[int] = list_field() ================================================ FILE: sequoia/common/transforms/__init__.py ================================================ from .channels import ( ChannelsFirst, ChannelsFirstIfNeeded, ChannelsLast, ChannelsLastIfNeeded, ThreeChannels, ) from .compose import Compose from .split_batch import SplitBatch, split_batch from .to_tensor import ToTensor, image_to_tensor from .transform import Transform from .transform_enum import Transforms ================================================ FILE: sequoia/common/transforms/channels.py ================================================ # from torchvision.transforms import Lambda from collections.abc import Mapping from dataclasses import dataclass from functools import singledispatch from typing import Any, Iterable, Tuple, Union import numpy as np import torch from gym import spaces from torch import Tensor from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace from sequoia.utils.logging_utils import get_logger from .transform import Img, Transform from .utils import is_image logger = get_logger(__name__) @singledispatch def has_channels_last(img_or_shape: Union[Img, Tuple[int, ...], spaces.Box]) -> bool: """Returns wether the given image, or image batch, shape, or Space is in the channels last format. """ shape = getattr(img_or_shape, "shape", img_or_shape) return len(shape) and shape[-1] in {1, 3} def has_channels_first(img_or_shape: Union[Img, Tuple[int, ...], spaces.Box]) -> bool: """Returns wether the given image or image batch, shape, or Space is in the channels first format. """ shape = getattr(img_or_shape, "shape", img_or_shape) if len(shape) == 3: return shape[0] in {1, 3} elif len(shape) == 4: return shape[1] in {1, 3} return False # return len(shape) and shape[0 if len(shape) == 3 else 1] in {1, 3} def channels_last_if_needed(x: Any) -> Any: if has_channels_first(x): return channels_last(x) elif has_channels_last(x): return x raise RuntimeError(f"Input isn't channels_first or channels_last! {x.shape}") def channels_first_if_needed(x: Any) -> Any: if has_channels_last(x): return channels_first(x) elif has_channels_first(x): return x raise RuntimeError(f"Input isn't channels_first or channels_last! {x.shape}") class NamedDimensions(Transform[Tensor, Tensor]): """'Transform' that gives names to the dimensions of input tensors. Overwrites existing named dimensions, if any. """ def __init__(self, names: Iterable[str]): self.names = tuple(names) def __call__(self, tensor: Tensor) -> Tensor: return tensor.refine_names(*self.names) @singledispatch def three_channels(x: Any) -> Any: """Transform that makes the input images have three channels if they don't. * New: Also adds names to each dimension, when possible. (edit: off for now) For instance, if the input shape is: [28, 28] -> [3, 28, 28] (copy the image three times) [1, 28, 28] -> [3, 28, 28] (same idea) [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.) """ raise NotImplementedError(f"This doesn't currently support input {x} of type {type(x)}") @three_channels.register(Tensor) def _(x: Tensor) -> Tensor: names: Tuple[str, ...] = () if x.ndim == 2: x = x.reshape([1, *x.shape]) x = x.repeat(3, 1, 1) names = ("C", "H", "W") if x.ndim == 3: if x.shape[0] == 1: x = x.repeat(3, 1, 1) names = ("C", "H", "W") elif x.shape[-1] == 1: x = x.repeat(1, 1, 3) names = ("H", "W", "C") if x.ndim == 4: if x.shape[1] == 1: x = x.repeat(1, 3, 1, 1) names = ("N", "C", "H", "W") elif x.shape[-1] == 1: x = x.repeat(1, 1, 1, 3) names = ("N", "H", "W", "C") # FIXME: Turning this off for now, since using named dimensions # generates a whole lot of UserWarnings atm. # if isinstance(x, Tensor) and names: # # Cool new pytorch feature! # x.rename(*names) return x @three_channels.register(np.ndarray) def _(x: np.ndarray) -> np.ndarray: if x.ndim == 2: # names = ("C", "H", "W") x = x.reshape([1, *x.shape]) x = np.tile(x, [3, 1, 1]) if x.ndim == 3: if x.shape[0] == 1: # names = ("C", "H", "W") x = np.tile(x, [3, 1, 1]) elif x.shape[-1] == 1: # names = ("H", "W", "C") x = np.tile(x, [1, 1, 3]) if x.ndim == 4: if x.shape[1] == 1: # names = ("N", "C", "H", "W") x = np.tile(x, [1, 3, 1, 1]) elif x.shape[-1] == 1: # names = ("N", "H", "W", "C") x = np.tile(x, [1, 1, 1, 3]) return x @three_channels.register(spaces.Box) def _(x: spaces.Box) -> spaces.Box: return type(x)(low=three_channels(x.low), high=three_channels(x.high), dtype=x.dtype) @three_channels.register(torch.Size) @three_channels.register(tuple) def _(x: Tuple[int, ...]) -> Tuple[int, ...]: dims = len(x) if dims == 2: return (3, *x) elif dims == 3: if x[0] == 1: return (3, *x[1:]) elif x[-1] == 1: return (*x[:-1], 3) elif dims == 4: if x[1] == 1: return (x[0], 3, *x[2:]) elif x[-1] == 1: return (*x[:-1], 3) return x @three_channels.register(NamedTupleSpace) def _three_channels(x: Any) -> Any: return type(x)( **{key: three_channels(value) if is_image(value) else value for key, value in x.items()}, dtype=x.dtype, ) @three_channels.register(spaces.Dict) @three_channels.register(Mapping) def _three_channels(x: Any) -> Any: return type(x)( **{key: three_channels(value) if is_image(value) else value for key, value in x.items()} ) @three_channels.register(TypedDictSpace) def _three_channels(x: TypedDictSpace) -> TypedDictSpace: return type(x)( {key: three_channels(value) if is_image(value) else value for key, value in x.items()}, dtype=x.dtype, ) @dataclass class ThreeChannels(Transform[Tensor, Tensor]): """Transform that makes the input images have three tensors. * New: Also adds names to each dimension, when possible. For instance, if the input shape is: [28, 28] -> [3, 28, 28] (copy the image three times) [1, 28, 28] -> [3, 28, 28] (same idea) [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.) """ def __call__(self, x: Tensor) -> Tensor: return three_channels(x) @singledispatch def channels_first(x: Any) -> Any: """Re-orders the dimensions of the input from ((n), H, W, C) to ((n), C, H, W). If the tensor doesn't have named dimensions, this will ALWAYS re-order the dimensions, regarless of if the image or space already has channels first. Also converts non-Tensor inputs to tensors using `to_tensor`. """ raise RuntimeError(f"Transform isn't applicable to input {x} of type {type(x)}.") @channels_first.register(Tensor) def _(x: Tensor) -> Tensor: if x.ndim == 3: if any(x.names): return x.align_to("C", "H", "W") return x.permute(2, 0, 1) # .to(memory_format=torch.contiguous_format) if x.ndim == 4: if any(x.names): return x.align_to("N", "C", "H", "W") return x.permute(0, 3, 1, 2).contiguous() return x @channels_first.register(tuple) def _(x: Tuple[int, ...]) -> Tuple[int, ...]: if len(x) == 3: # TODO: Re-enable the naming of the dimensions at some point. return type(x)(x[i] for i in (2, 0, 1)) if len(x.shape) == 4: return type(x)(x[i] for i in (0, 3, 1, 2)) raise NotImplementedError(x) @channels_first.register(np.ndarray) def _(x: spaces.Box) -> spaces.Box: if x.ndim == 4: return np.moveaxis(x, 3, 1) elif x.ndim == 3: return np.moveaxis(x, 2, 0) else: raise NotImplementedError(f"Expected 3-d or 4-d input, got {x}") @channels_first.register(tuple) def _(x: Tuple[int, ...]) -> Tuple[int, ...]: if len(x) == 4: return type(x)(x[i] for i in (0, 3, 1, 2)) if len(x) == 3: return type(x)(x[i] for i in (2, 0, 1)) raise NotImplementedError(x) @channels_first.register(spaces.Box) def _(x: spaces.Box) -> spaces.Box: return type(x)( low=channels_first(x.low), high=channels_first(x.high), dtype=x.dtype, ) @dataclass class ChannelsFirst(Transform[Union[np.ndarray, Tensor], Tensor]): """Re-orders the dimensions of the tensor from ((n), H, W, C) to ((n), C, H, W). If the tensor doesn't have named dimensions, this will ALWAYS re-order the dimensions, regarless of the length of the last dimension. Also converts non-Tensor inputs to tensors using `to_tensor`. """ def __call__(self, x: Tensor) -> Tensor: return self.apply(x) @classmethod def apply(cls, x: Tensor) -> Tensor: return channels_first(x) # if not isinstance(x, Tensor): # raise RuntimeError(f"Transform only applies to Tensors. (Not {x} of type {type(x)}).") # # if has_channels_first(x): # # logger.warning(RuntimeWarning(f"Input already seems to have channels first, but this transform will be applied anyway..")) # if x.ndim == 3: # if any(x.names): # return x.align_to("C", "H", "W") # return x.permute(2, 0, 1)#.to(memory_format=torch.contiguous_format) # if x.ndim == 4: # if any(x.names): # return x.align_to("N", "C", "H", "W") # return x.permute(0, 3, 1, 2).contiguous() # return x # @staticmethod # def shape_change(input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]: # ndim = len(input_shape) # if ndim == 3: # return tuple(input_shape[i] for i in (2, 0, 1)) # elif ndim == 4: # return tuple(input_shape[i] for i in (0, 3, 1, 2)) # return input_shape @dataclass class ChannelsFirstIfNeeded(ChannelsFirst): """Only puts the channels first if the input has channels last.""" @classmethod def apply(cls, x: Tensor) -> Tensor: if has_channels_last(x): return super().apply(x) return x # @classmethod # def shape_change(cls, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]: # if has_channels_last(input_shape): # return super().shape_change(input_shape) # return input_shape @singledispatch def channels_last(x: Any) -> Any: raise NotImplementedError(f"This doesn't support input {x} of type {type(x)}") @channels_last.register(Tensor) def _(x: Tensor) -> Tensor: if len(x.shape) == 3: # TODO: Re-enable the naming of the dimensions at some point. # if not x.names: # x.rename("C", "H", "W") # return x.align_to("H", "W", "C") return x.permute(1, 2, 0) if len(x.shape) == 4: return x.permute(0, 2, 3, 1) @channels_last.register(tuple) def _(x: Tuple[int, ...]) -> Tuple[int, ...]: if len(x) == 3: # TODO: Re-enable the naming of the dimensions at some point. return type(x)(x[i] for i in (1, 2, 0)) if len(x.shape) == 4: return type(x)(x[i] for i in (0, 2, 3, 1)) raise NotImplementedError(x) @channels_last.register(np.ndarray) def _(x: np.ndarray) -> np.ndarray: if len(x.shape) == 4: return np.moveaxis(x, 1, 3) elif len(x.shape) == 3: return np.moveaxis(x, 0, 2) raise NotImplementedError(x.shape) @channels_last.register(spaces.Box) def _(x: spaces.Box) -> spaces.Box: return type(x)( low=channels_last(x.low), high=channels_last(x.high), dtype=x.dtype, ) @dataclass class ChannelsLast(Transform[Tensor, Tensor]): def __call__(self, x: Tensor) -> Tensor: return self.apply(x) @classmethod def apply(cls, x: Tensor) -> Tensor: return channels_last(x) @dataclass class ChannelsLastIfNeeded(ChannelsLast): """Only puts the channels last if the input has channels first.""" @classmethod def apply(cls, x: Tensor) -> Tensor: return channels_last_if_needed(x) ================================================ FILE: sequoia/common/transforms/compose.py ================================================ from typing import Callable, List, TypeVar from gym import spaces from torchvision.transforms import Compose as ComposeBase from sequoia.utils.logging_utils import get_logger from .transform import InputType, OutputType, Transform logger = get_logger(__name__) T = TypeVar("T", bound=Callable) class Compose(List[T], ComposeBase, Transform[InputType, OutputType]): """Extend the Compose class of torchvision with methods of `list`. This can also be passed in members of the `Transforms` enum, which makes it possible to do something like this: >>> from .transform_enum import Compose, Transforms >>> transforms = Compose([Transforms.to_tensor, Transforms.three_channels,]) >>> Transforms.three_channels in transforms True >>> transforms += [Transforms.random_grayscale] >>> transforms [, , ] """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) ComposeBase.__init__(self, transforms=self) def __call__(self, img): if isinstance(img, spaces.Space): for t in self: try: img = t(img) except: logger.debug( f"Unable to apply transform {t} on space {img}: assuming that transform {t} doesn't change the space." ) return img else: for t in self: img = t(img) return img # def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]: # logger.debug(f"shape_change on Compose: input shape: {input_shape}") # # TODO: Give the impact of this transform on a given input shape. # for transform in self: # logger.debug(f"Shape before transform {transform}: {input_shape}") # shape_change_method: Optional[Callable] = getattr(transform, "shape_change", None) # if shape_change_method and callable(shape_change_method): # input_shape = transform(input_shape) # type: ignore # else: # logger.debug( # f"Unable to detect the change of shape caused by " # f"transform {transform}, assuming its output has same " # f"shape as its input." # ) # logger.debug(f"Final shape: {input_shape}") # return input_shape # def space_change(self, input_space: gym.Space) -> gym.Space: # from .transform_enum import Transforms # for transform in self: # if isinstance(transform, Transforms): # transform = transform.value # input_space = transform(input_space) # return input_space ================================================ FILE: sequoia/common/transforms/resize.py ================================================ from collections.abc import Mapping from functools import singledispatch from typing import Dict, List, Tuple import numpy as np import torch from gym import spaces from PIL import Image from torch import Tensor from torch.nn.functional import interpolate from torchvision.transforms import InterpolationMode from torchvision.transforms import Resize as Resize_ from torchvision.transforms import functional as F from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support, has_tensor_support from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace from sequoia.common.spaces.image import Image as ImageSpace from sequoia.utils.logging_utils import get_logger from .channels import channels_first, channels_last, has_channels_first, has_channels_last from .transform import Img, Transform from .utils import is_image logger = get_logger(__name__) @singledispatch def resize(x: Img, size: Tuple[int, ...], **kwargs) -> Img: """Resizes a PIL.Image, a Tensor, ndarray, or a Box space.""" raise NotImplementedError(f"Transform doesn't support input {x} of type {type(x)}") @resize.register def _(x: Image.Image, size: Tuple[int, ...], **kwargs) -> Image.Image: return F.resize(x, size, **kwargs) @resize.register(np.ndarray) @resize.register(Tensor) def _resize_array_or_tensor(x: np.ndarray, size: Tuple[int, ...], **kwargs) -> np.ndarray: """TODO: This resizes numpy arrays by converting them to tensors and then using the `interpolate` function. There is for sure a more efficient way to do this. """ original = x if isinstance(original, np.ndarray): # Need to convert to tensor (for interpolate to work). x = torch.as_tensor(x) if len(original.shape) == 3: # Need to add a batch dimension (for interpolate to work). x = x.unsqueeze(0) if has_channels_last(original): # Need to make it channels first (for interpolate to work). x = channels_first(x) assert has_channels_first(x), f"Image needs to have channels first (shape is {x.shape})" x = interpolate(x, size, mode="area") if isinstance(original, np.ndarray): x = x.numpy() if len(original.shape) == 3: x = x[0] if has_channels_last(original): x = channels_last(x) return x @resize.register def _resize_namedtuple_space( x: NamedTupleSpace, size: Tuple[int, ...], **kwargs ) -> NamedTupleSpace: """When presented with a NamedTupleSpace input, this transform will be applied to all 'Image' spaces. """ return type(x)( **{ key: resize(v, size, **kwargs) if isinstance(v, ImageSpace) else v for key, v in x._spaces.items() } ) @resize.register(Mapping) def _resize_namedtuple(x: Dict, size: Tuple[int, ...], **kwargs) -> Dict: """When presented with a Mapping-like input, this transform will be applied to all 'Image' spaces. """ return type(x)( **{ key: resize(value, size, **kwargs) if is_image(value) else value for key, value in x.items() } ) @resize.register(TypedDictSpace) def _resize_typed_dict(x: TypedDictSpace, size: Tuple[int, ...], **kwargs) -> TypedDictSpace: """When presented with a Mapping-like input, this transform will be applied to all 'Image' spaces. """ return type(x)( { key: resize(value, size, **kwargs) if is_image(value) else value for key, value in x.items() }, dtype=x.dtype, ) @resize.register(tuple) def _resize_image_shape(x: Tuple[int, ...], size: Tuple[int, ...], **kwargs) -> Tuple[int, ...]: """Give the resized image shape, given the input shape.""" new_shape: List[int] = list(size) if len(size) == 2: # Preserve the number of channels. if len(x) == 4: if has_channels_first(x): new_shape = [*x[:2], *size] elif has_channels_last(x): new_shape = [x[0], *size, x[-1]] else: raise NotImplementedError(x) elif len(x) == 3: if has_channels_first(x): new_shape = [x[0], *size] elif has_channels_last(x): new_shape = [*size, x[-1]] else: raise NotImplementedError(x) else: NotImplementedError(size) return type(x)(new_shape) @resize.register(spaces.Box) def _resize_space(x: spaces.Box, size: Tuple[int, ...], **kwargs) -> spaces.Box: # Hmm, not sure if the bounds would actually also be respected though. new_space = type(x)( low=resize(x.low, size, **kwargs), high=resize(x.high, size, **kwargs), dtype=x.dtype, ) # If the 'old' space supported tensors as samples, then so will the new space. if has_tensor_support(x): return add_tensor_support(new_space) return new_space class Resize(Resize_, Transform[Img, Img]): def __init__(self, size: Tuple[int, ...], interpolation=InterpolationMode.BILINEAR): super().__init__(size, interpolation) # self.size = size # self.interpolation = interpolation def __call__(self, img): # TODO: (@lebrice) Weirdly enough, it seems that even though we # implement forward below, and __call__ is supposed to just use # `forward`, the base class somehow doesn't use our implementation, so # the test # env_dataset_test.py::test_iteration_with_more_than_one_wrapper would # fail if we don't have this __call__ explicitly implemented, return self.forward(img) def forward(self, img: Img) -> Img: return resize(img, size=self.size) ================================================ FILE: sequoia/common/transforms/split_batch.py ================================================ import dataclasses from typing import Any, Callable, Optional, Tuple, Type, TypeVar import numpy as np from torch import Tensor from ..batch import Batch from .transform import Transform # Type variables just for the below function. ObservationType = TypeVar("ObservationType", bound=Batch) RewardType = TypeVar("RewardType", bound=Batch) class SplitBatch(Transform[Any, Tuple[ObservationType, RewardType]]): """ Transform that will split batches into Observations and Rewards. The provided observation and reward types (which have to be subclasses of the `Batch` class) will be used to construct the observation and reward objects, respectively. To make this simpler, this callable will always return an Observation and a Reward object, even when the batch is unlabeled. In that case, the Reward object will have a 'None' passed for any of its required arguments. Parameters ---------- observation_type : Type[ObservationType] [description] reward_type : Type[RewardType] [description] Returns ------- Callable[[Any], Tuple[ObservationType, RewardType]] [description] Raises ------ RuntimeError If the observation_type or reward_type don't both subclass Batch. NotImplementedError If the type of the batch isn't supported. RuntimeError [description] NotImplementedError [description] """ def __init__(self, observation_type: Type[ObservationType], reward_type: Type[RewardType]): self.Observations = observation_type self.Rewards = reward_type self.func = split_batch(observation_type=observation_type, reward_type=reward_type) def __call__(self, batch: Any) -> Tuple[ObservationType, RewardType]: return self.func(batch) def split_batch( observation_type: Type[ObservationType], reward_type: Type[RewardType] ) -> Callable[[Any], Tuple[ObservationType, Optional[RewardType]]]: """Makes a callable that will split batches into Observations and Rewards. The provided observation and reward types (which have to be subclasses of the `Batch` class) will be used to construct the observation and reward objects, respectively. To make this simpler, this callable will always return a tuple with an Observation and an optional Reward object, even when the batch is unlabeled. In that case, the Reward will be None. Parameters ---------- observation_type : Type[ObservationType] [description] reward_type : Type[RewardType] [description] Returns ------- Callable[[Any], Tuple[ObservationType, RewardType]] [description] Raises ------ RuntimeError If the observation_type or reward_type don't both subclass Batch. NotImplementedError If the type of the batch isn't supported. RuntimeError [description] NotImplementedError [description] """ if not (issubclass(observation_type, Batch) and issubclass(reward_type, Batch)): raise RuntimeError( "Both `observation_type` and `reward_type` need to " "inherit from `Batch`!" ) # Get the min, max and total number of args for each object type. min_for_obs = n_required_fields(observation_type) max_for_obs = n_fields(observation_type) n_required_for_obs = min_for_obs n_optional_for_obs = max_for_obs - min_for_obs min_for_rew = n_required_fields(reward_type) max_for_reward = n_fields(reward_type) n_required_for_rew = min_for_rew n_optional_for_rew = max_for_reward - min_for_obs min_items = min_for_obs + min_for_rew max_items = max_for_obs + max_for_reward def split_batch_transform(batch: Any) -> Tuple[ObservationType, RewardType]: if isinstance(batch, (Tensor, np.ndarray)): batch = (batch,) if isinstance(batch, dict): obs_fields = observation_type.field_names rew_fields = reward_type.field_names assert not set(obs_fields).intersection( set(rew_fields) ), "Observation and Reward shouldn't share fields names" obs_kwargs = {k: v for k, v in batch.items() if k in obs_fields} obs = observation_type(**obs_kwargs) reward_kwargs = {k: v for k, v in batch.items() if k in rew_fields} reward = reward_type(**reward_kwargs) return obs, reward if isinstance(batch, observation_type): return batch, None if not isinstance(batch, (tuple, list)): # TODO: Add support for more types maybe? Or just wrap it in a tuple # and call it a day? raise RuntimeError(f"Batch is of an unsuported type: {type(batch)}.") # If the batch already has two elements, check if they are already of # the right type, to avoid unnecessary computation below. if len(batch) == 2: obs, rew = batch if isinstance(obs, observation_type) and isinstance(rew, reward_type): return obs, rew n_items = len(batch) if n_items < min_items or n_items > max_items: raise RuntimeError( f"There aren't the right number of elements in the batch to " f"create both an Observation and a Reward!\n" f"(batch has {n_items} items, but type " f"{observation_type} requires from {min_for_obs} to " f"{max_for_obs} args, while {reward_type} requires from " f"{min_for_rew} to {max_for_reward} args. " ) # Batch looks like: # [ # O_1, O_2, ..., O_{min_obs}, (O_{min_obs+1}), ..., (O_{max_obs}), # R_1, R_2, ..., R_{min_rew}, (R_{min_rew+1}), ..., (R_{max_rew}), # ] if n_items == 0: obs = observation_type() rew = reward_type() if n_items == max_items: # Easiest case! Just use all the values. obs = observation_type(*batch[:max_for_obs]) rew = reward_type(*batch[max_for_obs:]) elif n_items == min_items: # Easy case as well. Also simply uses all the values directly. obs = observation_type(*batch[:min_for_obs]) rew = reward_type(*batch[min_for_obs:]) elif n_optional_for_obs == 0 and n_optional_for_rew != 0: # All the extra args go in the reward. obs = observation_type(*batch[:min_for_obs]) rew = reward_type(*batch[min_for_obs:]) elif n_optional_for_obs != 0 and n_optional_for_rew == 0: # All the extra args go in the observation. obs = observation_type(*batch[:max_for_obs]) rew = reward_type(*batch[max_for_obs:]) else: # We can't tell where the 'extra' tensors should go. # TODO: Maybe just assume that all the 'extra' tensors are meant to # be part of the observation? or the reward? For instance: # Option 1: All the extra args go in the observation: # obs = Observation(*batch[:n_items-n_required_for_rew]) # rew = Observation(*batch[n_items-n_required_for_rew:]) # Option 2: All the extra args go in the reward: # obs = Observation(*batch[:n_required_for_obs]) # rew = Observation(*batch[n_required_for_obs:]) n_extra = n_items - min_items max_extra = n_optional_for_obs + n_optional_for_rew raise NotImplementedError( f"Can't tell where to put these extra tensors!\n" f"(batch has {n_items} items, but type " f"{observation_type} requires from {min_for_obs} to " f"{max_for_obs} args, while {reward_type} requires from " f"{min_for_rew} to {max_for_reward} args. There are " f"{n_extra} extra items out of a potential of {max_extra}." ) return obs, rew return split_batch_transform def n_fields(batch_type: Type[Batch]) -> int: """Helper function, gives back the total number of fields in Batch subclass. Parameters ---------- batch_type : Type A subclass of Batch. Returns ------- int The total number of fields in the type. See the `fields` function of the `dataclasses` package for more info. """ return len(dataclasses.fields(batch_type)) def n_required_fields(batch_type: Type) -> int: """Helper function, gives the number of required fields in the dataclass. Parameters ---------- batch_type : Type [description] Returns ------- int The number of fields which don't have a default value or a default factory and are required by the constructor (have init=True). """ # Need to figure out a way to get the number fields through the # class itself. fields = dataclasses.fields(batch_type) required_fields_names = [ f.name for f in fields if f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING and f.init ] # print(f"class {batch_type}: required fields: {required_fields_names}") return len(required_fields_names) ================================================ FILE: sequoia/common/transforms/to_tensor.py ================================================ """ Slight modification of the ToTensor transform from TorchVision. @lebrice: I wrote this because I would often get weird 'negative stride in images' errors when converting PIL images from some gym environments when using `ToTensor` from torchvision. """ from collections.abc import Mapping from dataclasses import dataclass from functools import singledispatch from typing import Dict, Sequence, Tuple, Union import gym import numpy as np import torch from gym import spaces from PIL.Image import Image from torch import Tensor from torchvision.transforms import ToTensor as ToTensor_ from torchvision.transforms import functional as F from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace from sequoia.utils.logging_utils import get_logger from .channels import channels_first_if_needed from .transform import Img, Transform logger = get_logger(__name__) def copy_if_negative_strides(image: Img) -> Img: # It sometimes happens when taking images from a gym env that the strides # are negative, for some reason. Therefore we need to copy the array # before we can call torchvision.transforms.functional.to_tensor(image). if isinstance(image, Image): image = np.array(image) if isinstance(image, np.ndarray): strides = image.strides elif isinstance(image, Tensor): strides = image.stride() elif hasattr(image, "strides"): strides = image.strides else: raise NotImplementedError(f"Can't get strides of object {image}") if any(s < 0 for s in strides): return image.copy() return image @singledispatch def image_to_tensor(image: Union[Img, Sequence[Img], gym.Space]) -> Union[Tensor, gym.Space]: """ Converts a PIL Image or numpy.ndarray ((N) x H x W x C) in the range [0, 255] to a torch.FloatTensor of shape ((N) x C x H x W) in the range [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8 Parameters ---------- image : Union[Img, Sequence[Img]] [description] Returns ------- Tensor [description] """ raise NotImplementedError(f"Don't know how to convert {image} to a Tensor.") # @image_to_tensor.register # def _(image: Tensor) -> Tensor: # return channels_first_if_needed(image) @image_to_tensor.register(Tensor) @image_to_tensor.register(np.ndarray) @image_to_tensor.register(Image) def _(image: Union[Image, np.ndarray]) -> Tensor: """Converts a PIL Image, or np.uint8 ndarray to a Tensor. Also reshapes it to channels_first format (because ToTensor from torchvision does it also). """ from .channels import channels_first_if_needed image = copy_if_negative_strides(image) if len(image.shape) == 2: return F.to_tensor(image) if isinstance(image, np.ndarray): # Convert to channels last if needed, because ToTensor expects to # receive that. image = channels_first_if_needed(image) image = torch.from_numpy(image).contiguous() # backward compatibility if isinstance(image, torch.ByteTensor): image = image.float().div(255) return image if len(image.shape) == 4: return channels_first_if_needed(torch.stack(list(map(image_to_tensor, image)))) if not isinstance(image, Tensor): image = F.to_tensor(image) return channels_first_if_needed(image) @image_to_tensor.register(list) def _list_of_images_to_tensor(image: Sequence[Img]) -> Tensor: return torch.stack(list(map(image_to_tensor, image))) @image_to_tensor.register(tuple) def _to_tensor_effect_on_image_shape(image: Tuple[int, ...]) -> Tuple[int, ...]: """Give the output shape given the input shape of an image.""" if len(image) == 3: from .channels import channels_first_if_needed return channels_first_if_needed(image) return image @image_to_tensor.register(spaces.Box) def _(image: spaces.Box) -> spaces.Box: if image.dtype == np.uint8: # images get their bounds changed to [0. 1.] and their shape changed to # channels_first. image = type(image)( low=0.0, high=1.0, shape=channels_first_if_needed(image.shape), dtype=np.float32 ) # TODO: it sometimes happens that the `image` space has already been # through 'to_tensor`, not sure what to do in that case. # elif not has_tensor_support(image): # raise RuntimeError(f"image spaces should have dtype np.uint8!: {image}") # Since the transform would convert images / ndarrays to tensors, then we # add 'Tensor' support when applying the same transform on the Space of # images! image = add_tensor_support(image) return image @image_to_tensor.register(NamedTupleSpace) def _(space: Dict, device: torch.device = None) -> Dict: from .resize import is_image return type(space)( **{ key: image_to_tensor(value) if is_image(value) else value for key, value in space.items() }, dtype=space.dtype, ) @image_to_tensor.register(Mapping) @image_to_tensor.register(spaces.Dict) def _space_with_images_to_tensor(space: Dict, device: torch.device = None) -> Dict: from .resize import is_image return type(space)( **{ key: image_to_tensor(value) if is_image(value) else value for key, value in space.items() } ) @image_to_tensor.register(TypedDictSpace) def _space_with_images_to_tensor( space: TypedDictSpace, device: torch.device = None ) -> TypedDictSpace: from .resize import is_image return type(space)( {key: image_to_tensor(value) if is_image(value) else value for key, value in space.items()}, dtype=space.dtype, ) # @image_to_tensor.register(Image) # def to_tensor(image: Union[Img, Sequence[Img]]) -> Tensor: # tensor: Tensor # if isinstance(image, Tensor): # return channels_first(image) # return image # # return channels_first(image) # if isinstance(image, (list, tuple)) or (isinstance(image, np.ndarray) and image.ndim == 4): # return torch.stack(list(map(to_tensor, image))) # assert isinstance(image, (np.ndarray, Image)) # image = copy_if_negative_strides(image) # if isinstance(image, np.ndarray): # # Convert to channels last if needed, because ToTensor expects to # # receive that. # if len(image.shape) == 2: # pass # elif image.shape[-1] not in {1, 3}: # assert image.shape[0] in {1, 3}, image.shape # image = image.transpose(1, 2, 0) # # image = channels_last(image) # image = F.to_tensor(image) # assert isinstance(image, Tensor), image.shape # return image @dataclass class ToTensor(ToTensor_, Transform): def __call__(self, image): """ Args: image (PIL Image or numpy.ndarray): Image to be converted to tensor. Returns: Tensor: Converted image. NOTE: torchvision's ToTensor transform assumes that whatever it is given is always in channels_last format (as is usually the case with PIL images) and always returns images with the channels *first*! Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has dtype = np.uint8 """ return image_to_tensor(image) # @classmethod # def shape_change(cls, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]: # from .channels import ChannelsFirstIfNeeded # return ChannelsFirstIfNeeded.shape_change(input_shape) # @classmethod # def space_change(cls, input_space: gym.Space) -> gym.Space: # if not isinstance(input_space, spaces.Box): # logger.warning(UserWarning(f"Transform {cls} is only meant for Box spaces, not {input_space}")) # return input_space # return spaces.Box( # low=0., # high=1., # shape=cls.shape_change(input_space.shape), # dtype=np.float32, # ) ================================================ FILE: sequoia/common/transforms/transform.py ================================================ """ Defines a 'smarter' Transform class. """ from abc import abstractmethod from typing import Generic, Tuple, TypeVar, Union, overload import numpy as np from gym import Space from PIL.Image import Image from torch import Tensor InputType = TypeVar("InputType") OutputType = TypeVar("OutputType") Img = TypeVar("Img", Image, np.ndarray, Tensor) Shape = TypeVar("Shape", bound=Tuple[int, ...]) class Transform(Generic[InputType, OutputType]): """Callable that can also tell you its impact on the shape of inputs.""" @overload def __call__(self, input: InputType) -> OutputType: ... @overload def __call__(self, input: Shape) -> Shape: ... @overload def __call__(self, input: Space) -> Space: ... @abstractmethod def __call__(self, input: Union[InputType, Space, Shape]) -> Union[OutputType, Space, Shape]: pass ================================================ FILE: sequoia/common/transforms/transform_enum.py ================================================ """ Transforms and such. Trying to make it possible to parse such from the command-line. Also, playing around with the idea of adding the ability to predict the change in shape resulting from the transforms, à-la-Tensorflow. """ from enum import Enum from typing import Any, Callable, List, Tuple, TypeVar, Union import gym import torch from simple_parsing.helpers.serialization.encoding import encode from torchvision.transforms import Compose as ComposeBase from torchvision.transforms import RandomGrayscale from sequoia.utils.logging_utils import get_logger from sequoia.utils.serialization import decode logger = get_logger(__name__) from .channels import ( ChannelsFirst, ChannelsFirstIfNeeded, ChannelsLast, ChannelsLastIfNeeded, ThreeChannels, ) from .resize import Resize from .to_tensor import ToTensor from .transform import Transform # TODO: Add names to the dimensions in the transforms! # from pl_bolts.models.self_supervised.simclr import (SimCLREvalDataTransform, # SimCLRTrainDataTransform) class Transforms(Enum): """Enum of possible transforms. By having this as an Enum, we can choose which transforms to use from the command-line. This also makes it easier to check for identity, e.g. to check wether a particular transform was used. TODO: Add the SimCLR/MOCO/etc transforms from https://pytorch-lightning-bolts.readthedocs.io/en/latest/transforms.html TODO: Figure out a way to let people customize the arguments to the transforms? """ three_channels = ThreeChannels() to_tensor = ToTensor() random_grayscale = RandomGrayscale() channels_first = ChannelsFirst() channels_first_if_needed = ChannelsFirstIfNeeded() channels_last = ChannelsLast() channels_last_if_needed = ChannelsLastIfNeeded() resize_64x64 = Resize((64, 64)) resize_32x32 = Resize((32, 32)) def __call__(self, x): return self.value(x) @classmethod def _missing_(cls, value: Any): # called whenever performing something like Transforms[] # with not being one of the enum values. for e in cls: if e.name == value: return e elif type(e.value) == type(value): return e return super()._missing_(value) def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]: raise NotImplementedError(f"TODO: Add shape (tuple) support to {self}") if isinstance(self.value, Transform): return self.value.shape_change(input_shape) def space_change(self, input_space: gym.Space) -> gym.Space: raise NotImplementedError(f"TODO: Add space support to {self}") if isinstance(self.value, Transform): return self.value.space_change(input_space) T = TypeVar("T", bound=Callable) class Compose(List[T], ComposeBase): """Extend the Compose class of torchvision with methods of `list`. This can also be passed in members of the `Transforms` enum, which makes it possible to do something like this: >>> transforms = Compose([Transforms.to_tensor, Transforms.three_channels,]) >>> Transforms.three_channels in transforms True >>> transforms += [Transforms.resize_32x32] >>> from pprint import pprint >>> pprint(transforms) [, , ] NEW: This Compose transform also applies on gym spaces: >>> import numpy as np >>> from gym.spaces import Box >>> image_space = Box(0, 255, (28, 28, 1), dtype=np.uint8) >>> transforms(image_space) TensorBox(0.0, 1.0, (3, 32, 32), torch.float32) """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) ComposeBase.__init__(self, transforms=self) # def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]: # for transform in self: # if isinstance(transform, Transforms): # transform = transform.value # if isinstance(transform, Transform) or hasattr(transform, "shape_change"): # input_shape = transform.shape_change(input_shape) # else: # logger.debug( # f"Unable to detect the change of shape caused by " # f"transform {transform}, assuming its output has same " # f"shape as its input." # ) # logger.debug(f"Final shape: {input_shape}") # return input_shape @encode.register def encode_transforms(v: Transforms) -> str: return v.name @decode.register def decode_transforms(v: str) -> Transforms: return Transforms[v] if __name__ == "__main__": import doctest doctest.testmod() ================================================ FILE: sequoia/common/transforms/transforms_test.py ================================================ from dataclasses import dataclass, field from typing import List, Tuple import gym import numpy as np import pytest import torch from gym import spaces from sequoia.conftest import requires_pyglet from sequoia.utils.serialization import Serializable from . import Compose, Transforms @pytest.mark.parametrize( "transform,input_shape,output_shape", [ ## Channels first: (Transforms.channels_first, (9, 9, 3), (3, 9, 9)), # Check that the ordering doesn't get messed up: (Transforms.channels_first, (9, 12, 3), (3, 9, 12)), (Transforms.channels_first, (400, 600, 3), (3, 400, 600)), # Axes get permuted even when the channels are already 'first'. (Transforms.channels_first, (3, 12, 9), (9, 3, 12)), ## Channels first (if needed): (Transforms.channels_first_if_needed, (9, 9, 3), (3, 9, 9)), (Transforms.channels_first_if_needed, (9, 12, 3), (3, 9, 12)), (Transforms.channels_first_if_needed, (400, 600, 3), (3, 400, 600)), # Axes do NOT get permuted when the channels are already 'first'. (Transforms.channels_first_if_needed, (3, 12, 9), (3, 12, 9)), # Does nothing when the channel dim isn't in {1, 3}: (Transforms.channels_first_if_needed, (7, 12, 13), (7, 12, 13)), (Transforms.channels_first_if_needed, (7, 12, 123), (7, 12, 123)), # when the input is 4-dimensional with batch size of 1 or 3, still works: (Transforms.channels_first_if_needed, (1, 28, 12, 3), (1, 3, 28, 12)), (Transforms.channels_first_if_needed, (1, 400, 600, 3), (1, 3, 400, 600)), (Transforms.channels_first_if_needed, (1, 3, 28, 27), (1, 3, 28, 27)), (Transforms.channels_first_if_needed, (3, 28, 12, 3), (3, 3, 28, 12)), (Transforms.channels_first_if_needed, (3, 400, 600, 3), (3, 3, 400, 600)), (Transforms.channels_first_if_needed, (3, 3, 28, 27), (3, 3, 28, 27)), ## Channels Last: (Transforms.channels_last, (3, 9, 9), (9, 9, 3)), # Check that the ordering doesn't get messed up: (Transforms.channels_last, (3, 9, 12), (9, 12, 3)), # Axes get permuted even when the channels are already 'last'. (Transforms.channels_last, (5, 6, 1), (6, 1, 5)), ## Channels Last (if needed): (Transforms.channels_last_if_needed, (3, 9, 9), (9, 9, 3)), # Check that the ordering doesn't get messed up: (Transforms.channels_last_if_needed, (3, 9, 12), (9, 12, 3)), # Axes do NOT get permuted when the channels are already 'last': (Transforms.channels_last_if_needed, (5, 6, 1), (5, 6, 1)), (Transforms.channels_last_if_needed, (12, 13, 3), (12, 13, 3)), # Test out the 'ThreeChannels' transform (Transforms.three_channels, (7, 12, 13), (7, 12, 13)), (Transforms.three_channels, (1, 28, 28), (3, 28, 28)), (Transforms.three_channels, (28, 28, 1), (28, 28, 3)), # Test out the 'Resize' transforms (Transforms.resize_64x64, (3, 128, 128), (3, 64, 64)), (Transforms.resize_64x64, (128, 128, 3), (64, 64, 3)), (Transforms.resize_64x64, (3, 64, 64), (3, 64, 64)), (Transforms.resize_64x64, (64, 64, 3), (64, 64, 3)), (Transforms.resize_64x64, (3, 111, 128), (3, 64, 64)), (Transforms.resize_64x64, (111, 128, 3), (64, 64, 3)), ], ) def test_transform(transform: Transforms, input_shape, output_shape): x = torch.rand(input_shape) assert transform(x).shape == output_shape, transform # Apply the transform onto the input shape directly: assert transform(input_shape) == output_shape input_space = spaces.Box(low=0, high=1, shape=input_shape) output_space = spaces.Box(low=0, high=1, shape=output_shape) # Apply the transform onto the input space directly: actual_output_space = transform(input_space) assert actual_output_space == output_space # TODO: Test that serializing / deserializing the transforms works correctly. @dataclass class Foo(Serializable): transforms: List[Transforms] = field(default_factory=list) foo = Foo(transforms=[transform]) foo_ = Foo.loads_json(foo.dumps_json()) assert foo_ == foo assert Compose(foo_.transforms)(x).shape == output_shape assert Compose(foo_.transforms)(input_space) == output_space @pytest.mark.parametrize( "transform,input_shape,output_shape", [ # NOTE: to_tensor also does the channels-first operation (because since the # torchvision transform ToTensor does it, we do it also). (Transforms.to_tensor, (9, 9, 3), (3, 9, 9)), (Transforms.to_tensor, (3, 9, 9), (3, 9, 9)), ], ) def test_to_tensor(transform: Transforms, input_shape, output_shape): x = np.random.randint(0, 255, input_shape, dtype=np.uint8) # x = PIL.Image.fromarray(x, mode="RGB") y = transform(x) assert y.shape == output_shape assert transform(input_shape) == output_shape assert isinstance(y, torch.Tensor) input_space = spaces.Box(low=0, high=255, shape=input_shape, dtype=np.uint8) output_space = spaces.Box(low=0, high=1, shape=output_shape, dtype=np.float32) assert transform(input_space) == output_space @pytest.mark.parametrize( "transform, input_shape", [ (Transforms.channels_last_if_needed, (7, 12, 13)), ], ) def test_applying_transforms_on_weird_input_raises_error( transform: Transforms, input_shape: Tuple[int, ...] ): with pytest.raises(Exception): transform(input_shape) input_space = spaces.Box(low=0, high=255, shape=input_shape, dtype=np.uint8) with pytest.raises(Exception): transform(input_space) with pytest.raises(Exception): transform(input_space.sample()) def test_compose_applied_on_shape(): transform = Compose([Transforms.channels_first]) start_shape = (9, 9, 3) x = transform(torch.rand(start_shape)) assert x.shape == (3, 9, 9) assert x.shape == transform(start_shape) assert x.shape == transform(start_shape) == (3, 9, 9) import gym from sequoia.common.gym_wrappers import PixelObservationWrapper, TransformObservation @requires_pyglet def test_channels_first_transform_on_gym_env(): env = gym.make("CartPole-v0") env = PixelObservationWrapper(env) assert env.reset().shape == (400, 600, 3) transform = Compose( [ Transforms.to_tensor, Transforms.channels_first_if_needed, ] ) env = TransformObservation(env, transform) assert env.reset().shape == (3, 400, 600) assert env.observation_space.shape == (3, 400, 600) obs, *_ = env.step(env.action_space.sample()) assert obs.shape == (3, 400, 600) def test_preserves_device_when_possible(): # TODO: Write a test that checks which transforms can be run on GPU, and checks # that they preserve the `device` attribute of a space when it's applied on a space. pass ================================================ FILE: sequoia/common/transforms/utils.py ================================================ from typing import Any import numpy as np from gym import spaces from PIL import Image from torch import Tensor from sequoia.common.spaces.image import Image as ImageSpace def is_image(v: Any) -> bool: """Returns wether the value is an Image, an image tensor, or an image space. """ return ( isinstance(v, Image.Image) or (isinstance(v, (Tensor, np.ndarray)) and len(v.shape) >= 3) or isinstance(v, ImageSpace) or isinstance(v, spaces.Box) and len(v.shape) >= 3 ) ================================================ FILE: sequoia/common.puml ================================================ @startuml common !include gym.puml ' class List package common { abstract class Batch {} package transforms as common.transforms { enum Transforms { to_tensor: ToTensor three_channels: ThreeChannels random_grayscale: RandomGrayscale channels_first: ChannelsFirst channels_last: ChannelsLast resize_64x64: Resize resize_32x32: Resize ... } abstract class Transform class Compose extends torchvision.transforms.Compose { } } package gym_wrappers as common.gym_wrappers {} package spaces as common.spaces {} } @enduml ================================================ FILE: sequoia/conftest.py ================================================ import json import logging import sys from pathlib import Path from typing import Any, Iterable, List, Optional, Type, get_type_hints import gym import numpy as np import pytest from sequoia.common.config import Config from sequoia.methods.trainer import TrainerConfig from sequoia.settings import Method from sequoia.settings.rl.envs import ( ATARI_PY_INSTALLED, METAWORLD_INSTALLED, MONSTERKONG_INSTALLED, MTENV_INSTALLED, MUJOCO_INSTALLED, ) from sequoia.methods import AVALANCHE_INSTALLED, SB3_INSTALLED # Prevent the collection of these modules if the requirements for them aren't installed. collect_ignore = [] collect_ignore_glob = [] if not MONSTERKONG_INSTALLED: collect_ignore.append("settings/rl/envs/monsterkong.py") if not MUJOCO_INSTALLED: collect_ignore.append("settings/rl/envs/mujoco") if not AVALANCHE_INSTALLED: collect_ignore.append("methods/avalanche_methods") if not SB3_INSTALLED: collect_ignore.append("methods/stable_baselines3_methods") logger = logging.getLogger(__name__) parametrize = pytest.mark.parametrize xfail = pytest.mark.xfail def xfail_param(*args, reason: str): return pytest.param(*args, marks=pytest.mark.xfail(reason=reason)) def skip_param(*args, reason: str): return pytest.param(*args, marks=pytest.mark.skip(reason=reason)) def skipif_param(condition, *args, reason: str): return pytest.param(*args, marks=pytest.mark.skipif(condition, reason=reason)) @pytest.fixture(autouse=True) def add_np(doctest_namespace): doctest_namespace["np"] = np @pytest.fixture() def trainer_config(tmp_path_factory): tmp_path = tmp_path_factory.mktemp("log_dir") return TrainerConfig( fast_dev_run=True, # TODO: What if we don't have a GPU when testing? # TODO: Parametrize with the distributed backend, skip param if no GPU? distributed_backend="dp", default_root_dir=tmp_path, ) @pytest.fixture() def config(tmp_path: Path): # TODO: Set the results dir somehow with the value of this `tmp_path` fixture. tmp_results_dir = tmp_path / "tmp_results" tmp_results_dir.mkdir() return Config(debug=True, seed=123, log_dir=tmp_results_dir) @pytest.fixture(scope="session") def session_config(tmp_path_factory: Path): test_log_dir = tmp_path_factory.mktemp("test_log_dir") # TODO: Set the results dir somehow with the value of this `tmp_path` fixture. return Config(debug=True, seed=123, log_dir=test_log_dir) def id_fn(params: Any) -> str: """Creates a 'name' for an execution of a parametrized test. Args: params (Dict): [description] Returns: str: [description] """ # if not params: # return "default" if isinstance(params, dict): return json.dumps(params, sort_keys=True, separators=(",", ":")) return str(params) def get_all_dataset_names(method_class: Type[Method] = None) -> List[str]: # When not given a method class, use the Method class (gives ALL the # possible datasets). method_class = method_class or Method dataset_names: Iterable[List[str]] = map( lambda s: list(s.available_datasets), method_class.get_applicable_settings() ) return sorted(list(set(sum(dataset_names, [])))) def get_dataset_params( method_type: Type[Method], supported_datasets: List[str], skip_unsuported: bool = True, ) -> List[str]: all_datasets = get_all_dataset_names(method_type) dataset_params = [] for dataset in all_datasets: if dataset in supported_datasets: dataset_params.append(dataset) elif skip_unsuported: dataset_params.append(skip_param(dataset, reason="Not supported yet")) else: dataset_params.append(xfail_param(dataset, reason="Not supported yet")) return dataset_params test_datasets_option_name: str = "datasets" def pytest_addoption(parser): parser.addoption("--slow", action="store_true", default=False) parser.addoption(f"--{test_datasets_option_name}", action="store", nargs="*", default=[]) slow = pytest.mark.skipif( "--slow" not in sys.argv, reason="This test is slow so we only run it when necessary.", ) def slow_param(*args): """Mark a parameter as 'slow', so it's only run when using the "--slow" flag.""" return pytest.param(*args, marks=slow) def find_class_under_test( module, function, name: str = "method", global_var_name: str = None ) -> Optional[Type]: cls: Optional[Type] = None module_name: str = module.__name__ function_name: str = function.__name__ type_hints = get_type_hints(function) global_var_name = global_var_name or name.capitalize() for k in [name, f"{name}_class", f"{name}_type"]: cls = type_hints.get(k) if cls: logger.debug( f"function {function_name} has annotation of type " f"{cls} for argument {k}." ) break if cls is None: # Try to get the class to test from a global variable on the module. cls = getattr(module, global_var_name, None) logger.debug( f"Test module {module_name} has a '{global_var_name}' gloval variable of type {cls}" ) return cls def parametrize_test_datasets(metafunc): # We want to get these from inspecting the test function: # The datasets to test on. test_datasets: List[str] = [] default_test_datasets = ["mnist", "cifar10"] func_param_name = "test_dataset" global_var_names = ["test_datasets", "supported_datasets"] if func_param_name not in metafunc.fixturenames: return module = metafunc.module function = metafunc.function module_name: str = module.__name__ function_name: str = function.__name__ # Get the test datasets from the command-line option. datasets_from_command_line = metafunc.config.getoption(test_datasets_option_name) if "ALL" in datasets_from_command_line: method_class: Optional[Type[Method]] = find_class_under_test( module, function, name="method", ) test_datasets = get_all_dataset_names(method_class) elif "NONE" in datasets_from_command_line: test_datasets = [skip_param("?", reason="Set to skip, with command line arg.")] elif datasets_from_command_line: assert isinstance(datasets_from_command_line, list) and all( isinstance(v, str) for v in datasets_from_command_line ) # If any datasets were set, use them. test_datasets = datasets_from_command_line else: # The default datasets to try are the ones specified at the global # variable with name {module_test_datasets_name} in the module. for global_var_name in global_var_names: test_datasets = getattr(module, global_var_name, None) if test_datasets is not None: break else: logger.warning( RuntimeWarning( f"Test module {module_name} didn't specify a test_datasets " f"global variable, defaulting to {default_test_datasets}" ) ) test_datasets = default_test_datasets test_datasets = sorted(test_datasets) logger.info( f"Parametrizing the '{func_param_name}' param of test " f"{module_name} :: {function_name} with {test_datasets}." ) metafunc.parametrize(func_param_name, test_datasets) def pytest_generate_tests(metafunc): """Automatically Parametrize the tests. TODO: Having some fun parametrizing tests automatically, but should check that it's worth it, because otherwise it might make things too confusing. """ parametrize_test_datasets(metafunc) class DummyEnvironment(gym.Env): """Dummy environment for testing. The reward is how close to the target value the state (a counter) is. The actions are: 0: keep the counter the same. 1: Increment the counter. 2: Decrement the counter. """ def __init__(self, start: int = 0, target: int = 5, max_value: int = None): self.i = start self.start = start max_value = max_value if max_value is not None else target * 2 assert 0 <= target <= max_value self.max_value = max_value self.reward_range = (0, max_value) self.action_space = gym.spaces.Discrete(n=3) self.observation_space = gym.spaces.Discrete(n=max_value) self.target = target self.reward_range = (0, max(target, max_value - target)) self.done: bool = False self._reset: bool = False def step(self, action: int): # The action modifies the state, producing a new state, and you get the # reward associated with that transition. if not self._reset: raise RuntimeError("Need to reset before you can step.") if action == 1: self.i += 1 elif action == 2: self.i -= 1 self.i %= self.max_value done = self.i == self.target reward = abs(self.i - self.target) # print(self.i, reward, done, action) return self.i, reward, done, {} def reset(self): self._reset = True self.i = self.start return self.i def seed(self, seed: Optional[int]) -> List[int]: seeds = [] seeds.append(self.observation_space.seed(seed)) seeds.append(self.action_space.seed(seed)) return seeds monsterkong_required = pytest.mark.skipif( not MONSTERKONG_INSTALLED, reason="monsterkong is required for this test." ) def param_requires_monsterkong(*args): return skipif_param( not MONSTERKONG_INSTALLED, *args, reason="monsterkong is required for this parameter.", ) atari_py_required = pytest.mark.skipif( not ATARI_PY_INSTALLED, reason="atari_py is required for this test." ) def param_requires_atari_py(*args): return skipif_param( not ATARI_PY_INSTALLED, *args, reason="atari_py is required for this parameter.", ) mtenv_required = pytest.mark.skipif(not MTENV_INSTALLED, reason="mtenv is required for this test.") def param_requires_mtenv(*args): return skipif_param( not MTENV_INSTALLED, *args, reason="mtenv is required for this parameter.", ) # Metaworld needs mujoco metaworld_required = pytest.mark.skipif( not METAWORLD_INSTALLED, reason="metaworld is required for this test." ) def param_requires_metaworld(*args): return skipif_param( not METAWORLD_INSTALLED, *args, reason="metaworld is required for this parameter.", ) mujoco_required = pytest.mark.skipif( not MUJOCO_INSTALLED, reason="mujoco-py is required for this test." ) def param_requires_mujoco(*args): return skipif_param( not MUJOCO_INSTALLED, *args, reason="mujoco-py is required for this parameter.", ) PYGLET_INSTALLED = False try: import pyglet PYGLET_INSTALLED = True except ImportError: pass requires_pyglet = pytest.mark.skipif( not PYGLET_INSTALLED, reason="pyglet is required to render envs." ) def param_requires_pyglet(*args): return skipif_param( not PYGLET_INSTALLED, *args, reason="pyglet is required to render envs.", ) ================================================ FILE: sequoia/experiments/__init__.py ================================================ """ Package that defines a list of "Experiments". """ from .experiment import Experiment from .hpo_sweep import HPOSweep ================================================ FILE: sequoia/experiments/experiment.py ================================================ """ Module used for launching an Experiment (applying a Method to one or more Settings). """ import os import shlex import sys from dataclasses import dataclass from inspect import isclass from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Type, Union from simple_parsing import ArgumentParser, choice, mutable_field from sequoia.common.config import Config, WandbConfig from sequoia.methods import Method, get_all_methods from sequoia.settings import Results, Setting, all_settings from sequoia.settings.presets import setting_presets from sequoia.utils import Parseable, Serializable, get_logger from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) source_dir = Path(os.path.dirname(__file__)) def get_method_names() -> Dict[str, Type[Method]]: all_methods = get_all_methods() return {method.get_full_name(): method for method in all_methods} @dataclass class Experiment(Parseable, Serializable): """Applies a Method to an experimental Setting to obtain Results. When the `setting` is not set, this will apply the chosen method on all of its "applicable" settings. (i.e. all subclasses of its target setting). When the `method` is not set, this will apply all applicable methods on the chosen setting. """ # Which experimental setting to use. When left unset, will evaluate the # provided method on all applicable settings. setting: Optional[Union[Setting, Type[Setting]]] = choice( {setting.get_name(): setting for setting in all_settings}, default=None, type=str, ) # Path to a json/yaml file containing preset options for the chosen setting. # Can also be one of the key from the `setting_presets` dictionary, # for convenience. benchmark: Optional[Union[str, Path]] = None # Which experimental method to use. When left unset, will evaluate all # compatible methods on the provided setting. method: Optional[Union[str, Method, Type[Method]]] = choice(get_method_names(), default=None) # All the other configuration options, which are independant of the choice # of Setting or of Method, go in this next dataclass here! For example, # things like the log directory, wether Cuda is used, etc. config: Config = mutable_field(Config) wandb: Optional[WandbConfig] = None def __post_init__(self): if not (self.setting or self.method): raise RuntimeError("One of `setting` or `method` must be set!") # All settings have a unique name. if isinstance(self.setting, str): self.setting = get_class_with_name(self.setting, all_settings) # Each Method also has a unique name. if isinstance(self.method, str): self.method = get_class_with_name(self.method, all_methods) if self.benchmark: # If the provided benchmark isn't a path, try to get the value from # the `setting_presets` dict. If it isn't in the dict, raise an # error. if not Path(self.benchmark).is_file(): if self.benchmark in setting_presets: self.benchmark = setting_presets[self.benchmark] else: raise RuntimeError( f"Could not find benchmark '{self.benchmark}': it " f"is neither a path to a file or a key of the " f"`setting_presets` dictionary. \n\n" f"Available presets: \n" + "\n".join( f"- {preset_name}: \t{preset_file.relative_to(os.getcwd())}" for preset_name, preset_file in setting_presets.items() ) ) # Creating an experiment for the given setting, loaded from the # config file. # TODO: IDEA: Do the same thing for loading the Method? logger.info( f"Will load the options for the setting from the file " f"at path {self.benchmark}." ) drop_extras = True if self.setting is None: logger.warn( UserWarning( f"You didn't specify which setting to use, so this will " f"try to infer the correct type of setting to use from the " f"contents of the file, which might not work!\n (Consider " f"running this with the `--setting` option instead." ) ) # Find the first type of setting that fits the given file. drop_extras = False self.setting = Setting # Raise an error if any of the args in sys.argv would have been used # up by the Setting, just to prevent any ambiguities. try: _, unused_args = self.setting.from_known_args() except (ImportError, AssertionError) as exc: # NOTE: An ImportError can occur here because of a missing OpenGL # dependency, since when no arguments are passed, the default RL setting # is created (cartpole with pixel observations), which requires a render # wrapper to be added (which itself uses pyglet, which uses OpenGL). logger.warning(RuntimeWarning(f"Unable to check for unused args: {exc}")) # In this case, we just pretend that no arguments would have been used. unused_args = sys.argv[1:] ignored_args = list(set(sys.argv[1:]) - set(unused_args)) if ignored_args: # TODO: This could also be trigerred if there were arguments # in the method with the same name as some from the Setting. raise RuntimeError( f"Cannot pass command-line arguments for the Setting when " f"loading a preset, since these arguments whould have been " f"ignored when creating the setting of type {self.setting} " f"anyway: {ignored_args}" ) assert isclass(self.setting) and issubclass(self.setting, Setting) # Actually load the setting from the file. # TODO: Why isn't this using `load_benchmark`? self.setting = self.setting.load(path=self.benchmark, drop_extra_fields=drop_extras) self.setting.wandb = self.wandb if self.method is None: raise NotImplementedError( f"For now, you need to specify a Method to use using the " f"`--method` argument when loading the setting from a file." ) if self.setting is not None and self.method is not None: if not self.method.is_applicable(self.setting): raise RuntimeError( f"Method {self.method} isn't applicable to " f"setting {self.setting}!" ) assert ( self.setting is None or isinstance(self.setting, Setting) or issubclass(self.setting, Setting) ) assert ( self.method is None or isinstance(self.method, Method) or issubclass(self.method, Method) ) @staticmethod def run_experiment( setting: Union[Setting, Type[Setting]], method: Union[Method, Type[Method]], config: Config, argv: Union[str, List[str]] = None, strict_args: bool = False, ) -> Results: """Launches an experiment, applying `method` onto `setting` and returning the corresponding results. This assumes that both `setting` and `method` are not None. This always returns a single `Results` object. If either `setting` or `method` are classes, then instances of these classes from the command-line arguments `argv`. If `strict_args` is True and there are leftover arguments (not consumed by either the Setting or the Method), a RuntimeError is raised. This then returns the result of `setting.apply(method)`. Parameters ---------- argv : Union[str, List[str]], optional List of command-line args. When not set, uses the contents of `sys.argv`. Defaults to `None`. strict_args : bool, optional Wether to raise an error when encountering command-line arguments that are unexpected by both the Setting and the Method. Defaults to `False`. Returns ------- Results """ assert setting is not None and method is not None assert isinstance( setting, Setting ), f"TODO: Fix this, need to pass a wandb config to the Setting from the experiment!" if not (isinstance(setting, Setting) and isinstance(method, Method)): setting, method = parse_setting_and_method_instances( setting=setting, method=method, argv=argv, strict_args=strict_args ) assert isinstance(setting, Setting) assert isinstance(method, Method) assert isinstance(config, Config) return setting.apply(method, config=config) def launch( self, argv: Union[str, List[str]] = None, strict_args: bool = False, ) -> Results: """Launches the experiment, applying `self.method` onto `self.setting` and returning the corresponding results. This differs from `main` in that this assumes that both `self.setting` and `self.method` are not None, and so this always returns a single `Results` object. NOTE: Internally, this is equivalent to calling `run_experiment`, passing in the `setting`, `method` and `config` arguments from `self`. Parameters ---------- argv : Union[str, List[str]], optional List of command-line args. When not set, uses the contents of `sys.argv`. Defaults to `None`. strict_args : bool, optional Wether to raise an error when encountering command-line arguments that are unexpected by both the Setting and the Method. Defaults to `False`. Returns ------- Results An object describing the results of applying Method `self.method` onto the Setting `self.setting`. """ assert self.setting is not None assert self.method is not None assert self.config is not None if not (isinstance(self.setting, Setting) and isinstance(self.method, Method)): self.setting, self.method = parse_setting_and_method_instances( setting=self.setting, method=self.method, argv=argv, strict_args=strict_args ) assert isinstance(self.setting, Setting) assert isinstance(self.method, Method) self.setting.wandb = self.wandb self.setting.config = self.config return self.setting.apply(self.method, config=self.config) @classmethod def main( cls, argv: Union[str, List[str]] = None, strict_args: bool = False, ) -> Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]: """Launches one or more experiments from the command-line. First, we get the choice of method and setting using a first parser. Then, we parse the Setting and Method objects using the remaining args with two other parsers. Parameters ---------- - argv : Union[str, List[str]], optional, by default None command-line arguments to use. When None (default), uses sys.argv. Returns ------- Union[Results, Dict[Tuple[Type[Setting], Type[Method], Config], Results]] Results of the experiment, if only applying a method to a setting. Otherwise, if either of `--setting` or `--method` aren't set, this will be a dictionary mapping from (setting_type, method_type) tuples to Results. """ # TODO: Clean this up with the new command-line API. if argv is None: argv = sys.argv[1:] if isinstance(argv, str): argv = shlex.split(argv) argv_copy = argv.copy() experiment: Experiment experiment, argv = cls.from_known_args(argv) setting: Optional[Type[Setting]] = experiment.setting method: Optional[Type[Method]] = experiment.method config: Config = experiment.config if method is None and setting is None: raise RuntimeError(f"One of setting or method must be set.") if setting and method: # One 'job': Launch it directly. results = experiment.launch(argv, strict_args=strict_args) print("\n\n EXPERIMENT IS DONE \n\n") print(f"Results: {results}") return results # TODO: Test out this other case. Haven't used it in a while. # TODO: Move this to something like a BatchExperiment? all_results = launch_batch_of_runs(setting=setting, method=method, argv=argv) return all_results def launch_batch_of_runs( setting: Optional[Setting], method: Optional[Method], argv: Union[str, List[str]] = None, ) -> List[Tuple[Dict, Results]]: if argv is None: argv = sys.argv[1:] if isinstance(argv, str): argv = shlex.split(argv) argv_copy = argv.copy() experiment: Experiment experiment, argv = Experiment.from_known_args(argv) setting: Optional[Type[Setting]] = experiment.setting method: Optional[Type[Method]] = experiment.method config = experiment.config # TODO: Maybe if everything stays exactly identical, we could 'cache' # the results of some experiments, so we don't re-run them all the time? all_results: Dict[Tuple[Type[Setting], Type[Method]], Results] = {} # The lists of arguments for each 'job'. method_types: List[Type[Method]] = [] setting_types: List[Type[Setting]] = [] run_configs: List[Config] = [] if setting: logger.info(f"Evaluating all applicable methods on Setting {setting}.") method_types = setting.get_applicable_methods() setting_types = [setting for _ in method_types] elif method: logger.info(f"Applying Method {method} on all its applicable settings.") setting_types = method.get_applicable_settings() method_types = [method for _ in setting_types] # Create a 'config' for each experiment. # Use a log_dir for each run using the 'base' log_dir (passed # when creating the Experiment), the name of the Setting, and # the name of the Method. for setting_type, method_type in zip(setting_types, method_types): run_log_dir = config.log_dir / setting_type.get_name() / method_type.get_name() run_config_kwargs = config.to_dict() run_config_kwargs["log_dir"] = run_log_dir run_config = Config(**run_config_kwargs) run_configs.append(run_config) arguments_of_each_run: List[Dict] = [] results_of_each_run: List[Result] = [] # Create one 'job' per setting-method combination: for setting_type, method_type, run_config in zip(setting_types, method_types, run_configs): # NOTE: Some methods might use all the values in `argv`, and some # might not, so we set `strict=False`. arguments_of_each_run.append( dict( setting=setting_type, method=method_type, config=run_config, argv=argv, strict_args=False, ) ) # TODO: Use submitit or somethign like it, to run each of these in parallel: # See https://github.com/lebrice/Sequoia/issues/87 for more info. for run_arguments in arguments_of_each_run: result = Experiment.run_experiment(**run_arguments) logger.info(f"Results for arguments {run_arguments}: {result}") results_of_each_run.append(result) all_results = list(zip(arguments_of_each_run, results_of_each_run)) logger.info(f"All results: ") for run_arguments, run_results in all_results: print(f"Arguments: {run_arguments}") print(f"Results: {run_results}") return all_results def parse_setting_and_method_instances( setting: Union[Setting, Type[Setting]], method: Union[Method, Type[Method]], argv: Union[str, List[str]] = None, strict_args: bool = False, ) -> Tuple[Setting, Method]: # TODO: Should we raise an error if an argument appears both in the Setting # and the Method? parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) if not isinstance(setting, Setting): assert issubclass(setting, Setting) setting.add_argparse_args(parser) if not isinstance(method, Method): assert method is not None assert issubclass(method, Method) method.add_argparse_args(parser) if strict_args: args = parser.parse_args(argv) else: args, unused_args = parser.parse_known_args(argv) if unused_args: logger.warning(UserWarning(f"Unused command-line args: {unused_args}")) if not isinstance(setting, Setting): setting = setting.from_argparse_args(args) if not isinstance(method, Method): method = method.from_argparse_args(args) return setting, method def get_class_with_name( class_name: str, all_classes: Union[List[Type[Setting]], List[Type[Method]]], ) -> Union[Type[Method], Type[Setting]]: potential_classes = [c for c in all_classes if c.get_name() == class_name] # if target_class: # potential_classes = [ # m for m in potential_classes # if m.is_applicable(target_class) # ] if len(potential_classes) == 1: return potential_classes[0] if not potential_classes: raise RuntimeError( f"Couldn't find any classes with name {class_name} in the list of " f"available classes {all_classes}!" ) raise RuntimeError( f"There are more than one potential methods with name " f"{class_name}, which isn't supposed to happen! " f"(all_classes: {all_classes})" ) def check_has_descendants(potential_classes: List[Type[Method]]) -> List[bool]: """Returns a list where for each method in the list, check if it has any descendants (subclasses of itself) also within the list. """ def _has_descendant(method: Type[Method]) -> bool: """For a given method, check if it has any descendants within the list of potential methods. """ return any( (issubclass(other_method, method) and other_method is not method) for other_method in potential_classes ) return [_has_descendant(method) for method in potential_classes] def main(): logger.debug( "Registered Settings: \n" + "\n".join( f"- {setting.get_name()}: {setting} ({setting.get_path_to_source_file()})" for setting in all_settings ) ) logger.debug( "Registered Methods: \n" + "\n".join( f"- {method.get_name()}: {method} ({method.get_path_to_source_file()})" for method in get_all_methods() ) ) Experiment.main() exit(0) ================================================ FILE: sequoia/experiments/experiment_test.py ================================================ import shlex import sys from pathlib import Path from typing import Optional, Type import pytest from sequoia.common.config import Config from sequoia.conftest import slow from sequoia.methods import Method, get_all_methods from sequoia.methods.method_test import key_fn from sequoia.settings import Results, Setting, all_settings from .experiment import Experiment, get_method_names method_names = get_method_names() @pytest.mark.xfail( reason="@lebrice: I changed my mind on this. For example, it could make " "sense to have multiple methods called 'baseline' when a new Setting needs " "to create a new subclass of the BaseMethod or a new Method altogether." ) def test_no_collisions_in_method_names(): methods = get_all_methods() assert len(set(method.get_name() for method in methods)) == len(methods) def test_no_collisions_in_setting_names(): assert len(set(setting.get_name() for setting in all_settings)) == len(all_settings) def test_applicable_methods(): from sequoia.methods import BaseMethod from sequoia.settings import TraditionalSLSetting assert BaseMethod in TraditionalSLSetting.get_applicable_methods() def mock_apply(self: Setting, method: Method, config: Config) -> Results: # 1. Configure the method to work on the setting. # method.configure(self) # 2. Train the method on the setting. # method.train(self) # 3. Evaluate the method on the setting and return the results. # return self.evaluate(method) return type(method), type(self) @pytest.fixture() def set_argv_for_debug(monkeypatch): monkeypatch.setattr(sys, "argv", shlex.split("main.py --debug --fast_dev_run")) @pytest.fixture(params=sorted(get_all_methods(), key=str)) def method_type(request, monkeypatch, set_argv_for_debug): method_class: Type[Method] = request.param return method_class @pytest.fixture(params=sorted(all_settings, key=key_fn)) def setting_type(request, monkeypatch, set_argv_for_debug): setting_class: Type[Setting] = request.param monkeypatch.setattr(setting_class, "apply", mock_apply) for method_type in setting_class.get_applicable_methods(): pass return setting_class def test_experiment_from_args( method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]] ): """Test that when parsing the 'Experiment' from the command-line, the `setting` and `method` fields get set to the classes corresponding to their names. """ # method = method_type.get_name() method_name = [k for k, v in method_names.items() if v is method_type][0] setting = setting_type.get_name() if not method_type.is_applicable(setting_type): pytest.skip( msg=f"Skipping test since Method {method_type} isn't applicable on " f"settings of type {setting_type}." ) experiment = Experiment.from_args(f"--setting {setting} --method {method_name}") assert experiment.method is method_type assert experiment.setting is setting_type def test_launch_experiment_with_constructor( method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]] ): if not method_type.is_applicable(setting_type): pytest.skip( msg=f"Skipping test since Method {method_type} isn't applicable on " f"settings of type {setting_type}." ) experiment = Experiment(method=method_type, setting=setting_type) all_results = experiment.launch("--debug --fast_dev_run --batch_size 1") assert all_results == (method_type, setting_type) @slow @pytest.mark.timeout(300) def test_none_setting(method_type: Optional[Type[Method]], tmp_path: Path, monkeypatch): """Test that leaving the Setting unset runs on all applicable setting.""" method = method_type.get_name() for setting_type in method_type.get_applicable_settings(): monkeypatch.setattr(setting_type, "apply", mock_apply) all_results = Experiment.main( f"--method {method} --debug --fast_dev_run " f"--log_dir {tmp_path}" ) for setting_type in method_type.get_applicable_settings(): monkeypatch.setattr(setting_type, "apply", mock_apply) result = all_results[(setting_type, method_type)] assert result == (method_type, setting_type) @slow @pytest.mark.timeout(300) def test_none_method(setting_type: Optional[Type[Setting]]): """Test that leaving the method unset runs all applicable methods on the setting. """ setting = setting_type.get_name() all_results = Experiment.main(f"--setting {setting} --debug --fast_dev_run --batch-size 1") for method_type in setting_type.get_applicable_methods(): result = all_results[(setting_type, method_type)] assert result == (method_type, setting_type) # assert all_results == { # method_type: (method_type, setting_type) # for method_type in setting_type.get_applicable_methods() # } ================================================ FILE: sequoia/experiments/hpo_sweep.py ================================================ import json import shlex import sys from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Tuple, Type, Union from simple_parsing.helpers import choice from sequoia.settings import Method, Results, Setting from .experiment import Experiment, parse_setting_and_method_instances @dataclass class HPOSweep(Experiment): """Experiment which launches an HPO Sweep using Orion. TODO: Maybe use this somewhere in main.py once we redesign the command-line API. """ # Path to a json file containing the orion-formatted search space dictionary. # When `None` (by default), the result of `get_search_space` will be used instead. search_space_path: Optional[Path] = None # Path indicating where the pickle database will be loaded or be created. database_path: Path = Path("orion_db.pkl") # manual, unique identifier for this experiment. This should only really be used # when launching multiple different experiments that involve the same method and # the same exact setting configurations, but where some other aspect of the # experiment is changed. experiment_id: Optional[str] = None # Maximum number of runs to perform. max_runs: Optional[int] = 10 hpo_algorithm: str = choice( { "random": "random", "bayesian": "BayesianOptimizer", }, default="bayesian", ) # TODO: BayesianOptimizer does not support num > 1 def __post_init__(self): super().__post_init__() self.search_space: Dict = {} if self.search_space_path: with open(self.search_space_path, "r") as f: self.search_space = json.load(f) def launch(self, argv: Union[str, List[str]] = None, strict_args: bool = False): """Launch the experiment, using its attributes and possibly also using the provided command-line arguments. This differs from `Experiment.launch` in that this will actually launch a sequence of runs. Parameters ---------- argv : Union[str, List[str]], optional [description], by default None strict_args : bool, optional [description], by default False Returns ------- [type] [description] """ if not (isinstance(self.setting, Setting) and isinstance(self.method, Method)): self.setting, self.method = parse_setting_and_method_instances( setting=self.setting, method=self.method, argv=argv, strict_args=strict_args, ) assert isinstance(self.setting, Setting) assert isinstance(self.method, Method) self.setting.wandb = self.wandb # TODO: IDEA: It could actually be really cool if we created a list of # Experiment objects here, and just call their 'launch' methods in parallel, # rather than do the sweep logic in the Method class! best_params, best_objective = self.method.hparam_sweep( self.setting, search_space=self.search_space, database_path=self.database_path, experiment_id=self.experiment_id, max_runs=self.max_runs, hpo_algorithm=self.hpo_algorithm, ) print( "Best params:\n" + "\n".join(f"\t{key}: {value}" for key, value in best_params.items()) ) print(f"Best objective: {best_objective}") return (best_params, best_objective) @classmethod def main( cls, argv: Union[str, List[str]] = None, strict_args: bool = False, ) -> List[Tuple[Dict, Results]]: """Launches this experiment from the command-line. First, we get the choice of method and setting using a first parser. Then, we parse the Setting and Method objects using the remaining args. Parameters ---------- - argv : Union[str, List[str]], optional, by default None command-line arguments to use. When None (default), uses sys.argv. Returns ------- List[Tuple[Dict, Results]] Best trial parameters and objective found during the sweep. """ if argv is None: argv = sys.argv[1:] if isinstance(argv, str): argv = shlex.split(argv) _ = argv.copy() experiment: HPOSweep experiment, argv = cls.from_known_args(argv) setting: Optional[Type[Setting]] = experiment.setting method: Optional[Type[Method]] = experiment.method # config: Config = experiment.config if method is None or setting is None: raise RuntimeError("Both `--setting` and `--method` must be set to run a sweep.") return experiment.launch(argv, strict_args=strict_args) def main(): HPOSweep.main() if __name__ == "__main__": main() ================================================ FILE: sequoia/experiments/hpo_sweep_test.py ================================================ import random import shlex import sys from pathlib import Path from typing import Optional, Type import pytest from sequoia.common.config import Config from sequoia.methods import Method, get_all_methods from sequoia.methods.method_test import key_fn from sequoia.methods.random_baseline import RandomBaselineMethod from sequoia.settings import Results, Setting, all_settings from sequoia.utils.serialization import Serializable from .hpo_sweep import HPOSweep class MockResults(Results): def __init__(self, hparams): self.haprams = hparams self._objective = random.random() @property def objective(self) -> float: return self._objective def make_plots(self): return {} def to_log_dict(self, verbose: bool = False): return { "hparams": self.hparams.to_dict() if isinstance(self.hparams, Serializable) else self.hparams, "objective": self.objective, } def summary(self): return str(self.to_log_dict()) def mock_apply(self: Setting, method: Method, config: Config = None) -> Results: # 1. Configure the method to work on the setting. # method.configure(self) # 2. Train the method on the setting. # method.train(self) # 3. Evaluate the method on the setting and return the results. # return self.evaluate(method) # assert False, method.hparams return MockResults(getattr(method, "hparams", {})) # return type(method), type(self) @pytest.fixture() def set_argv_for_debug(monkeypatch): monkeypatch.setattr(sys, "argv", shlex.split("main.py --debug --fast_dev_run")) @pytest.fixture(params=sorted(get_all_methods(), key=str)) def method_type(request, monkeypatch, set_argv_for_debug): method_class: Type[Method] = request.param return method_class @pytest.fixture(params=sorted(all_settings, key=key_fn)) def setting_type(request, monkeypatch, set_argv_for_debug): setting_class: Type[Setting] = request.param monkeypatch.setattr(setting_class, "apply", mock_apply) # TODO: Not sure what this was doing, but I think it was important that all methods # get imported here. for method_type in setting_class.get_applicable_methods(): pass return setting_class @pytest.mark.skip(reason="BUG: seems to make other tests hang, because of Orion's bug.") def test_launch_sweep_with_constructor( method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]], tmp_path: Path, ): if not method_type.is_applicable(setting_type): pytest.skip( msg=f"Skipping test since Method {method_type} isn't applicable on settings of type {setting_type}." ) if issubclass(method_type, RandomBaselineMethod): pytest.skip( "BUG: RandomBaselineMethod has a hparam space that causes the HPO algo to go into an infinite loop." ) return experiment = HPOSweep( method=method_type, setting=setting_type, database_path=tmp_path / "debug.pkl", config=Config(debug=True), max_runs=3, ) best_hparams, best_performance = experiment.launch(["--debug"]) assert best_hparams assert best_performance ================================================ FILE: sequoia/main.py ================================================ """Sequoia - The Research Tree Used to run experiments, which consist in applying a Method to a Setting. """ from argparse import _SubParsersAction from dataclasses import dataclass from pathlib import Path from typing import Optional, Type, Union from simple_parsing import ArgumentParser from simple_parsing.help_formatter import SimpleHelpFormatter from simple_parsing.helpers import choice import sequoia from sequoia.common.config import Config from sequoia.common.config.wandb_config import WandbConfig from sequoia.methods import get_all_methods from sequoia.settings import all_settings from sequoia.settings.base import Method, Results, Setting from sequoia.utils import get_logger # TODO: Fix all the `get_logger` to use __name__ instead of __file__. logger = get_logger(__name__) def main(): """Adds all command-line arguments, parses the args, and runs the selected action.""" parser = ArgumentParser(prog="sequoia", description=__doc__, add_dest_to_option_strings=False) parser.add_argument( "--version", action="version", version=sequoia.__version__, help="Displays the installed version of Sequoia and exits.", ) command_subparsers = parser.add_subparsers( title="command", dest="command", description="Command to execute", parser_class=ArgumentParser, required=False, ) add_run_command(command_subparsers) add_sweep_command(command_subparsers) add_info_command(command_subparsers) args = parser.parse_args() command: str = getattr(args, "command", None) if command is None: parser.print_help() elif command == "run": method_type: Type[Method] = args.method_type setting_type: Type[Setting] = args.setting_type method: Method = method_type.from_argparse_args(args) setting: Setting = setting_type.from_argparse_args(args) config: Config = args.config # TODO: Make this a bit cleaner, current need to set this `wandb` config as a property on # the setting. Could either subclass Config and add an Optional[WandbConfig] field, or just # add it directly to the existing Config class. wandb_config: WandbConfig = args.wandb setting.wandb = wandb_config run(setting=setting, method=method, config=config) elif command == "sweep": method_type: Type[Method] = args.method_type setting_type: Type[Setting] = args.setting_type method: Method = method_type.from_argparse_args(args) setting: Setting = setting_type.from_argparse_args(args) config: Config = args.config # TODO: Fix this up a bit: Currently need to set this on the setting wandb_config: WandbConfig = args.wandb setting.wandb = wandb_config sweep(setting=args.setting, method=method, config=args.config) elif command == "info": info(component=args.component) def add_run_command(command_subparsers: _SubParsersAction) -> None: run_parser = command_subparsers.add_parser( "run", description="Run an experiment on a given setting.", help="Run an experiment on a given setting.", add_dest_to_option_strings=False, formatter_class=SimpleHelpFormatter, ) run_parser.add_arguments(Config, dest="config") run_parser.add_arguments(WandbConfig, dest="wandb") add_args_for_settings_and_methods(run_parser) def run(setting: Setting, method: Method, config: Config) -> Results: """Performs a single run, applying a method to a setting, and returns the results.""" logger.debug("Setting:") # BUG: TypeError: __reduce_ex__() takes exactly one argument (0 given) try: logger.debug(setting.dumps_yaml()) except TypeError: logger.debug(setting) logger.debug("Config:") logger.debug(config.dumps_yaml()) logger.debug("Method") logger.debug(str(method)) results = setting.apply(method, config=config) logger.debug("Results:") logger.debug(results.summary()) return results @dataclass class SweepConfig(Config): """Configuration options for a HPO sweep.""" # Path indicating where the pickle database will be loaded or be created. database_path: Path = Path("orion_db.pkl") # manual, unique identifier for this experiment. This should only really be used # when launching multiple different experiments that involve the same method and # the same exact setting configurations, but where some other aspect of the # experiment is changed. experiment_id: Optional[str] = None # Maximum number of runs to perform. max_runs: Optional[int] = 10 # Which hyper-parameter optimization algorithm to use. hpo_algorithm: str = choice( { "random": "random", "bayesian": "BayesianOptimizer", }, default="bayesian", ) # TODO: BayesianOptimizer does not support num > 1 def sweep(setting: Setting, method: Method, config: SweepConfig) -> Setting.Results: """Performs a Hyper-Parameter Optimization sweep, consisting in running the method on the given setting, each run having a different set of hyper-parameters. """ print("Sweep!") logger.debug("Setting:") # BUG: TypeError: __reduce_ex__() takes exactly one argument (0 given) try: logger.debug(setting.dumps_yaml()) except TypeError: logger.debug(setting) logger.debug("Config:") logger.debug(config.dumps_yaml()) logger.debug(f"Method: {method}") # TODO: IDEA: It could actually be really cool if we created a list of # Experiment objects here, and just call their 'launch' methods in parallel, # rather than do the sweep logic in the Method class! # TODO: Need to add these arguments again to the parser? best_params, best_objective = method.hparam_sweep( setting, database_path=config.database_path, experiment_id=config.experiment_id, max_runs=config.max_runs, hpo_algorithm=config.hpo_algorithm, ) logger.info( "Best params:\n" + "\n".join(f"\t{key}: {value}" for key, value in best_params.items()) ) logger.info(f"Best objective: {best_objective}") return (best_params, best_objective) def add_sweep_command(command_subparsers: _SubParsersAction) -> None: sweep_parser = command_subparsers.add_parser( "sweep", description="Run a hyper-parameter optimization sweep.", help="Run a hyper-parameter optimization sweep.", add_dest_to_option_strings=False, ) sweep_parser.set_defaults(action=sweep) sweep_parser.add_arguments(SweepConfig, dest="config") add_args_for_settings_and_methods(sweep_parser) def add_info_command(command_subparsers: _SubParsersAction) -> None: """Add commands to display some information about the settings or methods.""" info_parser = command_subparsers.add_parser( "info", # NOTE: Not 100% sure what the difference is between help and description. description="Displays some information about a Setting or Method.", help="Displays some information about a Setting or Method.", add_dest_to_option_strings=False, ) info_parser.set_defaults(**{"component": None}) info_parser.set_defaults(action=lambda namespace: info(namespace.component)) component_subparser = info_parser.add_subparsers( title="component", dest="component", description="Setting or Method to display more information about.", help="heyo", required=False, ) for setting in all_settings: setting_name = setting.get_name() component_parser: ArgumentParser = component_subparser.add_parser( name=setting_name, description=f"Show more info about the {setting_name} setting.", help=get_help(setting), add_dest_to_option_strings=False, ) component_parser.set_defaults(**{"component": setting}) for method in get_all_methods(): method_name = method.get_full_name() component_parser: ArgumentParser = component_subparser.add_parser( name=method_name, description=f"Show more info about the {method_name} method.", help=get_help(method), add_dest_to_option_strings=False, ) component_parser.set_defaults(**{"component": method}) def info(component: Union[Type[Setting], Type[Method]] = None) -> None: """Prints some info about a given component (method class or setting class), or prints the list of available settings and methods. """ if component is None: from sequoia.utils.readme import get_tree_string print(get_tree_string()) # print("Registered Settings:") # for setting in all_settings: # print(f"- {setting.get_name()}: {setting.get_path_to_source_file()}") print() print("Registered Methods:") print() for method in get_all_methods(): src = method.get_path_to_source_file() print(f"- {method.get_full_name()}: {src}") else: # IDEA: Could colorize the tree with red or green depending on if the method is # applicable to the setting or not! help(component) def get_help(component: Type[Setting]) -> str: """Returns the string to be passed as the 'help' argument to the parser.""" # todo docstring = component.__doc__ if not docstring: docstring = f"Help for class {component.__name__} (missing docstring)" # IDEA: Get the first two sentences, or a shortened version of the docstring, # whichever one is shorter. first_two_sentences = ". ".join(docstring.split(".")[:2]) + "." # shortened_docstring = textwrap.shorten(docstring, 150) # return min(shortened_docstring, first_two_sentences, key=len) + "(help)" # NOTE: Seems to be nicer in general to have two whole sentences, even if they are a bit longer. return first_two_sentences # def get_description(command: str, setting: Type[Setting], method: Type[Method] = None) -> str: # """ Returns the text to be displayed right under the "usage" line in the command-line # when either # `sequoia run --help` # or # `sequoia run --help` is invoked. # """ # if command == "run": # if method is not None: # return f"Run an experiment consisting of applying method {method.get_full_name()} on the {setting.get_name()} setting. (desc.)" # else: # return f"Run an experiment in the {setting.get_name()} setting. (desc.)" def add_args_for_settings_and_methods(command_subparser: ArgumentParser): """Adds a subparser for each Setting class and method subparsers for each of those. NOTE: Only adds subparsers for setting classes that have a non-empty 'available_datasets' attribute, so that choosing `Setting`, `SLSetting` or `RLSetting` isn't an option. This is used by the `sequoia run` and `sequoia sweep` commands. """ # ===== RUN ======== setting_subparsers = command_subparser.add_subparsers( title="setting_choice", description="choice of experimental setting", dest="setting_type", metavar="", required=True, ) def key_fn(setting_class: Type[Setting]): return ( len(setting_class.parents()), setting_class.__name__, ) # Sort the settings so the actions come up in a nice order. for setting in sorted(all_settings, key=key_fn): setting_name = setting.get_name() # IDEA: if not getattr(setting, "available_datasets", {}): # Don't add a parser for this setitng, since it has no available datasets. # e.g.: Setting, SL, RL continue setting_parser: ArgumentParser = setting_subparsers.add_parser( setting_name, help=get_help(setting), description=f"Run an experiment in the {setting.get_name()} setting.", add_dest_to_option_strings=False, formatter_class=SimpleHelpFormatter, ) setting_parser.set_defaults(**{"setting_type": setting}) # NOTE: By removing the `dest` argument to `add_argparse_args, we're moving the place where # the setting's values are stored from 'setting' to `camel_case(setting_class.__name__). # Alternative would be to just assume that the settings are dataclasses and add arguments # for the setting at destination 'setting' as before. setting.add_argparse_args(parser=setting_parser) # setting_parser.add_arguments(setting, dest="setting") method_subparsers = setting_parser.add_subparsers( title="method", dest="method_name", metavar="", description=f"which method to apply to the {setting_name} Setting.", required=True, ) for method in setting.get_applicable_methods(): method_name = method.get_full_name() method_parser: ArgumentParser = method_subparsers.add_parser( method_name, help=get_help(method), description=( f"Run an experiment where the {method_name} method is " f"applied to the {setting.get_name()} setting." ), formatter_class=SimpleHelpFormatter, ) method_parser.set_defaults(method_type=method) # TODO: Could also pass the setting to the method's `add_argparse_args` so # that it gets to change its default values! # method.add_argparse_args_for_setting( # parser=method_parser, setting=setting, # ) method.add_argparse_args(parser=method_parser) if __name__ == "__main__": main() ================================================ FILE: sequoia/methods/README.md ================================================ # Sequoia - Methods ### Adding a new Method: #### Prerequisites: **- First, please take a look at the [examples](examples/)** #### Steps: 1. Choose a target setting from the tree (See the "Available Settings" section below). 2. Create a new subclass of [`Method`](settings/base/bases.py), with the chosen target setting. Your class should implement the following methods: - `fit(train_env, valid_env)` - `get_actions(observations, action_space) -> Actions` The following methods are optional, but can be very useful to help customize how your method is used at train/test time: - `configure(setting: Setting)` - `on_task_switch(task_id: Optional[int])` - `test(test_env)` ```python class MyNewMethod(Method, target_setting=ClassIncrementalSetting): ... # Your code here. def fit(self, train_env: DataLoader, valid_env: DataLoader): # Train your model however you want here. self.trainer.fit( self.model, train_dataloader=train_env, val_dataloaders=valid_env, ) def get_actions(self, observations: Observations, observation_space: gym.Space) -> Actions: # Return an "Action" (prediction) for the given observations. # Each Setting has its own Observations, Actions and Rewards types, # which are based on those of their parents. return self.model.predict(observations.x) def on_task_switch(self, task_id: Optional[int]): #This method gets called if task boundaries are known in the current #setting. Furthermore, if task labels are available, task_id will be # the index of the new task. If not, task_id will be None. # For example, you could do something like this: self.model.current_output_head = self.model.output_heads[task_id] ``` 3. Running / Debugging your method: (at the bottom of your script, for example) ```python if __name__ == "__main__": ## 1. Create the setting you want to apply your method on. # First option: Create the Setting directly in code: setting = ClassIncrementalSetting(dataset="cifar10", nb_tasks=5) # Second option: Create the Setting from the command-line: setting = ClassIncrementalSetting.from_args() ## 2. Create your Method, however you want. my_method = MyNewMethod() ## 3. Apply your method on the setting to obtain results. results = setting.apply(my_method) # Optionally, display the results. print(results.summary()) results.make_plots() ``` 4. (WIP): Adding your new method to the tree: - Place the script/package that defines your Method inside of the `methods` folder. - Add the `@register_method` decorator to your Method definition, for example: ```python from sequoia.methods import register_method @register_method class MyNewMethod(Method, target_setting=ClassIncrementalSetting): name: ClassVar[str] = "my_new_method" ... ``` - To launch an experiment using your method, run the following command: ```console python main.py --setting --method my_new_method ``` To customize how your method gets created from the command-line, override the two following class methods: - `add_argparse_args(cls, parser: ArgumentParser)` - `from_argparse_args(cls, args: Namespace) -> Method` - Create a `_test.py` file next to your method script. In it, write unit tests for every module/component used in your Method. Have them be easy to read so people can ideally understand how the components of your Method work by simply reading the tests. - (WIP) To run the unittests locally, use the following command: `pytest methods/my_new_method_test.py` - Then, write a functional test that demonstrates how your new method should behave, and what kind of results it expects to produce. The easiest way to do this is to implement a `validate_results(setting: Setting, results: Results)` method. - (WIP) To debug/run the "integration tests" locally, use the following command: `pytest -x methods/my_new_method_test.py --slow` - Create a Pull Request, and you're good to go! ## Registered Methods (so far): - ## [BaseMethod](sequoia/methods/base_method.py) - Target setting: [Setting](sequoia/settings/base/setting.py) Versatile Baseline method which targets all settings. Uses pytorch-lightning's Trainer for training and a LightningModule as a model. Uses a [BaseModel](methods/models/base_model/base_model.py), which can be used for: - Self-Supervised training with modular auxiliary tasks; - Semi-Supervised training on partially labeled batches; - Multi-Head prediction (e.g. in task-incremental scenario); - ## [RandomBaselineMethod](sequoia/methods/random_baseline.py) - Target setting: [Setting](sequoia/settings/base/setting.py) Baseline method that gives random predictions for any given setting. This method doesn't have a model or any parameters. It just returns a random action for every observation. - ## [pnn.PnnMethod](sequoia/methods/pnn/pnn_method.py) - Target setting: [IncrementalAssumption](sequoia/settings/assumptions/incremental.py) PNN Method. Applicable to both RL and SL Settings, as long as there are clear task boundaries during training (IncrementalAssumption). - ## [avalanche.AGEMMethod](sequoia/methods/avalanche/agem.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) Average Gradient Episodic Memory (AGEM) strategy from Avalanche. See AGEM plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.AR1Method](sequoia/methods/avalanche/ar1.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) AR1 strategy from Avalanche. See AR1 plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.CWRStarMethod](sequoia/methods/avalanche/cwr_star.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) CWRStar strategy from Avalanche. See CWRStar plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.EWCMethod](sequoia/methods/avalanche/ewc.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) Elastic Weight Consolidation (EWC) strategy from Avalanche. See EWC plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.GEMMethod](sequoia/methods/avalanche/gem.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) Gradient Episodic Memory (GEM) strategy from Avalanche. See GEM plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.GDumbMethod](sequoia/methods/avalanche/gdumb.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) GDumb strategy from Avalanche. See GDumbPlugin for more details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.LwFMethod](sequoia/methods/avalanche/lwf.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) Learning without Forgetting strategy from Avalanche. See LwF plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.ReplayMethod](sequoia/methods/avalanche/replay.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) Replay strategy from Avalanche. See Replay plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [avalanche.SynapticIntelligenceMethod](sequoia/methods/avalanche/synaptic_intelligence.py) - Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) The Synaptic Intelligence strategy from Avalanche. This is the Synaptic Intelligence PyTorch implementation of the algorithm described in the paper "Continuous Learning in Single-Incremental-Task Scenarios" (https://arxiv.org/abs/1806.08568) The original implementation has been proposed in the paper "Continual Learning Through Synaptic Intelligence" (https://arxiv.org/abs/1703.04200). The Synaptic Intelligence regularization can also be used in a different strategy by applying the :class:`SynapticIntelligencePlugin` plugin. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. - ## [sb3.A2CMethod](sequoia/methods/stable_baselines3_methods/a2c.py) - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Method that uses the A2C model from stable-baselines3. - ## [sb3.DQNMethod](sequoia/methods/stable_baselines3_methods/dqn.py) - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Method that uses a DQN model from the stable-baselines3 package. - ## [sb3.DDPGMethod](sequoia/methods/stable_baselines3_methods/ddpg.py) - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Method that uses the DDPG model from stable-baselines3. - ## [sb3.TD3Method](sequoia/methods/stable_baselines3_methods/td3.py) - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Method that uses the TD3 model from stable-baselines3. - ## [sb3.SACMethod](sequoia/methods/stable_baselines3_methods/sac.py) - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Method that uses the SAC model from stable-baselines3. - ## [sb3.PPOMethod](sequoia/methods/stable_baselines3_methods/ppo.py) - Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Method that uses the PPO model from stable-baselines3. - ## [EwcMethod](sequoia/methods/ewc_method.py) - Target setting: [IncrementalAssumption](sequoia/settings/assumptions/incremental.py) Subclass of the BaseMethod, which adds the EWCTask to the `BaseModel`. This Method is applicable to any CL setting (RL or SL) where there are clear task boundaries, regardless of if the task labels are given or not. - ## [ExperienceReplayMethod](sequoia/methods/experience_replay.py) - Target setting: [IncrementalSLSetting](sequoia/settings/sl/incremental/setting.py) Simple method that uses a replay buffer to reduce forgetting. - ## [HatMethod](sequoia/methods/hat.py) - Target setting: [TaskIncrementalSLSetting](sequoia/settings/sl/task_incremental/setting.py) Hard Attention to the Task ``` @inproceedings{serra2018overcoming, title={Overcoming Catastrophic Forgetting with Hard Attention to the Task}, author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros}, booktitle={International Conference on Machine Learning}, pages={4548--4557}, year={2018} } ``` ================================================ FILE: sequoia/methods/__init__.py ================================================ """ Methods: solutions to research problems (Settings). Methods contain the logic related to the training of the algorithm. Methods are encouraged to use a model to keep the networks / architecture / engineering code separate from the training loop. Sequoia includes a `BaseMethod`, along with an accompanying `Model`, which can be used as a jumping-off point for new users. You're obviously also free to write your own method/model from scratch if you want! The recommended way to start is by creating a new subclass of the Base The best way to do so is to create your new model as a subclass of the `Model`, which already has some neat capabilities, and can easily be extended/customized. This `Model` is an instance of Pytorch-Lightning's `LightningModule` class, and can be trained on the environments/dataloaders of Sequoia with a `pl.Trainer`, enabling all the goodies associated with Pytorch-Lightning. You can also easily add callbacks to measure your own metrics and such as you would in Pytorch-Lightning. """ import glob import inspect import os import warnings from functools import lru_cache from importlib import import_module from os.path import abspath, basename, dirname, isfile, join from pathlib import Path from typing import Dict, List, Type import pkg_resources from pkg_resources import EntryPoint from setuptools import find_packages from sequoia.settings.base import Method from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) AbstractMethod = Method _registered_methods: List[Type[Method]] = [] """ TODO: IDEA: Add arguments to register_method that help configure the tests we add the that method! E.g.: ``` @register_method(slow=True, requires_cuda=True, required_memory_gb=4) class MyMethod(Method, target_setting=ContinualRLSetting): ... ``` """ def register_method( method_class: Type[Method] = None, *, name: str = None, family: str = None ) -> Type[Method]: """Decorator around a method class, which is used to register the method. Can set the name of the method as well as the family when they are passed, and also adds the Method to the list of registered methods. """ def _register_method( method_class: Type[Method] = None, *, name: str = None, family: str = None ) -> Type[Method]: if name is not None: method_class.name = name if family is not None: method_class.family = family if not issubclass(method_class, Method): raise TypeError( "The `register_method` decorator should only be used on subclasses of " "`Method`." ) if method_class not in _registered_methods: _registered_methods.append(method_class) return method_class # This is based on `dataclasses.dataclass`: def wrap(method_class: Type[Method]) -> Type[Method]: return _register_method(method_class, name=name, family=family) # See if we're being called as @register_method or @register_method(). if method_class is None: # We're called with parens. return wrap # We're called as @register_method without parens. return wrap(method_class) from .base_method import BaseMethod, BaseModel from .ewc_method import EwcMethod from .experience_replay import ExperienceReplayMethod from .hat import HatMethod from .pnn import PnnMethod from .random_baseline import RandomBaselineMethod @lru_cache(1) def get_external_methods() -> Dict[str, Type[Method]]: """Returns a dictionary of the Methods defined outside of Sequoia. Packages outside of Sequoia can register methods by putting a `Method` entry-point in their setup.py, like so: ```python # (inside /setup.py) setup( name="my_package", packages=setuptools.find_packages(include=["cn_dpm*"]) ... entry_points={ "Method": [ "foo_method = my_package.my_methods.foo_method:FooMethod", "bar_method = my_package.my_methods.bar_method:BarMethod", ], }, ) ``` Compared with using the `@register_method` decorator, this has the benefit that the module containing the Method does not need to be imported/"live" for the method to be available. This is very relevant when using Sequoia through the command-line, for instance, since Sequoia would have no way of knowing what other methods are available: ```console sequoia setting foo_setting method foo_method ``` """ methods: Dict[str, Type[Method]] = {} for entry_point in pkg_resources.iter_entry_points("Method"): entry_point: EntryPoint try: method_class = entry_point.load() except Exception as exc: logger.error( f"Unable to load external Method: '{entry_point.name}', from package " f"{entry_point.dist.project_name}, version={entry_point.dist.version}: " f"{exc}" ) else: logger.debug( f"Imported an external Method: '{entry_point.name}', from package " f"{entry_point.dist.project_name}, (version = {entry_point.dist.version})." ) methods[entry_point.name] = method_class return methods # Keeping a pointer to the old name, just to help with backward-compatibility a bit. BaselineMethod = BaseMethod # TODO: Eventually these could become external repos, with their own tests / etc, based # on a 'cookiecutter' repo of some sort. This would make it easier to maintain and to # delegate work! # IDEA: Could also do the same for the datasets somehow? Like have an extendable # `sequoia.datasets` cookiecutter repo? How would that work with Settings? # Assumption + Assumption -> Assumption (combined) # Setting := fn(dataset, **kwargs) -> Callable[[Method], Results] AVALANCHE_INSTALLED = False try: from avalanche.training.strategies import BaseStrategy # type: ignore AVALANCHE_INSTALLED = True except ImportError: pass if AVALANCHE_INSTALLED: from sequoia.methods.avalanche_methods import * SB3_INSTALLED = False try: import stable_baselines3 SB3_INSTALLED = True except ImportError: pass if SB3_INSTALLED: from sequoia.methods.stable_baselines3_methods import * try: from sequoia.methods.pl_bolts_methods import * except ImportError: pass def add_external_methods(all_methods: List[Type[Method]]) -> List[Type[Method]]: for name, method_class in get_external_methods().items(): if method_class not in all_methods: logger.debug(f"Adding method {name} from external package.") all_methods.append(method_class) return all_methods def get_all_methods() -> List[Type[Method]]: # This may change over time, and includes ALL subclasses of 'Method'. # methods = Method.__subclasses__() # This includes all registered methods, e.g. not any base classes. methods = _registered_methods methods = add_external_methods(methods) methods = list(set(methods)) return list(sorted(methods, key=lambda method: method.get_full_name())) ================================================ FILE: sequoia/methods/aux_tasks/__init__.py ================================================ from .auxiliary_task import AuxiliaryTask from .ewc import EWCTask from .reconstruction import AEReconstructionTask, VAEReconstructionTask from .transformation_based import RotationTask VAE: str = VAEReconstructionTask.name AE: str = AEReconstructionTask.name EWC: str = EWCTask.name ================================================ FILE: sequoia/methods/aux_tasks/auxiliary_task.py ================================================ import typing from abc import abstractmethod from dataclasses import dataclass from typing import Callable, ClassVar, Dict, Optional, Tuple import torch from pytorch_lightning import LightningModule from torch import Tensor, nn from sequoia.common.hparams import HyperParameters, uniform from sequoia.common.loss import Loss if typing.TYPE_CHECKING: from sequoia.methods.models.base_model import Model class AuxiliaryTask(nn.Module): """Represents an additional loss to apply to a `Classifier`. The main logic should be implemented in the `get_loss` method. In general, it should apply some deterministic transformation to its input, and treat that same transformation as a label to predict. That loss should be backpropagatable through the feature extractor (the `encoder` attribute). """ name: ClassVar[str] = "" input_shape: ClassVar[Tuple[int, ...]] = () hidden_size: ClassVar[int] = -1 _model: ClassVar["Model"] # Class variables for holding the Modules shared with the classifier. encoder: ClassVar[nn.Module] output_head: ClassVar[nn.Module] # type: ignore preprocessing: ClassVar[Callable[[Tensor, Optional[Tensor]], Tuple[Tensor, Optional[Tensor]]]] @dataclass class Options(HyperParameters): """Settings for this Auxiliary Task.""" # Coefficient used to scale the task loss before adding it to the total. coefficient: float = uniform(0.0, 1.0, default=1.0) def __init__(self, *args, options: Options = None, name: str = None, **kwargs): """Creates a new Auxiliary Task to further train the encoder. Can use the `encoder` and `classifier` components of the parent `Classifier` instance. NOTE: Since this object will be stored inside the `tasks` dict in the model, we can't pass a reference to the parent here, otherwise the parent would hold a reference to itself inside its `.modules()`, so there would be an infinite recursion problem. Parameters ---------- - options : AuxiliaryTask.Options, optional, by default None The `Options` related to this task, containing the loss coefficient used to scale this task, as well as any other additional hyperparameters specific to this `AuxiliaryTask`. - name: str, optional, by default None The name of this auxiliary task. When not given, the name of the class is used. """ super().__init__() # If we are given the coefficient as a constructor argument, for # instance, then we create the Options for this auxiliary task. self.name = name or type(self).name self.options = options or type(self).Options(*args, **kwargs) self.device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self._disabled = False def encode(self, x: Tensor) -> Tensor: # x, _ = AuxiliaryTask.preprocessing(x, None) return AuxiliaryTask.encoder(x) def logits(self, h_x: Tensor) -> Tensor: return AuxiliaryTask.output_head(h_x) @abstractmethod def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss: """Calculates the Auxiliary loss for the input `x`. The parameters `h_x`, `y_pred` are given for convenience, so we don't re-calculate the forward pass multiple times on the same input. Parameters ---------- - forward_pass: Dict[str, Tensor] containing: - 'x' : Tensor The input samples. - 'h_x' : Tensor The hidden vector, or hidden features, which corresponds to the output of the feature extractor (should be equivalent to `self.encoder(x)`). Given for convenience, when available. - 'y_pred' : Tensor The predicted labels. - y : Tensor, optional, by default None The true labels for each sample. Note that this is the label of the output head's task, not of an auxiliary task. Returns ------- Tensor The loss, not scaled. """ @property def coefficient(self) -> float: return self.options.coefficient @coefficient.setter def coefficient(self, value: float) -> None: if self.enabled and value == 0: self.disable() elif self.disabled and value != 0: self.enable() self.options.coefficient = value def enable(self) -> None: """Enable this auxiliary task. This could be used to create/allocate resources to this task. NOTE: The task will not work, even after being enabled, if its coefficient is set to 0! """ self._disabled = False def disable(self) -> None: """Disable this auxiliary task and sets its coefficient to 0. This could be used to delete/deallocate resources used by this task. """ self._disabled = True @property def enabled(self) -> bool: return not self._disabled @property def disabled(self) -> bool: return self._disabled or self.coefficient == 0.0 def on_task_switch(self, task_id: Optional[int]) -> None: """Executed when the task switches (to either a new or known task).""" @property def model(self) -> LightningModule: return type(self)._model @staticmethod def set_model(model: "Model") -> None: AuxiliaryTask._model = model def shared_modules(self) -> Dict[str, nn.Module]: """Returns any trainable modules if `self` that are shared across tasks. By giving this information, these weights can then be used in regularization-based auxiliary tasks like EWC, for example. By default, for auxiliary tasks, this returns nothing, for instance. For the base model, this returns a dictionary with the encoder, for example. When using only one output head (i.e. when `self.hp.multihead` is `False`), then this dict also includes the output head. Returns ------- Dict[str, nn.Module]: Dictionary mapping from name to the shared modules, if any. """ return {} ================================================ FILE: sequoia/methods/aux_tasks/ewc.py ================================================ """Elastic Weight Consolidation as an Auxiliary Task. This is a simplified version of EWC, that only currently uses the L2 norm, rather than the Fisher Information Matrix. TODO: If it's worth it, we could re-add the 'real' EWC using the nngeometry package, (which I don't think we need to have as a submodule). """ from collections import deque from contextlib import contextmanager from copy import deepcopy from dataclasses import dataclass from typing import Deque, List, Optional, Type from gym.spaces.utils import flatdim from nngeometry.metrics import FIM from nngeometry.object.pspace import PMatAbstract, PMatDiag, PMatKFAC, PVector from simple_parsing import choice from torch import Tensor from torch.utils.data import DataLoader from sequoia.common.hparams import categorical, uniform from sequoia.common.loss import Loss from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask from sequoia.methods.models.forward_pass import ForwardPass from sequoia.methods.models.output_heads import ClassificationHead, RegressionHead from sequoia.settings.base.objects import Observations from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import dict_intersection logger = get_logger(__name__) class EWCTask(AuxiliaryTask): """Elastic Weight Consolidation, implemented as a 'self-supervision-style' Auxiliary Task. ```bibtex @article{kirkpatrick2017overcoming, title={Overcoming catastrophic forgetting in neural networks}, author={Kirkpatrick, James and Pascanu, Razvan and Rabinowitz, Neil and Veness, Joel and Desjardins, Guillaume and Rusu, Andrei A and Milan, Kieran and Quan, John and Ramalho, Tiago and Grabska-Barwinska, Agnieszka and others}, journal={Proceedings of the national academy of sciences}, volume={114}, number={13}, pages={3521--3526}, year={2017}, publisher={National Acad Sciences} } ``` """ name: str = "ewc" @dataclass class Options(AuxiliaryTask.Options): """Options of the EWC auxiliary task.""" # Coefficient of the EWC auxilary task. # NOTE: It seems to be the case that, at least just for EWC, the coefficient # can be often be much greater than 1, hence why we overwrite the prior over # that hyper-parameter here. coefficient: float = uniform(0.0, 100.0, default=1.0) # Batchsize to be used when computing FIM (unused atm) batch_size_fim: int = 32 # Number of observations to use for FIM calculation sample_size_fim: int = categorical(2, 4, 8, 16, 32, 64, 128, 256, 512, default=8) # Fisher information representation type (diagonal or block diagonal). fim_representation: Type[PMatAbstract] = choice( {"diagonal": PMatDiag, "block_diagonal": PMatKFAC}, default=PMatDiag, ) def __init__(self, *args, name: str = None, options: "EWCTask.Options" = None, **kwargs): super().__init__(*args, options=options, name=name, **kwargs) self.options: EWCTask.Options # The id of the current/most recent task the model has been trained on. self.current_training_task: Optional[int] = None # The id of the previous task the model was trained on. self.previous_training_task: Optional[int] = None # The ids of all the tasks trained on so far, not including the current task. self.previous_training_tasks: List[Optional[int]] = [] self.previous_model_weights: Optional[PVector] = None self.observation_collector: Deque[Observations] = deque(maxlen=self.options.sample_size_fim) self.fisher_information_matrices: List[PMatAbstract] = [] # When True, ignore task boundaries (no EWC update). # This is used mainly because of the need for executing forward passes when # calculating the new FIMs, and the MultiheadModel class might then call # `on_task_switch`, so we don't want to recurse. self._ignore_task_boundaries: bool = False if not self.model.shared_modules(): # TODO: This might cause a bug, if some auxiliary task were to replace the # encoder and also be 'activated' after this task. This is a really obscure # edge case though. logger.warning( RuntimeWarning( "Disabling the EWC auxiliary task, since there appears to be no " "shared weights between tasks!" ) ) self.disable() def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss: """Gets the EWC loss.""" if self.training: self.observation_collector.append(forward_pass.observations) if not self.enabled or self.previous_model_weights is None: # We're in the first task: do nothing. return Loss(name=self.name) loss = 0.0 v_current = self.get_current_model_weights() for fim in self.fisher_information_matrices: diff = v_current - self.previous_model_weights loss += fim.vTMv(diff) ewc_loss = Loss(name=self.name, loss=loss) return ewc_loss def on_task_switch(self, task_id: Optional[int]): """Executed when the task switches (to either a known or unknown task).""" if not self.enabled: return logger.debug(f"On task switch called: task_id={task_id}") if self._ignore_task_boundaries: logger.info("Ignoring task boundary (probably from recursive call)") return if not self.training: logger.debug("Task boundary at test time, no EWC update.") return # Two cases: # - Setting without task IDs --> still calculate the FIMs at each task boundary. # - Setting with IDs --> calculate the FIMs before training on new tasks. # Setting without task labels. Task ids: None -> None -> None (always None) if task_id is None: # Here we use the number of task boundaries as a 'fake' task id, meaning we # treat each task as if it has never been encountered before. if self.current_training_task is None: # Start of first task, no EWC update. self.current_training_task = 0 else: self.previous_training_task = self.current_training_task self.current_training_task += 1 self.update_anchor_weights(new_task_id=self.current_training_task) # Setting with task labels. Task ids: 0 -> 1 -> 2 -> 1 -> 3 -> 5 -> 11 -> 5 etc. else: if self.current_training_task is None: logger.info("Starting the first task, no EWC update.") self.current_training_task = task_id elif task_id == self.current_training_task: logger.info("Switching to same task, no EWC update.") elif task_id in self.previous_training_tasks: logger.info(f"Switching to known task {task_id}, no EWC update.") else: logger.info(f"Switching to new task {task_id}, updating EWC params.") self.previous_training_task = self.current_training_task self.previous_training_tasks.append(self.current_training_task) self.current_training_task = task_id self.update_anchor_weights(new_task_id=self.current_training_task) def update_anchor_weights(self, new_task_id: int) -> None: """Update the FIMs and other EWC params before starting training on a new task. Parameters ---------- new_task_id : int The ID of the new task. """ # we dont want to go here at test time. # NOTE: We also switch between unknown tasks. logger.info( f"Updating the EWC 'anchor' weights before starting training on " f"task {new_task_id}" ) self.previous_model_weights = self.get_current_model_weights().clone().detach() # Create a Dataloader from the stored observations. obs_type: Type[Observations] = type(self.observation_collector[0]) dataset = [obs.as_namedtuple() for obs in self.observation_collector] # Or, alternatively (see the note below on why we don't use this): # stacked_observations: Observations = obs_type.stack(self.observation_collector) # dataset = TensorDataset(*stacked_observations.as_namedtuple()) # NOTE: This is equivalent to just using the same batch size as during # training, as each Observations in the list is already a batch. # NOTE: We keep the same batch size here as during training because for # instance in RL, it would be weird to suddenly give some new batch size, # since the buffers would get cleared and re-created just for these forward # passes dataloader = DataLoader(dataset, batch_size=None, collate_fn=None) # TODO: Would be nice to have a progress bar here. # Create the parameters to be passed to the FIM function. These may vary a # bit, depending on if we're being applied in a classification setting or in # a regression setting (not done yet) variant: str # TODO: Change this conditional to be based on the type of action space, rather # than of output head. if isinstance(self._model.output_head, ClassificationHead): variant = "classif_logits" n_output = self._model.action_space.n def fim_function(*inputs) -> Tensor: observations = obs_type(*inputs).to(self._model.device) forward_pass: ForwardPass = self._model(observations) actions = forward_pass.actions return actions.logits elif isinstance(self._model.output_head, RegressionHead): # NOTE: This hasn't been tested yet. variant = "regression" n_output = flatdim(self._model.action_space) def fim_function(*inputs) -> Tensor: observations = obs_type(*inputs).to(self._model.device) forward_pass: ForwardPass = self._model(observations) actions = forward_pass.actions return actions.y_pred else: raise NotImplementedError("TODO") with self._ignoring_task_boundaries(): # Prevent recursive calls to `on_task_switch` from affecting us (can be # called from MultiheadModel). (TODO: MultiheadModel will be fixed soon.) # layer_collection = LayerCollection.from_model(self.model.shared_modules()) # nngeometry BUG: this doesn't work when passing the layer # collection instead of the model new_fim = FIM( model=self.model.shared_modules(), loader=dataloader, representation=self.options.fim_representation, n_output=n_output, variant=variant, function=fim_function, device=self._model.device, layer_collection=None, ) # TODO: There was maybe an idea to use another fisher information matrix for # the critic in A2C, but not doing that atm. new_fims = [new_fim] self.consolidate(new_fims, task=new_task_id) self.observation_collector.clear() @contextmanager def _ignoring_task_boundaries(self): """Contextmanager used to temporarily ignore task boundaries (no EWC update).""" self._ignore_task_boundaries = True yield self._ignore_task_boundaries = False def consolidate(self, new_fims: List[PMatAbstract], task: Optional[int]) -> None: """Consolidates the new and current fisher information matrices. Parameters ---------- new_fims : List[PMatAbstract] The list of new fisher information matrices. task : Optional[int] The id of the previous task, when task labels are available, or the number of task switches encountered so far when task labels are not available. """ if not self.fisher_information_matrices: self.fisher_information_matrices = new_fims return assert task is not None, "Should have been given an int task id (even if fake)." for i, (fim_previous, fim_new) in enumerate( zip(self.fisher_information_matrices, new_fims) ): # consolidate the FIMs if fim_previous is None: self.fisher_information_matrices[i] = fim_new else: # consolidate the fim_new into fim_previous in place if isinstance(fim_new, PMatDiag): # TODO: This is some kind of weird online-EWC related magic: fim_previous.data = (deepcopy(fim_new.data) + fim_previous.data * (task)) / ( task + 1 ) elif isinstance(fim_new.data, dict): # TODO: This is some kind of weird online-EWC related magic: for _, (prev_param, new_param) in dict_intersection( fim_previous.data, fim_new.data ): for prev_item, new_item in zip(prev_param, new_param): prev_item.data = (prev_item.data * task + deepcopy(new_item.data)) / ( task + 1 ) self.fisher_information_matrices[i] = fim_previous def get_current_model_weights(self) -> PVector: return PVector.from_model(self.model.shared_modules()) ================================================ FILE: sequoia/methods/aux_tasks/reconstruction/__init__.py ================================================ """ Auxiliary tasks based on reconstructing an input given a hidden vector. TODO: Add some denoising autoencoders maybe as a reconstruction task? """ from .ae import AEReconstructionTask from .decoder_for_dataset import get_decoder_class_for_dataset from .decoders import CifarDecoder, MnistDecoder from .vae import VAEReconstructionTask ================================================ FILE: sequoia/methods/aux_tasks/reconstruction/ae.py ================================================ """ Defines an Auto-Encoder-based Auxiliary task. """ from typing import ClassVar, Dict, Optional, Tuple, Union import torch from torch import Tensor, nn from sequoia.common.loss import Loss from ..auxiliary_task import AuxiliaryTask from .decoder_for_dataset import get_decoder_class_for_dataset class AEReconstructionTask(AuxiliaryTask): """Task that adds the AE loss (reconstruction loss). Uses the feature extractor (`encoder`) of the parent model as the encoder of an AE. Contains trainable `decoder` module, which is used to get the AE loss to train the feature extractor with. """ name: ClassVar[str] = "ae" def __init__(self, coefficient: float = None, options: AuxiliaryTask.Options = None): super().__init__(coefficient=coefficient, options=options) self.loss = nn.MSELoss(reduction="sum") # BUG: The decoder for mnist has output shape of [1, 28, 28], but the # transforms 'fix' that shape to be [3, 28, 28]. # Therefore: TODO: Should we adapt the output shape of the decoder # depending on the shape of the input? self.decoder: Optional[nn.Module] = None def create_decoder(self, input_shape: Union[torch.Size, Tuple[int, ...]]) -> nn.Module: """Creates a decoder to reconstruct the input from the hidden vectors.""" if len(input_shape) == 4: # discard the batch dimension. input_shape = input_shape[1:] # At the moment we have a 'fixed' set of image sizes (28, 32, 224, iirc) # and we just use the decoder type for the given dataset. # TODO: Create the decoder dynamically, depending on the required shape. decoder_class = get_decoder_class_for_dataset(input_shape) decoder: nn.Module = decoder_class( code_size=AuxiliaryTask.hidden_size, ) decoder = decoder.to(self.device) return decoder def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss: x = forward_pass["x"] h_x = forward_pass["h_x"] # y_pred = forward_pass["y_pred"] z = h_x.view([h_x.shape[0], -1]) if self.decoder is None or self.decoder.output_shape != x.shape: self.decoder = self.create_decoder(x.shape) x_hat = self.decoder(z) assert x_hat.shape == x.shape, ( f"reconstructed x should have same shape as original x! " f"({x_hat.shape} != {x.shape})" ) recon_loss = self.reconstruction_loss(x_hat, x) loss_info = Loss(name=self.name, loss=recon_loss) return loss_info def forward(self, h_x: Tensor) -> Tensor: # type: ignore z = h_x.view([h_x.shape[0], -1]) x_hat = self.decoder(z) return x_hat def reconstruct(self, x: Tensor) -> Tensor: h_x = self.encode(x) x_hat = self.forward(h_x) return x_hat.view(x.shape) def reconstruction_loss(self, recon_x: Tensor, x: Tensor) -> Tensor: return self.loss(recon_x, x) ================================================ FILE: sequoia/methods/aux_tasks/reconstruction/decoder_for_dataset.py ================================================ from typing import Dict, Tuple, Type, Union from torch import nn from .decoders import CifarDecoder, ImageNetDecoder, MnistDecoder # Dict mapping from image (height, width) to the type of decoder to use. # TODO: Add some more decoders for other image datasets/shapes. registered_decoders: Dict[Tuple[int, int], Type[nn.Module]] = { (28, 28): MnistDecoder, (32, 32): CifarDecoder, (224, 224): ImageNetDecoder, } def get_decoder_class_for_dataset(input_shape: Union[Tuple[int, int, int]]) -> Type[nn.Module]: assert len(input_shape) == 3, input_shape channels: int width: int height: int if input_shape[0] == min(input_shape): # Image is in C, H, W format channels, height, width = input_shape elif input_shape[-1] == min(input_shape): height, width, channels = input_shape if (height, width) in registered_decoders: return registered_decoders[(height, width)] raise RuntimeError(f"No decoder available for input shape {input_shape}") ================================================ FILE: sequoia/methods/aux_tasks/reconstruction/decoders.py ================================================ from abc import ABC from typing import Tuple from torch import nn from sequoia.common.layers import DeConvBlock, Reshape class Decoder(nn.Sequential, ABC): """A base class for the decoders (mostly for typing purposes).""" code_size: int output_shape: Tuple[int, int, int] class MnistDecoder(Decoder): """Decoder that generates images of shape [`out_channels`, 28, 28]""" def __init__(self, code_size: int, out_channels: int = 3): self.code_size = code_size self.output_shape: Tuple[int, int, int] = (out_channels, 28, 28) super().__init__( Reshape([self.code_size, 1, 1]), nn.ConvTranspose2d(self.code_size, 32, kernel_size=4, stride=1), nn.BatchNorm2d(32), nn.ELU(alpha=1.0, inplace=True), nn.ConvTranspose2d(32, 16, kernel_size=5, stride=2), nn.BatchNorm2d(16), nn.ELU(alpha=1.0, inplace=True), nn.ConvTranspose2d(16, 16, kernel_size=5, stride=2), nn.BatchNorm2d(16), nn.ELU(alpha=1.0, inplace=True), nn.ConvTranspose2d(16, out_channels, kernel_size=4, stride=1), nn.Sigmoid(), ) class CifarDecoder(Decoder): """Decoder that generates images of shape [3, 32, 32]""" def __init__(self, code_size: int): self.code_size = code_size self.output_shape: Tuple[int, int, int] = (3, 32, 32) super().__init__( Reshape([self.code_size, 1, 1]), DeConvBlock(self.code_size, 16), DeConvBlock(16, 32), DeConvBlock(32, 64), DeConvBlock(64, 64), DeConvBlock(64, 3, last_relu=False), nn.Sigmoid(), ) class ImageNetDecoder(Decoder): """Decoder that generates images of shape [3, 224, 224]""" def __init__(self, code_size: int): self.code_size = code_size self.output_shape: Tuple[int, int, int] = (3, 224, 224) super().__init__( Reshape([self.code_size, 1, 1]), DeConvBlock(self.code_size, 16), DeConvBlock(16, 32), DeConvBlock(32, 64), DeConvBlock(64, 128), DeConvBlock(128, 224), DeConvBlock(224, 3, last_relu=False), nn.Sigmoid(), ) ================================================ FILE: sequoia/methods/aux_tasks/reconstruction/vae.py ================================================ from dataclasses import dataclass from typing import ClassVar, Dict import torch from torch import Tensor, nn from sequoia.common.loss import Loss from ..auxiliary_task import AuxiliaryTask from .ae import AEReconstructionTask from .decoder_for_dataset import get_decoder_class_for_dataset class VAEReconstructionTask(AEReconstructionTask): """Task that adds the VAE loss (reconstruction + KL divergence). Uses the feature extractor (`encoder`) of the parent model as the encoder of a VAE. Contains trainable `mu`, `logvar`, and `decoder` modules, which are used to get the VAE loss to train the feature extractor with. """ name: ClassVar[str] = "vae" @dataclass class Options(AEReconstructionTask.Options): """Settings & Hyper-parameters related to the VAEReconstructionTask.""" code_size: int = 50 # dimensions of the VAE code-space. beta: float = 1.0 # Beta term, multiplies the KL divergence term. def __init__(self, coefficient: float = None, options: "VAEReconstructionTask.Options" = None): super().__init__(coefficient=coefficient, options=options) self.options: VAEReconstructionTask.Options self.code_size = self.options.code_size # add the rest of the VAE layers: (Mu, Sigma, and the decoder) self.mu = nn.Linear(AuxiliaryTask.hidden_size, self.code_size) self.logvar = nn.Linear(AuxiliaryTask.hidden_size, self.code_size) decoder_class = get_decoder_class_for_dataset(AuxiliaryTask.input_shape) self.decoder: nn.Module = decoder_class( code_size=self.code_size, ) def forward(self, h_x: Tensor) -> Tensor: # type: ignore h_x = h_x.view([h_x.shape[0], -1]) mu, logvar = self.mu(h_x), self.logvar(h_x) z = self.reparameterize(mu, logvar) x_hat = self.decoder(z) return x_hat def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor: std = torch.exp(0.5 * logvar) eps = torch.randn_like(std) z = mu + eps * std return z def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss: x = forward_pass["x"] h_x = forward_pass["h_x"] h_x = h_x.view([h_x.shape[0], -1]) mu, logvar = self.mu(h_x), self.logvar(h_x) z = self.reparameterize(mu, logvar) x_hat = self.decoder(z) recon_loss = self.reconstruction_loss(x_hat, x) kl_loss = self.options.beta * self.kl_divergence_loss(mu, logvar) loss = Loss(self.name, tensors=dict(mu=mu, logvar=logvar, z=z, x_hat=x_hat)) loss += Loss("recon", loss=recon_loss) loss += Loss("kl", loss=kl_loss) return loss def generate(self, z: Tensor) -> Tensor: z = z.to(self.device) return self.forward(z) @staticmethod def kl_divergence_loss(mu: Tensor, logvar: Tensor) -> Tensor: # see Appendix B from VAE paper: # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 # https://arxiv.org/abs/1312.6114 # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) ================================================ FILE: sequoia/methods/aux_tasks/transformation_based/__init__.py ================================================ from .bases import ClassifyTransformationTask, RegressTransformationTask, TransformationBasedTask from .rotation import RotationTask ================================================ FILE: sequoia/methods/aux_tasks/transformation_based/bases.py ================================================ from dataclasses import dataclass from functools import wraps from typing import Any, Callable, List, Tuple import torch from torch import Tensor, nn from torchvision.transforms import functional as TF from sequoia.common.loss import Loss from sequoia.common.metrics import Metrics, get_metrics from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import fix_channels from ..auxiliary_task import AuxiliaryTask logger = get_logger(__name__) def wrap_pil_transform(function: Callable): def _transform(img_x, arg): x = TF.to_pil_image(img_x.cpu()) x = function(x, arg) return TF.to_tensor(x).view(img_x.shape).to(img_x) @wraps(function) def _pil_transform(x: Tensor, arg: Any): return torch.cat([_transform(x_i, arg) for x_i in x]).view(x.shape) return _pil_transform class TransformationBasedTask(AuxiliaryTask): """ Generates an AuxiliaryTask for an arbitrary transformation function. Tries to classify or regress which argument was passed to the function, given only the transformed code, if `compare_with_original` is False, else given the original and transformed codes. NOTE: For now, the same function is applied to all the images within the batch. Therefore, the function_args is one value per batch of transformed images, and not one value per image. """ @dataclass class Options(AuxiliaryTask.Options): """Command-line options for the Transformation-based auxiliary task.""" # Wether or not both the original and transformed codes should be passed # to the auxiliary layer in order to detect the transformation. compare_with_original: bool = True def __init__( self, function: Callable[[Tensor, Any], Tensor], function_args: List[Any], loss: Callable, name: str = None, auxiliary_layer: nn.Module = None, options: Options = None, ): """Creates a transformation-based task to predict alpha given the codes. Args: function (Callable[[Tensor, Any], Tensor]): A function to apply to x before it is passed to the encoder. function_args (List[Any]): The arguments to be passed to the `function`. loss (Callable): A loss function, which will be called with `alpha_pred` and `alpha` to get a loss for each argument in `function_args`. name (str, optional): [description]. Defaults to None. auxiliary_layer (nn.Module, optional): [description]. Defaults to None. options (Options, optional): [description]. Defaults to None. """ super().__init__(options=options) self.function = function self.name = name or self.function.__name__ self.function_args = function_args self.alphas: Tensor = torch.Tensor(self.function_args) self.options: TransformationBasedTask.Options = options or self.Options() self.nargs = len(self.function_args) # which loss to use. CrossEntropy when classifying, or MSE when regressing. self.loss = loss if auxiliary_layer is not None: self.auxiliary_layer = auxiliary_layer else: input_dims = AuxiliaryTask.hidden_size if self.options.compare_with_original: input_dims *= 2 self.auxiliary_layer = nn.Sequential( nn.Flatten(), nn.Linear(input_dims, self.nargs), ) def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss: loss_info = Loss(self.name) batch_size = x.shape[0] assert self.alphas is not None, "set the `self.alphas` attribute in the base class." assert ( self.function_args is not None ), "set the `self.function_args` attribute in the base class." # Get the loss for each transformation argument. for fn_arg, alpha in zip(self.function_args, self.alphas): loss_i = self.get_loss_for_arg(x=x, h_x=h_x, fn_arg=fn_arg, alpha=alpha) loss_info += loss_i # print(f"{self.name}_{fn_arg}", loss_i.metrics) # Fuse all the sub-metrics into a total metric. # For instance, all the "rotate_0", "rotate_90", "rotate_180", etc. metrics = loss_info.metrics total_metrics = sum(loss_info.metrics.values(), Metrics()) # we actually add up all the metrics to get the "overall" metric. metrics.clear() metrics[self.name] = total_metrics return loss_info def get_loss_for_arg(self, x: Tensor, h_x: Tensor, fn_arg: Any, alpha: Tensor) -> Loss: alpha = alpha.to(x.device) # TODO: Transform before or after the `preprocess_inputs` function? x = fix_channels(x) # Transform X using the function. x_t = self.function(x, fn_arg) # Get the code for the transformed x. h_x_t = self.encode(x_t) aux_layer_input = h_x_t if self.options.compare_with_original: aux_layer_input = torch.cat([h_x, h_x_t], dim=-1) # Get the predicted argument of the transformation. alpha_t = self.auxiliary_layer(aux_layer_input) # get the metrics for this particular argument (accuracy, mse, etc.) if isinstance(fn_arg, int): name = f"{fn_arg}" else: name = f"{fn_arg:.3f}" loss = Loss(name) loss.loss = self.loss(alpha_t, alpha) loss.metrics[name] = get_metrics(x=x_t, h_x=h_x_t, y_pred=alpha_t, y=alpha) # Save some tensors for debugging purposes: loss.tensors["x_t"] = x_t loss.tensors["h_x_t"] = h_x_t loss.tensors["alpha_t"] = alpha_t return loss class ClassifyTransformationTask(TransformationBasedTask): """ Generates an AuxiliaryTask for an arbitrary transformation function. Tries to classify which argument was passed to the function. `self.alphas` is the classification target. It indicates which transformation argument was used. I.e. a vector of 0's for function_args[0], 1's for function_args[1], etc. """ def __init__( self, function: Callable[[Tensor, Any], Tensor], function_args: List[Any], name: str = None, options: TransformationBasedTask.Options = None, ): super().__init__( function=function, function_args=function_args, name=name, loss=nn.CrossEntropyLoss(), options=options, ) self.labels = torch.arange(len(function_args), dtype=torch.long) def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss: batch_size = x.shape[0] self.alphas = self.labels.view(-1, 1).repeat(1, batch_size) return super().get_loss(x=x, h_x=h_x, y_pred=y_pred, y=y) class RegressTransformationTask(TransformationBasedTask): """ Generates an AuxiliaryTask for an arbitrary transformation function. Tries to Regress which argument value was passed to the function. x -----------------------encoder(x)-> h_x -----| x --f(x, alpha)--> x_t --encoder(x)-> h_x_t ---|----A(h_x, h_x_t) --> alpha_pred <-MSE-> alpha Can either use a list of function arguments, or a range from which to sample the argument values uniformly. """ def __init__( self, function: Callable[[Tensor, Any], Tensor], function_args: List[Any] = None, name: str = None, function_arg_range: Tuple[float, float] = None, n_calls: int = 2, options: TransformationBasedTask.Options = None, ): super().__init__( function=function, function_args=[], name=name, loss=nn.MSELoss(), options=options, ) if function_arg_range: self.function_arg_range = function_arg_range self.n_calls = n_calls elif function_args: self.function_arg_range = (min(function_args), max(function_args)) self.n_calls = len(function_args) else: raise RuntimeError("`function_args` or `function_arg_range` must be set.") self.arg_min = self.function_arg_range[0] self.arg_max = self.function_arg_range[1] self.arg_med = (self.arg_min + self.arg_max) / 2 self.arg_amp = self.arg_max - self.arg_min input_dims = AuxiliaryTask.hidden_size if self.options.compare_with_original: input_dims *= 2 self.auxiliary_layer = nn.Sequential( nn.Flatten(), nn.Linear(input_dims, 1), nn.Sigmoid(), ScaleToRange(arg_min=self.arg_min, arg_amp=self.arg_amp), ) def get_function_args(self) -> Tensor: # sample random arguments in the range [self.min_arg, self.max_arg] args = torch.rand(self.n_calls) args *= self.arg_amp args += self.arg_min return args def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss: batch_size = x.shape[0] random_alphas = self.get_function_args() self.function_args = random_alphas.tolist() self.alphas = random_alphas.view(-1, 1, 1).repeat(1, batch_size, 1) loss = super().get_loss(x=x, h_x=h_x, y_pred=y_pred, y=y) return loss class ScaleToRange(nn.Module): def __init__(self, arg_min: float, arg_amp: float): super().__init__() self.arg_min = arg_min self.arg_max = arg_amp def forward(self, x: Tensor) -> Tensor: return self.arg_min + self.arg_amp * x ================================================ FILE: sequoia/methods/aux_tasks/transformation_based/rotation.py ================================================ from dataclasses import dataclass from torch import Tensor from .bases import ClassifyTransformationTask def rotate(x: Tensor, angle: int) -> Tensor: """Rotates the given tensor `x` by an angle `angle`. Currently only supports multiples of 90 degrees. Args: x (Tensor): An image or a batch of images, with shape [(b), C, H, W] angle (int): An angle. Currently only supports {0, 90, 180, 270}. Returns: Tensor: The tensor x, rotated by `angle` degrees counter-clockwise. Example: >>> import torch >>> x = torch.Tensor([ ... [1, 2, 3], ... [4, 5, 6], ... [7, 8, 9], ... ]) >>> print(x) tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]) >>> x = x.view(1, 3, 3) >>> x_rot = rotate(x, 90) >>> print(x_rot.shape) torch.Size([1, 3, 3]) >>> print(x_rot) tensor([[[3., 6., 9.], [2., 5., 8.], [1., 4., 7.]]]) """ # TODO: Test that this works. assert angle % 90 == 0, "can only rotate 0, 90, 180, or 270 degrees for now." k = angle // 90 # BUG: Very rarely, this condition won't work! (More specifically, only on the last batch of data!) # assert min(x.shape) == x.shape[-3], f"Image should be in [(b) C H W] format. (image shape: {x.shape}" return x.rot90(k, dims=(-2, -1)) if __name__ == "__main__": import doctest doctest.testmod() class RotationTask(ClassifyTransformationTask): @dataclass class Options(ClassifyTransformationTask.Options): """Command-line options for the Transformation-based auxiliary task.""" # Wether or not both the original and transformed codes should be passed # to the auxiliary layer in order to detect the transformation. # TODO: Maybe try with this set to False, to learn "innate" orientation rather than relative orientation. compare_with_original: bool = True def __init__(self, name="rotation", options: "RotationTask.Options" = None): super().__init__( function=rotate, function_args=[0, 90, 180, 270], name=name, options=options or RotationTask.Options(), ) ================================================ FILE: sequoia/methods/avalanche_methods/__init__.py ================================================ """ Adapters for Avalanche Strategies, so they can be used as Methods in Sequoia. See the Avalanche repo for more info: https://github.com/ContinualAI/avalanche """ # from .agem import AGEMMethod # from .ar1 import AR1Method # from .base import AvalancheMethod # from .cwr_star import CWRStarMethod # from .ewc import EWCMethod # # Still quite buggy, needs to be fixed on the avalanche side. # from .gdumb import GDumbMethod # from .gem import GEMMethod # from .lwf import LwFMethod # from .naive import NaiveMethod # from .replay import ReplayMethod # from .synaptic_intelligence import SynapticIntelligenceMethod ================================================ FILE: sequoia/methods/avalanche_methods/agem.py ================================================ """ Method based on AGEM from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.agem.AGEMPlugin` or `avalanche.training.strategies.strategy_wrappers.AGEM` for more info. """ from dataclasses import dataclass from typing import ClassVar, Type import pytest from avalanche.training.strategies import AGEM, BaseStrategy from simple_parsing import ArgumentParser from simple_parsing.helpers.hparams import uniform from sequoia.methods import register_method from sequoia.settings.sl import TaskIncrementalSLSetting from .base import AvalancheMethod @register_method @dataclass class AGEMMethod(AvalancheMethod[AGEM]): """Average Gradient Episodic Memory (AGEM) strategy from Avalanche. See AGEM plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # number of patterns per experience in the memory patterns_per_exp: int = uniform(10, 1000, default=100) # number of patterns in memory sample when computing reference gradient. sample_size: int = uniform(16, 256, default=64) strategy_class: ClassVar[Type[BaseStrategy]] = AGEM if __name__ == "__main__": setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(AGEMMethod, "method") args = parser.parse_args() method: AGEMMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/agem_test.py ================================================ """ WIP: Tests for the AGEM Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .agem import AGEMMethod from .base import AvalancheMethod from .base_test import _TestAvalancheMethod class TestAGEMMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = AGEMMethod ================================================ FILE: sequoia/methods/avalanche_methods/ar1.py ================================================ """ Method based on AR1 from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.strategies.ar1.AR1` for more info. """ from dataclasses import dataclass from typing import ClassVar, Type from avalanche.training.strategies import AR1, BaseStrategy from simple_parsing.helpers.hparams import log_uniform, uniform from sequoia.methods import register_method from sequoia.settings.sl import TaskIncrementalSLSetting from .base import AvalancheMethod @register_method @dataclass class AR1Method(AvalancheMethod[AR1]): """AR1 strategy from Avalanche. See AR1 plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # The learning rate (SGD optimizer). lr: float = log_uniform(1e-6, 1e-2, default=0.001) # The momentum (SGD optimizer). momentum: float = uniform(0.9, 0.999, default=0.9) # The L2 penalty used for weight decay. l2: float = uniform(1e-6, 1e-3, default=0.0005) # The number of training epochs. Defaults to 4. train_epochs: int = uniform(1, 50, default=4) # The initial update rate of BatchReNorm layers. init_update_rate: float = 0.01 # The incremental update rate of BatchReNorm layers. inc_update_rate: float = 0.00005 # The maximum r value of BatchReNorm layers. max_r_max: float = 1.25 # The maximum d value of BatchReNorm layers. max_d_max: float = 0.5 # The incremental step of r and d values of BatchReNorm layers. inc_step: float = 4.1e-05 # The size of the replay buffer. The replay buffer is shared across classes. rm_sz: int = uniform(500, 2000, default=1500) # A string describing the name of the layer to use while freezing the lower # (nearest to the input) part of the model. The given layer is not frozen # (exclusive). freeze_below_layer: str = "lat_features.19.bn.beta" # The number of the layer to use as the Latent Replay Layer. Usually this is the # same of `freeze_below_layer`. latent_layer_num: int = 19 # The Synaptic Intelligence lambda term. Defaults to 0, which means that the # Synaptic Intelligence regularization will not be applied. ewc_lambda: float = uniform(0, 1, default=0) # The train minibatch size. Defaults to 128. train_mb_size: int = uniform(1, 512, default=128) # The eval minibatch size. Defaults to 128. eval_mb_size: int = uniform(1, 512, default=128) strategy_class: ClassVar[Type[BaseStrategy]] = AR1 if __name__ == "__main__": from simple_parsing import ArgumentParser setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(AR1Method, "method") args = parser.parse_args() method: AR1Method = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/ar1_test.py ================================================ """ WIP: Tests for the AR1 Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type import pytest from avalanche.models import SimpleCNN, SimpleMLP from torch.nn import Module from sequoia.common.config import Config from sequoia.conftest import xfail_param from sequoia.settings.sl import TaskIncrementalSLSetting from .ar1 import AR1Method from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .patched_models import MTSimpleCNN, MTSimpleMLP @pytest.mark.xfail(reason="AR1 isn't super well supported yet.") class TestAR1Method(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = AR1Method @pytest.mark.timeout(60) @pytest.mark.parametrize( "model_type", [ xfail_param( SimpleCNN, reason="seems like the model in AR1 is supposed to be larger?", ), SimpleMLP, xfail_param( MTSimpleCNN, reason="IndexError Bug inside `avalanche/models/dynamic_modules.py", ), xfail_param( MTSimpleMLP, reason="IndexError Bug inside `avalanche/models/dynamic_modules.py", ), ], ) def test_short_task_incremental_setting( self, model_type: Type[Module], short_task_incremental_setting: TaskIncrementalSLSetting, config: Config, ): method = self.Method(model=model_type) results = short_task_incremental_setting.apply(method, config) assert 0.05 < results.average_final_performance.objective ================================================ FILE: sequoia/methods/avalanche_methods/base.py ================================================ """ Adapter for the `BaseStrategy` from Avalanche, wrapping it up into a Sequoia Method. See the Avalanche repo for more info: https://github.com/ContinualAI/avalanche """ import inspect import warnings from dataclasses import dataclass, fields from typing import ClassVar, Dict, Generic, List, Optional, Type, TypeVar, Union import gym import torch import tqdm from avalanche.benchmarks.scenarios import Experience from avalanche.evaluation.metrics import accuracy_metrics, forgetting_metrics, loss_metrics from avalanche.logging import InteractiveLogger from avalanche.logging.wandb_logger import WandBLogger as _WandBLogger from avalanche.models import SimpleCNN, SimpleMLP from avalanche.models.utils import avalanche_forward from avalanche.training.plugins import EvaluationPlugin, StrategyPlugin from avalanche.training.strategies import BaseStrategy from gym import spaces from gym.spaces.utils import flatdim from gym.utils import colorize from simple_parsing.helpers import choice, field, list_field from simple_parsing.helpers.hparams import HyperParameters, log_uniform, uniform from torch import nn, optim from torch.nn import Module from torch.optim import SGD from torch.optim.optimizer import Optimizer from sequoia.common.spaces import Image from sequoia.methods import Method from sequoia.settings.sl import ( ClassIncrementalSetting, ContinualSLSetting, PassiveEnvironment, SLSetting, ) from sequoia.settings.sl.continual import Actions, ContinualSLTestEnvironment, Observations, Rewards from sequoia.settings.sl.continual.setting import smart_class_prediction from sequoia.utils import get_logger from .experience import SequoiaExperience from .patched_models import MTSimpleCNN, MTSimpleMLP logger = get_logger(__name__) StrategyType = TypeVar("StrategyType", bound=BaseStrategy) # "Patch" for the WandbLogger of Avalanche class WandBLogger(_WandBLogger): # def before_run(self): # if self.wandb is None: # self.import_wandb() # if self.init_kwargs: # self.wandb.init(**self.init_kwargs) # else: # self.wandb.init() def import_wandb(self): try: import wandb except ImportError: raise ImportError('Please run "pip install wandb" to install wandb') self.wandb = wandb def args_parse(self): self.init_kwargs = {"project": self.project_name, "name": self.run_name} if self.params: self.init_kwargs.update(self.params) def before_run(self): if self.wandb is None: self.import_wandb() if self.init_kwargs: if not self.wandb.run: self.wandb.init(**self.init_kwargs) else: if not self.wandb.run: self.wandb.init() @dataclass class AvalancheMethod( Method, HyperParameters, Generic[StrategyType], target_setting=ContinualSLSetting, ): """Base class for all the Methods adapted from Avalanche.""" # Name for the 'family' of methods, use to differentiate methods with the same name. family: ClassVar[str] = "avalanche" # The Strategy class to use for this Method. Subclasses have to add this property. strategy_class: ClassVar[Type[StrategyType]] = BaseStrategy # TODO: Maybe use a 'PluginClass', so that we can avoid subclassing both the # plugin and the strategy when we need to patch something in the plugin. plugin_class: ClassVar[Optional[Type[StrategyPlugin]]] # Class Variable to hold the types of models available as options for the `model` # field below. available_models: ClassVar[Dict[str, Type[nn.Module]]] = { "simple_cnn": SimpleCNN, "simple_mlp": SimpleMLP, "mt_simple_cnn": MTSimpleCNN, "mt_simple_mlp": MTSimpleMLP, } # Class Variable to hold the types of optimizers available for the `optimizer` field # below. available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = { "sgd": SGD, "adam": optim.Adam, "rmsprop": optim.RMSprop, } # Class variable to hold the types of loss functions available for the `criterion` # field below. available_criterions: ClassVar[Dict[str, Type[nn.Module]]] = { "cross_entropy_loss": nn.CrossEntropyLoss, } # The model. model: Union[Module, Type[Module]] = choice(available_models, default=SimpleCNN) # The optimizer to use. optimizer: Union[Optimizer, Type[Optimizer]] = choice(available_optimizers, default=optim.Adam) # The loss criterion to use. criterion: Union[Module, Type[Module]] = choice( available_criterions, default=nn.CrossEntropyLoss ) # The train minibatch size. train_mb_size: int = uniform(1, 2048, default=64) # The number of training epochs. train_epochs: int = uniform(1, 100, default=5) # The eval minibatch size. eval_mb_size: int = 1 # The device to use. Defaults to None (cpu). device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Plugins to be added. Defaults to None. plugins: Optional[List[StrategyPlugin]] = list_field(default=None, cmd=False, to_dict=False) # (optional) instance of EvaluationPlugin for logging and metric computations. evaluator: Optional[EvaluationPlugin] = field(None, cmd=False, to_dict=False) # The frequency of the calls to `eval` inside the training loop. # if -1: no evaluation during training. # if 0: calls `eval` after the final epoch of each training # experience. # if >0: calls `eval` every `eval_every` epochs and at the end # of all the epochs for a single experience. eval_every: int = -1 # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3) # L2 regularization term for the model weights. weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6) # Hidden size of the model, when applicable. hidden_size: int = uniform(128, 1024, default=512) # Number of workers of the dataloader. Defaults to 4. num_workers: int = 4 def __post_init__(self): super().__post_init__() # Count the number of calls to `configure`. (useful when running sweeps, as we # reuse the Method instance.) self._n_configures: int = 0 self.setting: ClassIncrementalSetting self.cl_strategy: StrategyType def configure(self, setting: ClassIncrementalSetting) -> None: self.setting = setting self.model = self.create_model(setting).to(self.device) # Select the loss function to use. if not isinstance(self.criterion, nn.Module): self.criterion = self.criterion() metrics = [ accuracy_metrics(epoch=True, experience=True, stream=True), forgetting_metrics(experience=True, stream=True), loss_metrics(minibatch=False, epoch=True, experience=True, stream=True), ] loggers = [ # BUG: evaluation.py:94, _update_metrics: # before_training() takes 2 positional arguments but 3 were given # default_logger, InteractiveLogger(), ] if setting.wandb and setting.wandb.project: wandb_logger = WandBLogger( project_name=setting.wandb.project, run_name=setting.wandb.run_name, params=setting.wandb.wandb_init_kwargs(), ) loggers.append(wandb_logger) self.evaluator = EvaluationPlugin( *metrics, loggers=loggers, ) self.optimizer = self.make_optimizer() # Actually initialize the strategy using the fields on `self`. self.cl_strategy: StrategyType = self.create_cl_strategy(setting) if setting.monitor_training_performance and ( type(self).environment_to_experience is AvalancheMethod.environment_to_experience ): warnings.warn( UserWarning( colorize( "This Setting would like to monitor the online training " "performance, which means that the rewards/labels (`y`) are " "returned after sending an action (prediction) to the training " "environment." "\n" "However, Avalanche does not currently support training on " "'active' dataloaders or gym environments, and needs access to " "the 'x' and 'y' at the same time, as is usually the case in " "Supervised CL." "\n" "Therefore, the current solution I've found for this issue is " "to iterate once over the training environment, sending it " "(by default random) actions, in order to create an " "'Experience' object expected by the Avalanche Strategies." "\n" "Concretely, this means that, unless you overwrite the " "`environment_to_experience` method, **your online performance " "score will be limited to chance accuracy!**", "yellow", ) ) ) def create_cl_strategy(self, setting: ClassIncrementalSetting) -> StrategyType: strategy_constructor_params: List[str] = list( inspect.signature(self.strategy_class.__init__).parameters.keys() ) cl_strategy_kwargs = { f.name: getattr(self, f.name) for f in fields(self) if f.name in strategy_constructor_params } return self.strategy_class(**cl_strategy_kwargs) def create_model(self, setting: ClassIncrementalSetting) -> Module: """Create the Model for the setting. Parameters ---------- setting : ClassIncrementalSetting The Setting on which this Method will be applied. Returns ------- Module The Model to be used, which will be passed to the Strategy constructor. """ image_space: Image = setting.observation_space.x input_dims = flatdim(image_space) assert isinstance( setting.action_space, spaces.Discrete ), "assume a classification problem for now." num_classes = setting.action_space.n if setting.task_labels_at_train_time: if setting.task_labels_at_test_time: if self.model is SimpleCNN and MTSimpleCNN in self.available_models.values(): self.model = MTSimpleCNN logger.info( f"Upgrading the model to a {MTSimpleCNN}, since task-labels " f"are available at train and test time." ) if self.model is SimpleMLP and MTSimpleMLP in self.available_models.values(): self.model = MTSimpleMLP logger.info( f"Upgrading the model to a {MTSimpleMLP}, since task-labels " f"are available at train and test time." ) if isinstance(self.model, nn.Module): if self._n_configures > 0: logger.info("Resetting the model, since this isn't the first run.") self.model = type(self.model) self._n_configures += 1 else: logger.info(f"Using model {self.model}.") return self.model if self.model is SimpleMLP: return self.model( input_size=input_dims, hidden_size=self.hidden_size, num_classes=num_classes, ) if self.model is MTSimpleMLP: return self.model(input_size=input_dims, hidden_size=self.hidden_size) if self.model is SimpleCNN: return self.model(num_classes=num_classes) # self.model is most probably a type of nn.Module, so we instantiate it. # These other models (MTSimpleCNN) don't seem to take any kwargs. return self.model() def make_optimizer(self) -> Optimizer: """Creates the Optimizer.""" optimizer_class = self.optimizer if isinstance(self.optimizer, Optimizer): optimizer_class = type(self.optimizer) return optimizer_class( self.model.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay, ) def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): train_exp = self.environment_to_experience(train_env, setting=self.setting) valid_exp = self.environment_to_experience(valid_env, setting=self.setting) self.cl_strategy.train(train_exp, eval_streams=[valid_exp], num_workers=self.num_workers) def get_actions( self, observations: ClassIncrementalSetting.Observations, action_space: gym.Space, ) -> ClassIncrementalSetting.Actions: observations = observations.to(self.device) with torch.no_grad(): x = observations.x task_labels = observations.task_labels logits = avalanche_forward(self.model, x=x, task_labels=task_labels) if task_labels is not None: # If task labels are available, figure out the possible classes for # each task, and 'mask out' those so they aren't predicted. y_pred = smart_class_prediction( logits, task_labels, setting=self.setting, train=False ) else: y_pred = logits.argmax(-1) return self.target_setting.Actions(y_pred=y_pred) def set_testing(self): self.model.current_task_id = None return super().set_testing() def on_task_switch(self, task_id: Optional[int]) -> None: if self.training: # No need to tell the cl_strategy, because we call `.train` which calls # `before_training_exp` with the current exp (the current task). self.model.current_task_id = task_id else: # TODO: In Sequoia, the test 'epoch' goes through the sequence of tasks, not # necessarily in the same order as during training, while in Avalanche the # 'eval' occurs on a per-task basis. # TODO: There is a bug with task-incremental setting, where during testing # the algo might be tested on tasks it hasn't built an output layer for yet, # but building this layer requires calling `adaptation(dataset)` and this # dataset will be iterated on, which isn't great in the case of the test # env... # encountered before. # During test-time, there might be a task boundary, and we need to let the # cl_strategy and the plugins know. # TODO: Get this working, figure out what the plugins expect to retrieve # from the cl_strategy in this callback. pass def get_search_space(self, setting: ClassIncrementalSetting): return self.get_orion_space() def adapt_to_new_hparams(self, new_hparams: Dict): for k, v in new_hparams.items(): if isinstance(v, dict): raise NotImplementedError(f"todo: set hparam {k} to value {v}") setattr(self, k, v) def environment_to_experience(self, env: PassiveEnvironment, setting: SLSetting) -> Experience: """ "Converts" the PassiveEnvironments (dataloaders) from Sequoia into an Experience object usable by the Avalanche Strategies. By default, this just iterates through the environment, giving back the actions from the `get_actions` method. NOTE: You could instead train an online model here, in order to get better online performance! """ all_observations: List[Observations] = [] all_rewards: List[Rewards] = [] for batch in tqdm.tqdm(env, desc="Converting environment into TensorDataset"): observations: Observations rewards: Optional[Rewards] if isinstance(batch, Observations): observations = batch rewards = None else: assert isinstance(batch, tuple) and len(batch) == 2 observations, rewards = batch if rewards is None: # Need to send actions to the env before we can actually get the # associated Reward. Here there are (at least) three options to choose # from: # Option 1: Select action at random: action = env.action_space.sample() if observations.batch_size != action.shape[0]: action = action[: observations.batch_size] rewards: Rewards = env.send(action) # Option 2: Use the current model, in 'inference' mode: # action = self.get_actions(observations, action_space=env.action_space) # rewards: Rewards = env.send(action) # Option 3: Train an online model: # # NOTE: You might have to change this for your strategy. For instance, # # currently does not take any plugins into consideration. # self.cl_strategy.optimizer.zero_grad() # x = observations.x.to(self.cl_strategy.device) # task_labels = observations.task_labels # logits = avalanche_forward(self.model, x=x, task_labels=task_labels) # y_pred = logits.argmax(-1) # action = self.target_setting.Actions(y_pred=y_pred) # rewards: Rewards = env.send(action) # y = rewards.y.to(self.cl_strategy.device) # # Train the model: # loss = self.cl_strategy.criterion(logits, y) # loss.backward() # self.cl_strategy.optimizer.step() all_observations.append(observations) all_rewards.append(rewards) # Stack all the observations into a single `Observations` object: stacked_observations: Observations = Observations.concatenate(all_observations) stacked_rewards: Rewards = Rewards.concatenate(all_rewards) # BUG: Cuda errors, probably due to indexing into a tensor on different device # /numpy/etc. stacked_observations = stacked_observations.cpu() stacked_rewards = stacked_rewards.cpu() x = stacked_observations.x task_labels = stacked_observations.task_labels y = stacked_rewards.y return SequoiaExperience(env=env, setting=setting, x=x, y=y, task_labels=task_labels) def test_epoch(strategy, test_env: ContinualSLTestEnvironment, **kwargs): strategy.is_training = False strategy.model.eval() strategy.model.to(strategy.device) # strategy.before_eval(**kwargs) # Data Adaptation # strategy.before_eval_dataset_adaptation(**kwargs) # strategy.eval_dataset_adaptation(**kwargs) # strategy.after_eval_dataset_adaptation(**kwargs) # strategy.make_eval_dataloader(**kwargs) # strategy.before_eval_exp(**kwargs) # strategy.eval_epoch(**kwargs) test_epoch_gym_env(strategy, test_env) # strategy.after_eval_exp(**kwargs) def test_epoch_gym_env(strategy: BaseStrategy, test_env: ContinualSLTestEnvironment, **kwargs): strategy.mb_it = 0 episode = 0 strategy.experience = test_env total_steps = 0 max_episodes = 1 # Only one 'episode' / 'epoch'. while not test_env.is_closed() and episode < max_episodes: observations: Observations = test_env.reset() done = False step = 0 with tqdm.tqdm(desc="Eval epoch") as pbar: while not done: # strategy.before_eval_iteration(**kwargs) strategy.mb_x = observations.x strategy.mb_task_id = observations.task_labels strategy.mb_x = strategy.mb_x.to(strategy.device) # IDEA: Should probably return a random action whenever we have task # labels in the test loop the task id isn't a known one in the model: # strategy.before_eval_forward(**kwargs) strategy.logits = avalanche_forward( model=strategy.model, x=strategy.mb_x, task_labels=strategy.mb_task_id, ) y_pred = strategy.logits.argmax(-1) actions = Actions(y_pred=y_pred) observations, rewards, done, info = test_env.step(actions) step += 1 pbar.update() total_steps += 1 if not isinstance(done, bool): assert False, done strategy.mb_y = rewards.y.to(strategy.device) if rewards is not None else None # strategy.after_eval_forward(**kwargs) strategy.mb_it += 1 strategy.loss = strategy.criterion(strategy.logits, strategy.mb_y) # strategy.after_eval_iteration(**kwargs) pbar.set_postfix( { "Episode": f"{episode}/{max_episodes}", "step": f"{step}", "total_steps": f"{total_steps}", "loss": f"{strategy.loss.item()}", } ) episode += 1 ================================================ FILE: sequoia/methods/avalanche_methods/base_test.py ================================================ import inspect from inspect import Signature, _empty, getsourcefile from typing import ClassVar, List, Optional, Type import pytest import tqdm from avalanche.models import SimpleCNN, SimpleMLP from avalanche.models.utils import avalanche_forward from avalanche.training.strategies import BaseStrategy from sequoia.common.config import Config from sequoia.conftest import slow from sequoia.methods.method_test import MethodTests from sequoia.settings.sl import ClassIncrementalSetting, SLSetting from sequoia.settings.sl.incremental.objects import Observations, Rewards from .base import AvalancheMethod from .experience import SequoiaExperience from .patched_models import MTSimpleCNN, MTSimpleMLP class _TestAvalancheMethod(MethodTests): Method: ClassVar[Type[AvalancheMethod]] = AvalancheMethod # Names of (hyper-)parameters which are allowed to have a different default value in # Sequoia compared to their implementations in Avalanche. ignored_parameter_differences: ClassVar[List[str]] = [ "plugins", "device", "eval_mb_size", "criterion", "train_mb_size", "train_epochs", "evaluator", ] @classmethod @pytest.fixture(params=[SimpleCNN, SimpleMLP, MTSimpleCNN, MTSimpleMLP]) def method(cls, config: Config, request) -> AvalancheMethod: """Fixture that returns the Method instance to use when testing/debugging.""" model_type = request.param return cls.Method(model=model_type, train_mb_size=10, train_epochs=1) def test_hparams_have_same_defaults_as_in_avalanche(self): strategy_type: Type[BaseStrategy] = self.Method.strategy_class method = self.Method() strategy_constructor: Signature = inspect.signature(strategy_type.__init__) strategy_init_params = strategy_constructor.parameters # TODO: Use the plugin constructor as the reference, rather than the Strategy # constructor. # plugin_constructor for parameter_name, parameter in strategy_init_params.items(): if parameter.default is _empty: continue assert hasattr(method, parameter_name) method_value = getattr(method, parameter_name) # Ignore mismatches in some parameters, like `device`. if parameter_name in self.ignored_parameter_differences: continue assert method_value == parameter.default, ( f"{self.Method.__name__} in Sequoia has different default value for " f"hyper-parameter '{parameter_name}' than in Avalanche: \n" f"\t{method_value} != {parameter.default}\n" f"Path to sequoia implementation: {getsourcefile(self.Method)}\n" f"Path to SB3 implementation: {getsourcefile(strategy_type)}\n" ) def validate_results( self, setting: SLSetting, method: AvalancheMethod, results: SLSetting.Results, ) -> None: assert results assert results.objective # TODO: Set some 'reasonable' bounds on the performance here, depending on the # setting/dataset.# def validate_results @slow @pytest.mark.timeout(60) def test_short_sl_track( self, method: AvalancheMethod, short_sl_track_setting: ClassIncrementalSetting, config: Config, ): # Use the same batch size as the setting, since it's shorter than usual. method.train_mb_size = short_sl_track_setting.batch_size results = short_sl_track_setting.apply(method, config=config) # TODO: Set up a more reasonable bound on the expected performance. For now this # is fine as we're just debugging: the test passes as long as there is a results # object that contains a non-zero online performance (meaning that the setting # was monitoring training performance correctly). assert 0 < results.average_online_performance.objective assert 0 < results.average_final_performance.objective def test_warning_if_environment_to_experience_isnt_overwritten(short_sl_track_setting): """When""" method = AvalancheMethod() assert short_sl_track_setting.monitor_training_performance with pytest.warns(UserWarning, match="chance accuracy"): method.configure(short_sl_track_setting) class MyDummyMethod(AvalancheMethod): def environment_to_experience(self, env, setting): all_observations: List[Observations] = [] all_rewards: List[Rewards] = [] for batch in tqdm.tqdm(env, desc="Converting environment into TensorDataset"): observations: Observations rewards: Optional[Rewards] if isinstance(batch, Observations): observations = batch rewards = None else: assert isinstance(batch, tuple) and len(batch) == 2 observations, rewards = batch if rewards is None: # Need to send actions to the env before we can actually get the # associated Reward. Here there are (at least) three options to choose # from: # Option 1: Select action at random: # action = env.action_space.sample() # if observations.batch_size != action.shape[0]: # action = action[: observations.batch_size] # rewards: Rewards = env.send(action) # Option 2: Use the current model, in 'inference' mode: # action = self.get_actions(observations, action_space=env.action_space) # rewards: Rewards = env.send(action) # Option 3: Train an online model: # NOTE: You might have to change this for your strategy. For instance, # currently does not take any plugins into consideration. self.cl_strategy.optimizer.zero_grad() x = observations.x.to(self.cl_strategy.device) task_labels = observations.task_labels logits = avalanche_forward(self.model, x=x, task_labels=task_labels) y_pred = logits.argmax(-1) action = self.target_setting.Actions(y_pred=y_pred) rewards: Rewards = env.send(action) y = rewards.y.to(self.cl_strategy.device) # Train the model: loss = self.cl_strategy.criterion(logits, y) loss.backward() self.cl_strategy.optimizer.step() all_observations.append(observations) all_rewards.append(rewards) # Stack all the observations into a single `Observations` object: stacked_observations: Observations = Observations.concatenate(all_observations) x = stacked_observations.x task_labels = stacked_observations.task_labels stacked_rewards: Rewards = Rewards.concatenate(all_rewards) y = stacked_rewards.y return SequoiaExperience(env=env, setting=setting, x=x, y=y, task_labels=task_labels) def test_no_warning_if_environment_to_experience_is_overwritten(short_sl_track_setting): """When the Method doesn't overwrite the `environment_to_experience` method, we raise a Warning to let the User know that they can only expect chance online accuracy. """ method = MyDummyMethod() assert short_sl_track_setting.monitor_training_performance with pytest.warns(None) as record: method.configure(short_sl_track_setting) assert len(record) == 0 ================================================ FILE: sequoia/methods/avalanche_methods/conftest.py ================================================ from pathlib import Path import pytest import torch from sklearn.datasets import make_classification from sklearn.model_selection import train_test_split from torch.utils.data import TensorDataset from sequoia.common.config import Config collect_ignore = [] collect_ignore_glob = [] try: from avalanche.training.strategies import BaseStrategy # type: ignore except ImportError: # pytest.skip(reason="Needs avalanche", allow_module_level=True) collect_ignore_glob.append("sequoia/methods/avalanche/**.py") # FIXME: Overwriting the 'config' fixture from before so it's 'session' scoped instead. @pytest.fixture(scope="session") def config(tmp_path_factory): test_log_dir = tmp_path_factory.mktemp("test_log_dir") return Config(debug=True, seed=123, log_dir=test_log_dir) @pytest.fixture(scope="session") def fast_scenario(use_task_labels=False, shuffle=True): """Copied directly from Avalanche in "tests/unit_tests_utils.py". Not used anywhere atm, but could be used as inspiration for writing quicker tests in Sequoia. """ n_samples_per_class = 100 dataset = make_classification( n_samples=10 * n_samples_per_class, n_classes=10, n_features=6, n_informative=6, n_redundant=0, ) X = torch.from_numpy(dataset[0]).float() y = torch.from_numpy(dataset[1]).long() train_X, test_X, train_y, test_y = train_test_split( X, y, train_size=0.6, shuffle=True, stratify=y ) from avalanche.benchmarks import nc_benchmark # type: ignore train_dataset = TensorDataset(train_X, train_y) test_dataset = TensorDataset(test_X, test_y) my_nc_benchmark = nc_benchmark( train_dataset, test_dataset, 5, task_labels=use_task_labels, shuffle=shuffle ) return my_nc_benchmark ================================================ FILE: sequoia/methods/avalanche_methods/cwr_star.py ================================================ """ Method based on CWRStar from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.cwr_star.CWRStarPlugin` or `avalanche.training.strategies.strategy_wrappers.CWRStar` for more info. """ from dataclasses import dataclass from typing import ClassVar, Optional, Type from avalanche.training.strategies import BaseStrategy, CWRStar from sequoia.methods import register_method from sequoia.settings.sl import TaskIncrementalSLSetting from .base import AvalancheMethod @register_method @dataclass class CWRStarMethod(AvalancheMethod[CWRStar]): """CWRStar strategy from Avalanche. See CWRStar plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # Name of the CWR layer. Defaults to None, which means that the last fully connected # layer will be used. cwr_layer_name: Optional[str] = None strategy_class: ClassVar[Type[BaseStrategy]] = CWRStar if __name__ == "__main__": from simple_parsing import ArgumentParser setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(CWRStarMethod, "method") args = parser.parse_args() method: CWRStarMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/cwr_star_test.py ================================================ """ WIP: Tests for the CWRStar Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .cwr_star import CWRStarMethod class TestCWRStarMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = CWRStarMethod ================================================ FILE: sequoia/methods/avalanche_methods/ewc.py ================================================ """ Method based on EWC from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.ewc.EWCPlugin` or `avalanche.training.strategies.strategy_wrappers.EWC` for more info. """ from dataclasses import dataclass from typing import ClassVar, Dict, Optional, Type, Union from avalanche.models import SimpleCNN, SimpleMLP from avalanche.training.strategies import EWC, BaseStrategy from simple_parsing import ArgumentParser from simple_parsing.helpers import choice from simple_parsing.helpers.hparams import categorical, uniform from torch import nn from sequoia.methods import register_method from sequoia.settings.sl import TaskIncrementalSLSetting from .base import AvalancheMethod @register_method @dataclass class EWCMethod(AvalancheMethod[EWC]): """ Elastic Weight Consolidation (EWC) strategy from Avalanche. See EWC plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ strategy_class: ClassVar[Type[BaseStrategy]] = EWC # Class Variable to hold the types of models available as options for the `model` # field below. available_models: ClassVar[Dict[str, Type[nn.Module]]] = { "simple_cnn": SimpleCNN, "simple_mlp": SimpleMLP, # "mt_simple_cnn": MTSimpleCNN, # These two still have some bugs in their loss # "mt_simple_mlp": MTSimpleMLP, # These two still have some bugs in their loss } # The model. model: Union[nn.Module, Type[nn.Module]] = choice(available_models, default=SimpleCNN) # Hyperparameter to weigh the penalty inside the total loss. The larger the lambda, # the larger the regularization. ewc_lambda: float = uniform(1e-3, 1.0, default=0.1) # todo: set the right value to use here. # `separate` to keep a separate penalty for each previous experience. `online` to # keep a single penalty summed with a decay factor over all previous tasks. mode: str = categorical("separate", "online", default="separate") # Used only if `mode` is 'online'. It specify the decay term of the # importance matrix. decay_factor: Optional[float] = uniform(0.0, 1.0, default=0.9) # if True, keep in memory both parameter values and importances for all previous # task, for all modes. If False, keep only last parameter values and importances. If # mode is `separate`, the value of `keep_importance_data` is set to be True. keep_importance_data: bool = categorical(True, False, default=False) if __name__ == "__main__": setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(EWCMethod, "method") args = parser.parse_args() method: EWCMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/ewc_test.py ================================================ """ WIP: Tests for the EWC Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, List, Type import pytest from avalanche.models import SimpleCNN, SimpleMLP from torch.nn import Module from sequoia.common import Config from sequoia.conftest import xfail_param from sequoia.settings.sl import IncrementalSLSetting, TaskIncrementalSLSetting from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .ewc import EWCMethod from .patched_models import MTSimpleCNN, MTSimpleMLP class TestEWCMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = EWCMethod ignored_parameter_differences: ClassVar[ List[str] ] = _TestAvalancheMethod.ignored_parameter_differences + [ "decay_factor", ] @classmethod @pytest.fixture( params=[ SimpleCNN, SimpleMLP, xfail_param( MTSimpleCNN, reason=( "Shape Mismatch between the saved parameter importance and the " "current weight tensor in EWC plugin." ), ), xfail_param( MTSimpleMLP, reason=( "Shape Mismatch between the saved parameter importance and the " "current weight tensor in EWC plugin." ), ), ] ) def method(cls, config: Config, request) -> AvalancheMethod: """Fixture that returns the Method instance to use when testing/debugging.""" model_type = request.param return cls.Method(model=model_type, train_mb_size=10, train_epochs=1) @pytest.mark.timeout(60) @pytest.mark.parametrize( "model_type", [ SimpleCNN, SimpleMLP, # MTSimpleCNN, xfail_param( MTSimpleCNN, reason=( "Shape Mismatch between the saved parameter importance and the " "current weight tensor in EWC plugin." ), ), # MTSimpleMLP, xfail_param( MTSimpleMLP, reason=( "Shape Mismatch between the saved parameter importance and the " "current weight tensor in EWC plugin." ), ), ], ) def test_short_task_incremental_setting( self, model_type: Type[Module], short_task_incremental_setting: TaskIncrementalSLSetting, config: Config, ): method = self.Method(model=model_type, train_mb_size=10, train_epochs=1) results = short_task_incremental_setting.apply(method, config) assert 0.05 < results.average_final_performance.objective @pytest.mark.timeout(60) @pytest.mark.parametrize( "model_type", [ SimpleCNN, SimpleMLP, xfail_param( MTSimpleCNN, reason=( "Shape Mismatch between the saved parameter importance and the " "current weight tensor in EWC plugin." ), ), # MTSimpleMLP, xfail_param( MTSimpleMLP, reason=( "Shape Mismatch between the saved parameter importance and the " "current weight tensor in EWC plugin." ), ), ], ) def test_short_class_incremental_setting( self, model_type: Type[Module], short_class_incremental_setting: IncrementalSLSetting, config: Config, ): method = self.Method(model=model_type, train_mb_size=10, train_epochs=1) results = short_class_incremental_setting.apply(method, config) assert 0.05 < results.average_final_performance.objective # @pytest.mark.timeout(60) # @pytest.mark.parametrize( # "model_type", # [ # SimpleCNN, # SimpleMLP, # xfail_param( # MTSimpleCNN, # reason=( # "Shape Mismatch between the saved parameter importance and the " # "current weight tensor in EWC plugin." # ), # ), # # MTSimpleMLP, # xfail_param( # MTSimpleMLP, # reason=( # "Shape Mismatch between the saved parameter importance and the " # "current weight tensor in EWC plugin." # ), # ), # ], # ) # def test_short_continual_sl_setting( # self, # model_type: Type[Module], # short_continual_sl_setting: ContinualSLSetting, # config: Config, # ): # super().test_short_continual_sl_setting( # model_type=model_type, # short_continual_sl_setting=short_continual_sl_setting, # config=config, # ) # @pytest.mark.timeout(60) # @pytest.mark.parametrize( # "model_type", # [ # SimpleCNN, # SimpleMLP, # xfail_param( # MTSimpleCNN, # reason=( # "Shape Mismatch between the saved parameter importance and the " # "current weight tensor in EWC plugin." # ), # ), # # MTSimpleMLP, # xfail_param( # MTSimpleMLP, # reason=( # "Shape Mismatch between the saved parameter importance and the " # "current weight tensor in EWC plugin." # ), # ), # ], # ) # def test_short_discrete_task_agnostic_sl_setting( # self, # model_type: Type[Module], # short_discrete_task_agnostic_sl_setting: DiscreteTaskAgnosticSLSetting, # config: Config, # ): # super().test_short_discrete_task_agnostic_sl_setting( # model_type=model_type, # short_discrete_task_agnostic_sl_setting=short_discrete_task_agnostic_sl_setting, # config=config, # ) ================================================ FILE: sequoia/methods/avalanche_methods/experience.py ================================================ """ 'Wrapper' around a PassiveEnvironment from Sequoia, disguising it as an 'Experience' from Avalanche. """ from typing import List, Optional import tqdm from avalanche.benchmarks.scenarios import Experience from avalanche.benchmarks.utils.avalanche_dataset import AvalancheDataset, AvalancheDatasetType from torch import Tensor from torch.utils.data import TensorDataset from sequoia.common.gym_wrappers.utils import IterableWrapper from sequoia.settings.sl import IncrementalSLSetting, PassiveEnvironment, SLSetting from sequoia.settings.sl.incremental.objects import Observations, Rewards class SequoiaExperience(IterableWrapper, Experience): def __init__( self, env: PassiveEnvironment, setting: IncrementalSLSetting, x: Tensor = None, y: Tensor = None, task_labels: Tensor = None, ): super().__init__(env=env) self.setting = setting self.type: str if isinstance(setting, IncrementalSLSetting): self.task_id = setting.current_task_id else: # No known task, or we don't have access to the task ID, so just consider # this to come from the first task. self.task_id = 0 if env is setting.train_env: self.type = "Train" self.transforms = setting.train_transforms elif env is setting.val_env: self.type = "Valid" self.transforms = setting.val_transforms else: self.type = "Test" assert env is setting.test_env self.transforms = setting.test_transforms self.name = f"{self.type}_{self.task_id}" if x is None and y is None and task_labels is None: # Collect the x, y, and perhaps t if they aren't provided. all_observations: List[Observations] = [] all_rewards: List[Rewards] = [] for batch in tqdm.tqdm(self, desc="Converting environment into TensorDataset"): observations: Observations rewards: Optional[Rewards] if isinstance(batch, Observations): observations = batch rewards = None else: assert isinstance(batch, tuple) and len(batch) == 2 observations, rewards = batch if rewards is None: # Need to send actions to the env before we can actually get the # associated Reward. # Here we sample a random action (no other choice really..) and so we # are going to get bad results in case the online performance is being # evaluated. action = self.env.action_space.sample() if observations.batch_size != action.shape[0]: action = action[: observations.batch_size] rewards = self.env.send(action) all_observations.append(observations) all_rewards.append(rewards) # TODO: This will be absolutely unfeasable for larger dataset like ImageNet. stacked_observations: Observations = Observations.concatenate(all_observations) x = stacked_observations.x task_labels = stacked_observations.task_labels assert all( y_i is not None for y in all_rewards for y_i in y ), "Need fully labeled train dataset for now." stacked_rewards: Rewards = Rewards.concatenate(all_rewards) y = stacked_rewards.y if task_labels is not None and all(t is None for t in task_labels): # The task labels are None, even at training time, which indicates this # is probably a `ContinualSLSetting` task_labels = None elif isinstance(task_labels, Tensor): task_labels = task_labels.cpu().numpy().tolist() dataset = TensorDataset(x, y) self._tensor_dataset = dataset self._dataset = AvalancheDataset( dataset=dataset, task_labels=task_labels, targets=y.tolist(), dataset_type=AvalancheDatasetType.CLASSIFICATION, ) # self.task_pattern_indices = {} # self.task_set = ... # class DummyDataset(AvalancheDataset): # pass # def train(self): # return self # self._dataset = self # self.tasks_pattern_indices = {} #dict({0: np.arange(len(self._dataset))}) # self.task_set = ... #_TaskSubsetDict(self._dataset) # self._dataset = env # from avalanche.benchmarks import GenericScenarioStream # class FakeStream(GenericScenarioStream): # pass # self.origin_stream = FakeStream("train", scenario="whatever") # self.origin_stream.name = "train" @property def dataset(self) -> AvalancheDataset: return self._dataset @dataset.setter def dataset(self, value: AvalancheDataset) -> None: self._dataset = value @property def task_label(self): """ The task label. This value will never have value "None". However, for scenarios that don't produce task labels a placeholder value like 0 is usually set. Beware that this field is meant as a shortcut to obtain a unique task label: it assumes that only patterns labeled with a single task label are present. If this experience contains patterns from multiple tasks, accessing this property will result in an exception. """ if not self.setting.task_labels_at_test_time: return 0 if self.type == "Test" and self.setting.task_labels_at_test_time: raise RuntimeError("More than one tasks present, can't use this property.") return self.task_id @property def task_labels(self): return self._tensor_dataset.tensors[-1] @property def current_experience(self): # Return the index of the return self.task_id @property def origin_stream(self) -> SLSetting: # NOTE: This class DummyStream(list): name = self.name # raise NotImplementedError return DummyStream() # def train(self): # return self ================================================ FILE: sequoia/methods/avalanche_methods/gdumb.py ================================================ """ Method based on GDumb from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.gdumb.GDumbPlugin` or `avalanche.training.strategies.strategy_wrappers.GDumb` for more info. BUG: There appears to be a bug in the GDumb plugin, caused by a mismatch in the tensor shapes when concatenating them into a TensorDataset, when batch size > 1. """ from collections import defaultdict from dataclasses import dataclass from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type import torch import tqdm from avalanche.benchmarks.utils import AvalancheConcatDataset from avalanche.training.plugins.gdumb import GDumbPlugin as _GDumbPlugin from avalanche.training.strategies import BaseStrategy, GDumb from simple_parsing import ArgumentParser from simple_parsing.helpers.hparams import uniform from torch import Tensor from torch.utils.data import TensorDataset from sequoia.methods import register_method from sequoia.settings.sl import ClassIncrementalSetting, TaskIncrementalSLSetting from sequoia.utils.logging_utils import get_logger from .base import AvalancheMethod logger = get_logger(__name__) class GDumbPlugin(_GDumbPlugin): """Patched version of the GDumbPlugin from Avalanche. The base implementation is quite inefficient: for each new item, it does an entire concatenation with the current dataset. This uses lists instead, and only concatenates once. It also uses the task labels from each sample in the dataset, rather than from the current experience, as there might be more than one task in the dataset. """ def __init__(self, mem_size: int = 200): super().__init__(mem_size=mem_size) self.ext_mem: Dict[Any, Tuple[List[Tensor], List[Tensor]]] = {} # count occurrences for each class self.counter: Dict[Any, Dict[Any, int]] = {} def after_train_dataset_adaptation(self, strategy: BaseStrategy, **kwargs): """Before training we make sure to organize the memory following GDumb approach and updating the dataset accordingly. """ # for each pattern, add it to the memory or not dataset = strategy.experience.dataset pbar = tqdm.tqdm(dataset, desc="Exhausting dataset to create GDumb buffer") for pattern, target, task_id in pbar: target = torch.as_tensor(target) target_value = target.item() if len(pattern.size()) == 1: pattern = pattern.unsqueeze(0) current_counter = self.counter.setdefault(task_id, defaultdict(int)) current_mem = self.ext_mem.setdefault(task_id, ([], [])) if current_counter == {}: # any positive (>0) number is ok patterns_per_class = 1 else: patterns_per_class = int(self.mem_size / len(current_counter.keys())) if ( target_value not in current_counter or current_counter[target_value] < patterns_per_class ): # add new pattern into memory if sum(current_counter.values()) >= self.mem_size: # full memory: replace item from most represented class # with current pattern to_remove = max(current_counter, key=current_counter.get) # dataset_size = len(current_mem) # for j in range(dataset_size): # if current_mem.tensors[1][j].item() == to_remove: # current_mem.tensors[0][j] = pattern # current_mem.tensors[1][j] = target # break dataset_size = len(current_mem[0]) for j in range(dataset_size): if current_mem[1][j].item() == to_remove: current_mem[0][j] = pattern current_mem[1][j] = target break current_counter[to_remove] -= 1 else: # memory not full: add new pattern current_mem[0].append(pattern) current_mem[1].append(target) # Indicate that we've changed the number of stored instances of this # class. current_counter[target_value] += 1 task_datasets: Dict[Any, TensorDataset] = {} for task_id, task_mem_tuple in self.ext_mem.items(): patterns, targets = task_mem_tuple task_dataset = TensorDataset(torch.stack(patterns, dim=0), torch.stack(targets, dim=0)) task_datasets[task_id] = task_dataset logger.debug( f"There are {len(task_dataset)} entries from task {task_id} in the new " f"dataset." ) adapted_dataset = AvalancheConcatDataset(task_datasets.values()) strategy.adapted_dataset = adapted_dataset @register_method @dataclass class GDumbMethod(AvalancheMethod[GDumb]): """GDumb strategy from Avalanche. See GDumbPlugin for more details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ name: ClassVar[str] = "gdumb" # replay buffer size. mem_size: int = uniform(100, 1_000, default=200) # The number of training epochs. train_epochs: int = uniform(1, 100, default=20) strategy_class: ClassVar[Type[BaseStrategy]] = GDumb def create_cl_strategy(self, setting: ClassIncrementalSetting) -> GDumb: strategy = super().create_cl_strategy(setting) # TODO: Replace the GDumbPlugin with our own version, with the same parameters. old_gdumb_plugin_index: Optional[int] = None for i, plugin in enumerate(strategy.plugins): if isinstance(plugin, _GDumbPlugin): old_gdumb_plugin_index = i break if old_gdumb_plugin_index is None: raise RuntimeError("Couldn't find the Strategy's GDumb plugin!") old_gdumb_plugin: _GDumbPlugin = strategy.plugins.pop(old_gdumb_plugin_index) logger.info("Replacing the GDumbPlugin with our 'patched' version.") new_gdumb_plugin = GDumbPlugin(mem_size=old_gdumb_plugin.mem_size) # NOTE: Might not be necessarily, since those should be empty, but here we also # copy the state from the old plugin to the new one. new_gdumb_plugin.ext_mem = old_gdumb_plugin.ext_mem new_gdumb_plugin.counter = old_gdumb_plugin.counter strategy.plugins.insert(old_gdumb_plugin_index, new_gdumb_plugin) return strategy if __name__ == "__main__": setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(GDumbMethod, "method") args = parser.parse_args() method: GDumbMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/gdumb_test.py ================================================ """ WIP: Tests for the GDumb Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .gdumb import GDumbMethod class TestGDumbMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = GDumbMethod ================================================ FILE: sequoia/methods/avalanche_methods/gem.py ================================================ """ Method based on GEM from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.gem.GEMPlugin` or `avalanche.training.strategies.strategy_wrappers.GEM` for more info. """ from dataclasses import dataclass from typing import ClassVar, Type from avalanche.training.strategies import GEM, BaseStrategy from simple_parsing import ArgumentParser from simple_parsing.helpers.hparams import uniform from sequoia.methods import register_method from sequoia.settings.sl import TaskIncrementalSLSetting from .base import AvalancheMethod @register_method @dataclass class GEMMethod(AvalancheMethod[GEM]): """Gradient Episodic Memory (GEM) strategy from Avalanche. See GEM plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # number of patterns per experience in the memory patterns_per_exp: int = uniform(10, 1000, default=100) # Offset to add to the projection direction in order to favour backward transfer # (gamma in original paper). memory_strength: float = uniform(1e-2, 1.0, default=0.5) strategy_class: ClassVar[Type[BaseStrategy]] = GEM if __name__ == "__main__": setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(GEMMethod, "method") args = parser.parse_args() method: GEMMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/gem_test.py ================================================ """ WIP: Tests for the GEM Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .gem import GEMMethod class TestGEMMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = GEMMethod ================================================ FILE: sequoia/methods/avalanche_methods/lwf.py ================================================ """ Method based on LwF from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.lwf.LwFPlugin` or `avalanche.training.strategies.strategy_wrappers.LwF` for more info. """ from dataclasses import dataclass from typing import ClassVar, Optional, Sequence, Type, Union from avalanche.training.plugins.lwf import LwFPlugin as LwFPlugin_ from avalanche.training.strategies import LwF from simple_parsing.helpers.hparams import uniform from torch import Tensor from sequoia.methods import register_method from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting from .base import AvalancheMethod class LwFPlugin(LwFPlugin_): """Patching a little error that happens in the 'LwFPlugin' which happens when a Multi-Task model is used, and when we grow the output space after each task. """ def _distillation_loss(self, out: Tensor, prev_out: Tensor) -> Tensor: """ Compute distillation loss between output of the current model and and output of the previous (saved) model. """ # Little "patch" to make sure this doesn't break if the shapes aren't exactly # the same: if out.shape != prev_out.shape: prev_outputs = prev_out.shape[-1] current_outputs = out.shape[-1] assert prev_outputs < current_outputs # Only consider the loss for the overlapping classes. We assume that the # first columns are for the same class, so this should be fine. out = out[..., :prev_outputs] return super()._distillation_loss(out=out, prev_out=prev_out) @register_method @dataclass class LwFMethod(AvalancheMethod[LwF]): """Learning without Forgetting strategy from Avalanche. See LwF plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # changing the 'name' in this case here, because the default name would be # 'lw_f'. name: ClassVar[str] = "lwf" # distillation hyperparameter. It can be either a float number or a list containing # alpha for each experience. alpha: Union[float, Sequence[float]] = uniform( 1e-2, 1, default=1 ) # TODO: Check if the range makes sense. # softmax temperature for distillation temperature: float = uniform(1, 10, default=2) # TODO: Check if the range makes sense. strategy_class: ClassVar[Type[LwF]] = LwF def create_cl_strategy(self, setting: SLSetting) -> LwF: strategy = super().create_cl_strategy(setting) # Find and replace the 'LwFPlugin' with our "patched" version: plugin_index: Optional[int] = None for i, plugin in enumerate(strategy.plugins): if type(plugin) is LwFPlugin_: plugin_index = i break assert plugin_index is not None, "LwF strategy should have an LwF Plugin, no?" assert isinstance(plugin_index, int) old_plugin: LwFPlugin_ = strategy.plugins[plugin_index] new_plugin = LwFPlugin(alpha=old_plugin.alpha, temperature=old_plugin.temperature) new_plugin.prev_model = old_plugin.prev_model strategy.plugins[plugin_index] = new_plugin return strategy if __name__ == "__main__": from simple_parsing import ArgumentParser setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(LwFMethod, "method") args = parser.parse_args() method: LwFMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/lwf_test.py ================================================ """ WIP: Tests for the LwF Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .lwf import LwFMethod class TestLwFMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = LwFMethod ================================================ FILE: sequoia/methods/avalanche_methods/naive.py ================================================ """ 'Naive' method from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.strategies.Naive` for more info. """ from typing import ClassVar, Type from avalanche.training.strategies import BaseStrategy, Naive from sequoia.settings.sl import TaskIncrementalSLSetting from .base import AvalancheMethod class NaiveMethod(AvalancheMethod[Naive]): """'Naive' Strategy from [Avalanche](https://github.com/ContinualAI/avalanche). The simplest (and least effective) Continual Learning strategy. Naive just incrementally fine tunes a single model without employing any method to contrast the catastrophic forgetting of previous knowledge. This strategy does not use task identities. Naive is easy to set up and its results are commonly used to show the worst performing baseline. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ strategy_class: ClassVar[Type[BaseStrategy]] = Naive if __name__ == "__main__": setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) method = NaiveMethod() results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/naive_test.py ================================================ """ WIP: Tests for the Naive Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .naive import NaiveMethod class TestNaiveMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = NaiveMethod ================================================ FILE: sequoia/methods/avalanche_methods/patched_models.py ================================================ """ Patch for the multi-task models in Avalanche, so that we can evaluate on future tasks, by selecting random prediction. """ import warnings from abc import abstractmethod from typing import Any, List, Optional import torch from avalanche.models import MTSimpleCNN as _MTSimpleCNN from avalanche.models import MTSimpleMLP as _MTSimpleMLP from avalanche.models import MultiHeadClassifier as _MultiHeadClassifier from avalanche.models.dynamic_modules import MultiTaskModule from torch import Tensor from torch.nn import functional as F from sequoia.utils import get_logger logger = get_logger(__name__) class PatchedMultiTaskModule(MultiTaskModule): @property @abstractmethod def known_task_ids(self) -> List[Any]: pass def task_inference_forward_pass(self, x: Tensor) -> Tensor: """Forward pass with a simple form of task inference.""" # We don't have access to task labels (`task_labels` is None). # --> Perform a simple kind of task inference: # 1. Perform a forward pass with each task's output head; # 2. Merge these predictions into a single prediction somehow. # NOTE: This assumes that the observations are batched. # These are used below to indicate the shape of the different tensors. B = x.shape[0] T = len(self.known_task_ids) # N = self.action_space.n # Tasks encountered previously and for which we have an output head. # TODO: This assumes that the keys of the ModuleDict are integers. known_task_ids: List[int] = list(int(t) for t in self.known_task_ids) assert known_task_ids # Placeholder for the predictions from each output head for each item in the # batch task_outputs = [None for _ in known_task_ids] # [T, B, N] # Get the forward pass for each task. for task_id in known_task_ids: # Create 'fake' Observations for this forward pass, with 'fake' task labels. # NOTE: We do this so we can call `self.forward` and not get an infinite # recursion. task_labels = torch.full([B], task_id, device=x.device, dtype=int) # task_observations = replace(observations, task_labels=task_labels) # Setup the model for task `task_id`, and then do a forward pass. task_forward_pass = self.forward(x, task_labels=task_labels) task_outputs[task_id] = task_forward_pass if len(task_outputs) == 1: return task_outputs[0] N = max(task_output.shape[-1] for task_output in task_outputs) # 'Merge' the predictions from each output head using some kind of task # inference. assert all(item is not None for item in task_outputs) # Stack the predictions (logits) from each output head. # NOTE: Here in Avalanche it's possible that each output head's output had a # different shape. Therefore we need to handle it like a list of tensors rather # than a stacked tensor. if all(not task_output.shape[-1] == N for task_output in task_outputs): raise NotImplementedError("TODO: Output heads didn't give outputs of the same shape!") # logits_from_each_head = task_outputs # probs_from_each_head = [ # torch.softmax(head_logits, dim=-1) for head_logits in logits_from_each_head # ] # IDEA: Add zeros to the outputs of a different shape. else: logits_from_each_head = torch.stack(task_outputs, dim=1) # Normalize the logits from each output head with softmax. # Example with batch size of 1, output heads = 2, and classes = 4: # logits from each head: [[[123, 456, 123, 123], [1, 1, 2, 1]]] # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]] probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1) assert probs_from_each_head.shape == (B, T, N) # Simple kind of task inference: # For each item in the batch, use the class that has the highest probability # accross all output heads. max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1) assert max_probs_across_heads.shape == (B, N) assert chosen_head_per_class.shape == (B, N) # Example (continued): # max probs across heads: [[0.2, 0.6, 0.4, 0.2]] # chosen output heads per class: [[1, 0, 1, 1]] # Determine which output head has highest "confidence": max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1) assert max_prob_value.shape == (B,) assert most_probable_class.shape == (B,) # Example (continued): # max_prob_value: [0.6] # max_prob_class: [1] # A bit of boolean trickery to get what we need, which is, for each item, the # index of the output head that gave the most confident prediction. mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=x.device) chosen_output_head_per_item = chosen_head_per_class[mask] assert mask.shape == (B, N) assert chosen_output_head_per_item.shape == (B,) # Example (continued): # mask: [[False, True, False, True]] # chosen_output_head_per_item: [0] # Create a bool tensor to select items associated with the chosen output head. selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=x.device) assert selected_mask.shape == (B, T) # Select the logits using the mask: selected_outputs = logits_from_each_head[selected_mask] assert selected_outputs.shape == (B, N) return selected_outputs from avalanche.benchmarks.utils import AvalancheDataset class MultiHeadClassifier(_MultiHeadClassifier): def __init__(self, in_features: int, initial_out_features: int = 2): """Multi-head classifier with separate classifiers for each task. Typically used in task-incremental scenarios where task labels are available and provided to the model. :param in_features: number of input features. :param initial_out_features: initial number of classes (can be dynamically expanded). """ super().__init__(in_features=in_features, initial_out_features=initial_out_features) def adaptation(self, dataset: AvalancheDataset): """If `dataset` contains new tasks, a new head is initialized. :param dataset: data from the current experience. :return: """ super().adaptation(dataset) def forward(self, x: Tensor, task_labels: Optional[Tensor]) -> Tensor: if task_labels is None: # We don't do task inference in this layer, since it's handled in the # patched models below. raise NotImplementedError("Shouldn't get None task labels in the MultiHeadClassifier!") else: assert isinstance(task_labels, Tensor) return super().forward(x, task_labels) def forward_single_task(self, x: Tensor, task_label: Optional[Tensor]): """compute the output given the input `x`. This module uses the task label to activate the correct head. :param x: :param task_label: :return: """ if task_label is not None: if not isinstance(task_label, int): task_label = task_label.item() # TODO: If/when we make the context variable truly continuous, then this # won't work. assert task_label is None or isinstance(task_label, int), task_label if str(task_label) not in self.classifiers: # TODO: Let's use the most 'recent' output head instead? known_task_labels = list(self.classifiers.keys()) assert known_task_labels, "Need to have seen at least one task!" last_known_task = known_task_labels[-1] task_label = last_known_task warnings.warn( RuntimeWarning( f"performing forward pass on previously unseen task, will pretend " f"inputs come from task {last_known_task} instead." ) ) return super().forward_single_task(x, task_label) class MTSimpleCNN(_MTSimpleCNN, PatchedMultiTaskModule): def __init__(self): super().__init__() self.classifier = MultiHeadClassifier(in_features=64) def forward(self, x: Tensor, task_labels: Optional[Tensor] = None) -> Tensor: if task_labels is None: # NOTE: When training, we could rely on a property like `current_task_id` # being set within the `on_task_switch` callback. # The reason for this is that in some of the strategies, `GEM` strategy (and # others), when training they sometimes don't pass a task index! In the case # of GEM though, it doesnt pass the task id when calculating the # reference gradient, so I'm not sure we want to be using this in this case. if self.training: warnings.warn( RuntimeWarning("Using task inference in the forward pass while training?") ) return self.task_inference_forward_pass(x=x) return super().forward(x=x, task_labels=task_labels) @property def known_task_ids(self) -> List[Any]: return list(self.classifier.classifiers.keys()) class MTSimpleMLP(_MTSimpleMLP, PatchedMultiTaskModule): def __init__(self, input_size: int = 28 * 28, hidden_size: int = 512): """ Multi-task MLP with multi-head classifier. """ super().__init__(input_size=input_size, hidden_size=hidden_size) self.classifier = MultiHeadClassifier(in_features=hidden_size) def forward(self, x: Tensor, task_labels: Optional[Tensor] = None) -> Tensor: if task_labels is None: if self.training: warnings.warn( RuntimeWarning("Using task inference in the forward pass while training?") ) return self.task_inference_forward_pass(x=x) return super().forward(x=x, task_labels=task_labels) @property def known_task_ids(self) -> List[Any]: return list(self.classifier.classifiers.keys()) ================================================ FILE: sequoia/methods/avalanche_methods/plugins.py ================================================ """ WIP: @lebrice: Plugins that I was using while trying to get the BaseStrategy and plugins from Avalanche to work directly with the Sequoia environments. """ from typing import List import numpy as np import torch from avalanche.training.plugins import StrategyPlugin from avalanche.training.strategies import BaseStrategy from torch import Tensor from torch.utils.data import TensorDataset class GatherDataset(StrategyPlugin): """IDEA: A Plugin that accumulates the tensors from the env to create a "proper" Dataset to be used by the plugins. """ def __init__(self): self.train_xs: List[Tensor] = [] self.train_ys: List[Tensor] = [] self.train_ts: List[Tensor] = [] self.train_dataset: TensorDataset self.train_datasets: List[TensorDataset] = [] self.eval_xs: List[Tensor] = [] self.eval_ys: List[Tensor] = [] self.eval_ts: List[Tensor] = [] self.eval_dataset: TensorDataset self.eval_datasets: List[TensorDataset] = [] def after_forward(self, strategy, **kwargs): x, y, t = strategy.mb_x, strategy.mb_task_id, strategy.mb_y self.train_xs.append(x) self.train_ys.append(y) self.train_ts.append(t) return super().after_forward(strategy, **kwargs) def after_training_epoch(self, strategy, **kwargs): self.train_dataset = TensorDataset( torch.cat(self.train_xs), torch.cat(self.train_ys), torch.cat(self.train_ts) ) self.train_xs.clear() self.train_ys.clear() self.train_ts.clear() return super().after_training_epoch(strategy, **kwargs) def after_eval_forward(self, strategy, **kwargs): x, y, t = strategy.mb_x, strategy.mb_task_id, strategy.mb_y self.eval_xs.append(x) self.eval_ys.append(y) self.eval_ts.append(t) return super().after_eval_forward(strategy, **kwargs) def after_eval_exp(self, strategy, **kwargs): self.eval_dataset = TensorDataset( torch.cat(self.eval_xs), torch.cat(self.eval_ys), torch.cat(self.eval_ts) ) self.eval_xs.clear() self.eval_ys.clear() self.eval_ts.clear() if strategy.setting: strategy.experience.dataset = self.eval_dataset self.eval_datasets.append(self.eval_dataset) return super().after_eval_exp(strategy, **kwargs) def train(self): return self.train_dataset def eval(self): return self.eval_dataset def after_training_exp(self, strategy: "BaseStrategy", **kwargs): """ Compute importances of parameters after each experience. """ if strategy.setting: strategy.experience.dataset = self.train_dataset self.train_datasets.append(self.train_dataset) return super().after_training_exp(strategy, **kwargs) # def after_eval_exp(self, strategy: "BaseStrategy", **kwargs): # """ # Compute importances of parameters after each experience. # """ # return super().after_eval_exp(strategy, **kwargs) class OnlineAccuracyPlugin(StrategyPlugin): def __init__(self): self.current_task_accuracies: List[float] = [] self.all_task_accuracies: List[List[float]] = [] self.enabled: bool = True def _calc_accuracy(self, strategy: "BaseStrategy") -> float: y_pred = strategy.logits.argmax(-1) y = strategy.mb_y acc = ((y_pred == y).sum() / len(y_pred)).item() return acc def after_forward(self, strategy: "BaseStrategy", **kwargs): if not self.enabled: return acc = self._calc_accuracy(strategy) self.current_task_accuracies.append(acc) return super().after_forward(strategy, **kwargs) def after_training_epoch(self, strategy, **kwargs): # Turn off at the end of the first epoch. self.all_task_accuracies.append(np.mean(self.current_task_accuracies)) self.current_task_accuracies.clear() self.enabled = False return super().after_training_epoch(strategy, **kwargs) ================================================ FILE: sequoia/methods/avalanche_methods/replay.py ================================================ """ Method based on Replay from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.replay.ReplayPlugin` or `avalanche.training.strategies.strategy_wrappers.Replay` for more info. """ import warnings from dataclasses import dataclass from typing import ClassVar, Optional, Type from avalanche.training.plugins.replay import ( ExperienceBalancedStoragePolicy as ExperienceBalancedStoragePolicy_, ) from avalanche.training.plugins.replay import ReplayPlugin as ReplayPlugin_ from avalanche.training.plugins.replay import StoragePolicy from avalanche.training.strategies import BaseStrategy, Replay from simple_parsing.helpers.hparams import uniform from sequoia.methods import register_method from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting from .base import AvalancheMethod class ReplayPlugin(ReplayPlugin_): def __init__(self, mem_size: int = 200, storage_policy: Optional["StoragePolicy"] = None): super().__init__(mem_size=mem_size, storage_policy=storage_policy) # "patch" the ExperienceBalanchedStoragePolicy: if type(self.storage_policy) is ExperienceBalancedStoragePolicy_: self.storage_policy = ExperienceBalancedStoragePolicy( ext_mem=self.storage_policy.ext_mem, mem_size=self.storage_policy.mem_size, adaptive_size=self.storage_policy.adaptive_size, num_experiences=self.storage_policy.num_experiences, ) class ExperienceBalancedStoragePolicy(ExperienceBalancedStoragePolicy_): def __call__(self, strategy: BaseStrategy, **kwargs): num_exps = strategy.training_exp_counter + 1 num_exps = num_exps if self.adaptive_size else self.num_experiences curr_data = strategy.experience.dataset # new group may be bigger because of the remainder. group_size = self.mem_size // num_exps new_group_size = group_size + (self.mem_size % num_exps) self.subsample_all_groups(group_size * (num_exps - 1)) curr_data = self.subsample_single(curr_data, new_group_size) self.ext_mem[strategy.training_exp_counter + 1] = curr_data # buffer size should always equal self.mem_size len_tot = sum(len(el) for el in self.ext_mem.values()) # TODO: Just disabling the failing assert check for now. Should check if this # makes any difference in the performance of the plugin: # assert len_tot == self.mem_size warnings.warn( RuntimeWarning( f"Ignoring a failing assert in Avalanche's Replay plugin: " f"len_tot ({len_tot}) != self.mem_size ({self.mem_size})" ) ) # NOTE: Could also avoid copying the code from their method here by suppressing # AssertionErrors: # import contextlib # with contextlib.suppress(AssertionError): # return super().__call__(strategy=strategy, **kwargs) @register_method @dataclass class ReplayMethod(AvalancheMethod[Replay]): """Replay strategy from Avalanche. See Replay plugin for details. This strategy does not use task identities. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # Replay buffer size. mem_size: int = uniform(100, 2_000, default=200) strategy_class: ClassVar[Type[BaseStrategy]] = Replay def create_cl_strategy(self, setting: SLSetting) -> Replay: strategy = super().create_cl_strategy(setting) # Find and replace the original plugin with our "patched" version: plugin_index: Optional[int] = None for i, plugin in enumerate(strategy.plugins): if type(plugin) is ReplayPlugin_: plugin_index = i break assert plugin_index is not None, "strategy should have the Plugin, no?" assert isinstance(plugin_index, int) old_plugin: ReplayPlugin_ = strategy.plugins[plugin_index] new_plugin = ReplayPlugin( mem_size=old_plugin.mem_size, storage_policy=old_plugin.storage_policy, ) strategy.plugins[plugin_index] = new_plugin return strategy if __name__ == "__main__": from simple_parsing import ArgumentParser setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(ReplayMethod, "method") args = parser.parse_args() method: ReplayMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/replay_test.py ================================================ """ WIP: Tests for the Replay Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .replay import ReplayMethod class TestReplayMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = ReplayMethod ================================================ FILE: sequoia/methods/avalanche_methods/synaptic_intelligence.py ================================================ """ Method based on SynapticIntelligence from [Avalanche](https://github.com/ContinualAI/avalanche). See `avalanche.training.plugins.synaptic_intelligence.SynapticIntelligencePlugin` or `avalanche.training.strategies.strategy_wrappers.SynapticIntelligence` for more info. """ from dataclasses import dataclass from typing import ClassVar, Optional, Set, Type import numpy as np import torch from avalanche.training.plugins.synaptic_intelligence import EwcDataType, ParamDict from avalanche.training.plugins.synaptic_intelligence import ( SynapticIntelligencePlugin as SynapticIntelligencePlugin_, ) from avalanche.training.plugins.synaptic_intelligence import SynDataType from avalanche.training.strategies import BaseStrategy, SynapticIntelligence from simple_parsing import ArgumentParser from simple_parsing.helpers.hparams import uniform from torch import Tensor from torch.nn import Module from sequoia.methods import register_method from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting from .base import AvalancheMethod class SynapticIntelligencePlugin(SynapticIntelligencePlugin_): # TODO: Why do they have everything as a static method rather than as a classmethod? # Makes it almost impossible to extend this SynapticIntelligencePlugin! @staticmethod @torch.no_grad() def extract_weights(model: Module, target: ParamDict, excluded_parameters: Set[str]): params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters) # Getting this error: # RuntimeError: The expanded size of the tensor (128) must match the existing # size (256) at non-singleton dimension 0. Target sizes: [128]. # Tensor sizes: [256] # TODO: @lebrice For now I'll just replace the entries in that 'target' dict if # the shapes don't match, and hope it still works. for name, param in params: # target[name][...] = param.detach().cpu().flatten() if param.shape == target[name].shape: target[name][...] = param.detach().cpu().flatten() else: # Replace the entries with a different shape, rather than replacing their data # as done above? target[name].data = param.detach().cpu().flatten() @staticmethod @torch.no_grad() def extract_grad(model, target: ParamDict, excluded_parameters: Set[str]): params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters) # Store the gradients into target for name, param in params: # BUG: Getting AttributeError: 'NoneType' object has no attribute 'detach' if param.grad is not None: target[name][...] = param.grad.detach().cpu().flatten() @staticmethod def compute_ewc_loss( model, ewc_data: EwcDataType, excluded_parameters: Set[str], device, lambd=0.0 ): params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters) loss = None for name, param in params: weights = param.to(device).flatten() # Flat, not detached param_ewc_data_0 = ewc_data[0][name].to(device) # Flat, detached param_ewc_data_1 = ewc_data[1][name].to(device) # Flat, detached # BUG: Getting RuntimeError: inconsistent tensor size, expected tensor [128] # and src [256] to have the same number of elements, but got 128 and 256 # elements respectively if param_ewc_data_1.shape == param_ewc_data_0.shape == weights.shape: syn_loss: Tensor = torch.dot( param_ewc_data_1, (weights - param_ewc_data_0) ** 2 ) * (lambd / 2) else: # FIXME: For now, I'll just consider the 'common' elements? param_0_cols = param_ewc_data_0.shape[-1] param_1_cols = param_ewc_data_1.shape[-1] # Weird: why does param_0 have *more* columns than param_1? assert param_0_cols > param_1_cols # Assuming that the first indices are the common weights between tasks: param_ewc_data_0 = param_ewc_data_0[..., :param_1_cols] weights = weights[..., :param_1_cols] syn_loss: Tensor = torch.dot( param_ewc_data_1, (weights - param_ewc_data_0) ** 2 ) * (lambd / 2) if loss is None: loss = syn_loss else: loss += syn_loss return loss @staticmethod @torch.no_grad() def post_update(model, syn_data: SynDataType, excluded_parameters: Set[str]): SynapticIntelligencePlugin_.extract_weights( model, syn_data["new_theta"], excluded_parameters ) SynapticIntelligencePlugin_.extract_grad(model, syn_data["grad"], excluded_parameters) for param_name in syn_data["trajectory"]: # BUG: Getting RuntimeError: The size of tensor a (128) must match the size # of tensor b (256) at non-singleton dimension 0 # syn_data['trajectory'][param_name] += \ # syn_data['grad'][param_name] * ( # syn_data['new_theta'][param_name] - # syn_data['old_theta'][param_name]) destination: Tensor = syn_data["trajectory"][param_name] grad: Tensor = syn_data["grad"][param_name] new_theta: Tensor = syn_data["new_theta"][param_name] old_theta: Tensor = syn_data["old_theta"][param_name] if not (destination.shape == grad.shape == new_theta.shape == old_theta.shape): destination_cols = destination.shape[-1] grad_cols = grad.shape[-1] new_theta_cols = new_theta.shape[-1] old_theta_cols = old_theta.shape[-1] assert grad_cols < new_theta_cols and new_theta_cols == old_theta_cols # FIXME: @lebrice Chop the last two? or extend the grad? Extending the # grad with zeros for now (no idea what that implies though!) grad_extension = grad.new_zeros(size=[*grad.shape[:-1], new_theta_cols - grad_cols]) grad = torch.cat([grad, grad_extension], -1) destination_extension = destination.new_zeros( size=[*destination.shape[:-1], new_theta_cols - destination_cols] ) destination = torch.cat([destination, destination_extension], -1) assert destination.shape == grad.shape == new_theta.shape == old_theta.shape destination += grad * (new_theta - old_theta) # Replace the entry (in case we replaced the `destination` variable above). syn_data["trajectory"][param_name] = destination @staticmethod @torch.no_grad() def update_ewc_data( net, ewc_data: EwcDataType, syn_data: SynDataType, clip_to: float, excluded_parameters: Set[str], c=0.0015, ): SynapticIntelligencePlugin.extract_weights(net, syn_data["new_theta"], excluded_parameters) eps = 0.0000001 # 0.001 in few task - 0.1 used in a more complex setup for param_name in syn_data["cum_trajectory"]: # BUG: Getting RuntimeError: The size of tensor a (128) must match the size # of tensor b (256) at non-singleton dimension 0 # syn_data['cum_trajectory'][param_name] += \ # c * syn_data['trajectory'][param_name] / ( # np.square(syn_data['new_theta'][param_name] - # ewc_data[0][param_name]) + eps) cum_trajectory = syn_data["cum_trajectory"][param_name] trajectory = syn_data["trajectory"][param_name] new_theta = syn_data["new_theta"][param_name] ewc_data_0 = ewc_data[0][param_name] if not ( cum_trajectory.shape == trajectory.shape == new_theta.shape == ewc_data_0.shape ): cum_trajectory_cols = cum_trajectory.shape[-1] trajectory_cols = trajectory.shape[-1] new_theta_cols = new_theta.shape[-1] ewc_data_0_cols = ewc_data_0.shape[-1] assert cum_trajectory_cols < trajectory_cols == new_theta_cols == ewc_data_0_cols # FIXME: @lebrice Extending the cum_trajectory with zeros for now (no # idea what that implies though!) cum_trajectory_extension = cum_trajectory.new_zeros( size=[ *cum_trajectory.shape[:-1], trajectory_cols - cum_trajectory_cols, ] ) cum_trajectory = torch.cat([cum_trajectory, cum_trajectory_extension], -1) cum_trajectory += c * trajectory / (np.square(new_theta - ewc_data_0) + eps) # Reset the cum_trajectory variable in the dict, just in case we replaced # the variable above. syn_data["cum_trajectory"][param_name] = cum_trajectory for param_name in syn_data["cum_trajectory"]: ewc_data[1][param_name] = torch.empty_like( syn_data["cum_trajectory"][param_name] ).copy_(-syn_data["cum_trajectory"][param_name]) # change sign here because the Ewc regularization # in Caffe (theta - thetaold) is inverted w.r.t. syn equation [4] # (thetaold - theta) for param_name in ewc_data[1]: ewc_data[1][param_name] = torch.clamp(ewc_data[1][param_name], max=clip_to) ewc_data[0][param_name] = syn_data["new_theta"][param_name].clone() # TODO: Why do they have everything as a static method rather than as a classmethod? # Makes it almost impossible to extend this SynapticIntelligencePlugin! SynapticIntelligencePlugin_.extract_weights = SynapticIntelligencePlugin.extract_weights SynapticIntelligencePlugin_.extract_grad = SynapticIntelligencePlugin.extract_grad SynapticIntelligencePlugin_.compute_ewc_loss = SynapticIntelligencePlugin.compute_ewc_loss SynapticIntelligencePlugin_.post_update = SynapticIntelligencePlugin.post_update SynapticIntelligencePlugin_.update_ewc_data = SynapticIntelligencePlugin.update_ewc_data @register_method @dataclass class SynapticIntelligenceMethod(AvalancheMethod[SynapticIntelligence]): """The Synaptic Intelligence strategy from Avalanche. This is the Synaptic Intelligence PyTorch implementation of the algorithm described in the paper "Continuous Learning in Single-Incremental-Task Scenarios" (https://arxiv.org/abs/1806.08568) The original implementation has been proposed in the paper "Continual Learning Through Synaptic Intelligence" (https://arxiv.org/abs/1703.04200). The Synaptic Intelligence regularization can also be used in a different strategy by applying the :class:`SynapticIntelligencePlugin` plugin. See the parent class `AvalancheMethod` for the other hyper-parameters and methods. """ # Synaptic Intelligence lambda term. si_lambda: float = uniform(1e-2, 1.0, default=0.5) # TODO: Check the range. strategy_class: ClassVar[Type[BaseStrategy]] = SynapticIntelligence def create_cl_strategy(self, setting: SLSetting) -> SynapticIntelligence: strategy = super().create_cl_strategy(setting) # Find and replace the original plugin with our "patched" version: plugin_index: Optional[int] = None for i, plugin in enumerate(strategy.plugins): if type(plugin) is SynapticIntelligencePlugin_: plugin_index = i break assert plugin_index is not None, "strategy should have the Plugin, no?" assert isinstance(plugin_index, int) old_plugin: SynapticIntelligencePlugin_ = strategy.plugins[plugin_index] new_plugin = SynapticIntelligencePlugin( si_lambda=old_plugin.si_lambda, excluded_parameters=old_plugin.excluded_parameters, # device=old_plugin.device, ) new_plugin.ewc_data = old_plugin.ewc_data new_plugin.syn_data = old_plugin.syn_data new_plugin._device = old_plugin._device strategy.plugins[plugin_index] = new_plugin return strategy if __name__ == "__main__": setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True ) # Create the Method, either manually or through the command-line: parser = ArgumentParser(__doc__) parser.add_arguments(SynapticIntelligenceMethod, "method") args = parser.parse_args() method: SynapticIntelligenceMethod = args.method results = setting.apply(method) ================================================ FILE: sequoia/methods/avalanche_methods/synaptic_intelligence_test.py ================================================ """ WIP: Tests for the SynapticIntelligence Method. For now this only inherits the tests from the AvalancheMethod class. """ from typing import ClassVar, Type from .base import AvalancheMethod from .base_test import _TestAvalancheMethod from .synaptic_intelligence import SynapticIntelligenceMethod class TestSynapticIntelligenceMethod(_TestAvalancheMethod): Method: ClassVar[Type[AvalancheMethod]] = SynapticIntelligenceMethod ================================================ FILE: sequoia/methods/base_method.py ================================================ """ Defines a Method, which is a "solution" for a given "problem" (a Setting). The Method could be whatever you want, really. For the 'baselines' we have here, we use pytorch-lightning, and a few little utility classes such as `Metrics` and `Loss`, which are basically just like dicts/objects, with some cool other methods. TODO: Add a wrapper to limit the 'epoch' length in RL, and then use an early-stopping callback to also perform validation like in SL. """ import warnings from dataclasses import dataclass, fields, is_dataclass from pathlib import Path from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Type, Union import gym import torch from pytorch_lightning import Callback, Trainer from pytorch_lightning.callbacks.early_stopping import EarlyStopping from simple_parsing import mutable_field from wandb.wandb_run import Run from sequoia.common import Config from sequoia.common.spaces import Image from sequoia.methods import register_method from sequoia.settings import RLSetting, SLSetting from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.settings.base import Method from sequoia.settings.base.environment import Environment from sequoia.settings.base.objects import Actions, Observations, Rewards from sequoia.settings.base.results import Results from sequoia.settings.base.setting import Setting, SettingType from sequoia.settings.rl.continual import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from sequoia.utils.parseable import Parseable from sequoia.utils.serialization import Serializable from sequoia.utils.utils import compute_identity from .models import BaseModel from .trainer import Trainer, TrainerConfig logger = get_logger(__name__) # TODO: Set the target setting back to Setting once we fix the PL + RL issues. @register_method @dataclass class BaseMethod(Method, Serializable, Parseable, target_setting=SLSetting): """Versatile Base method which targets all settings. Uses pytorch-lightning's Trainer for training and LightningModule as model. Uses a [BaseModel](methods/models/base_model/base_model.py), which can be used for: - Self-Supervised training with modular auxiliary tasks; - Semi-Supervised training on partially labeled batches; - Multi-Head prediction (e.g. in task-incremental scenario); """ # NOTE: these two fields are also used to create the command-line arguments. # HyperParameters of the method. hparams: BaseModel.HParams = mutable_field(BaseModel.HParams) # Configuration options. config: Config = mutable_field(Config) # Options for the Trainer object. trainer_options: TrainerConfig = mutable_field(TrainerConfig) def __init__( self, hparams: BaseModel.HParams = None, config: Config = None, trainer_options: TrainerConfig = None, **kwargs, ): """Creates a new BaseMethod, using the provided configuration options. Parameters ---------- hparams : BaseModel.HParams, optional Hyper-parameters of the BaseModel used by this Method. Defaults to None. config : Config, optional Configuration dataclass with options like log_dir, device, etc. Defaults to None. trainer_options : TrainerConfig, optional Dataclass which holds all the options for creating the `pl.Trainer` which will be used for training. Defaults to None. **kwargs : If any of the above arguments are left as `None`, then they will be created using any appropriate value from `kwargs`, if present. ## Examples: ``` method = BaseMethod(hparams=BaseModel.HParams(learning_rate=0.01)) method = BaseMethod(learning_rate=0.01) # Same as above method = BaseMethod(config=Config(debug=True)) method = BaseMethod(debug=True) # Same as above method = BaseMethod(hparams=BaseModel.HParams(learning_rate=0.01), config=Config(debug=True)) method = BaseMethod(learning_rate=0.01, debug=True) # Same as above ``` """ # TODO: When creating a Method from a script, like `BaseMethod()`, # should we expect the hparams to be passed? Should we create them from # the **kwargs? Should we parse them from the command-line? # Get the type of hparams to use from the field's type annotation. hparam_field = [f for f in fields(self) if f.name == "hparams"][0] hparam_type = hparam_field.type # Option 2: Try to use the keyword arguments to create the hparams, # config and trainer options. if kwargs: logger.info( f"using keyword arguments {kwargs} to populate the corresponding " f"values in the hparams, config and trainer_options." ) self.hparams = hparams or hparam_type.from_dict(kwargs, drop_extra_fields=True) self.config = config or Config.from_dict(kwargs, drop_extra_fields=True) self.trainer_options = trainer_options or TrainerConfig.from_dict( kwargs, drop_extra_fields=True ) elif self._argv: # Since the method was parsed from the command-line, parse those as # well from the argv that were used to create the Method. # Option 3: Parse them from the command-line. # assert not kwargs, "Don't pass any extra kwargs to the constructor!" self.hparams = hparams or hparam_type.from_args(self._argv, strict=False) self.config = config or Config.from_args(self._argv, strict=False) self.trainer_options = trainer_options or TrainerConfig.from_args( self._argv, strict=False ) else: # Option 1: Use the default values: self.hparams = hparams or hparam_type() self.config = config or Config() self.trainer_options = trainer_options or TrainerConfig() assert self.hparams assert self.config assert self.trainer_options if self.config.debug: # Disable wandb logging if debug is True. self.trainer_options.no_wandb = True # The model and Trainer objects will be created in `self.configure`. # NOTE: This right here doesn't create the fields, it just gives some # type information for static type checking. self.trainer: Trainer self.model: BaseModel self.additional_train_wrappers: List[Callable] = [] self.additional_valid_wrappers: List[Callable] = [] self.setting: Setting def configure(self, setting: SettingType) -> None: """Configures the method for the given Setting. Concretely, this creates the model and Trainer objects which will be used to train and test a model for the given `setting`. Args: setting (SettingType): The setting the method will be evaluated on. """ # Note: this here is temporary, just tinkering with wandb atm. method_name: str = self.get_name() # Set the default batch size to use, depending on the kind of Setting. if self.hparams.batch_size is None: if isinstance(setting, RLSetting): # Default batch size of 1 in RL self.hparams.batch_size = 1 elif isinstance(setting, SLSetting): self.hparams.batch_size = 32 else: warnings.warn( UserWarning( f"Dont know what batch size to use by default for setting " f"{setting}, will try 16." ) ) self.hparams.batch_size = 16 # Set the batch size on the setting. setting.batch_size = self.hparams.batch_size # TODO: Should we set the 'config' on the setting from here? if setting.config and setting.config == self.config: pass elif self.config != Config(): assert ( setting.config is None or setting.config == Config() ), "method.config has been modified, and so has setting.config!" setting.config = self.config elif setting.config: assert setting.config != Config(), "Weird, both configs have default values.." self.config = setting.config setting_name: str = setting.get_name() dataset = setting.dataset if isinstance(setting, IncrementalAssumption): if self.hparams.multihead is None: # Use a multi-head model by default if the task labels are # available at training time and has more than one task. if setting.task_labels_at_test_time: assert setting.task_labels_at_train_time self.hparams.multihead = setting.nb_tasks > 1 if not setting.known_task_boundaries_at_train_time: # If we won't have access to the task boundaries, so we can only do one # epoch. self.trainer_options.max_epochs = 1 if isinstance(setting, ContinualRLSetting): setting.add_done_to_observations = True setting.prefer_tensors = True if isinstance(setting.observation_space.x, Image): if self.hparams.encoder is None: self.hparams.encoder = "simple_convnet" # TODO: Add 'proper' transforms for cartpole, specifically? from sequoia.common.transforms import Transforms transforms = [ Transforms.three_channels, Transforms.to_tensor, Transforms.resize_64x64, ] setting.transforms = transforms setting.train_transforms = transforms setting.val_transforms = transforms setting.test_transforms = transforms # Configure the baseline specifically for an RL setting. # TODO: Select which output head to use from the command-line? # Limit the number of epochs so we never iterate on a closed env. # TODO: Would multiple "epochs" be possible? if setting.train_max_steps is not None: self.trainer_options.max_epochs = 1 self.trainer_options.limit_train_batches = setting.train_max_steps // ( setting.batch_size or 1 ) self.trainer_options.limit_val_batches = min( setting.train_max_steps // (setting.batch_size or 1), 1000 ) # TODO: Test batch size is limited to 1 for now. # NOTE: This isn't used, since we don't call `trainer.test()`. self.trainer_options.limit_test_batches = setting.train_max_steps # TODO: Debug the multi-GPU setup with DP accelerator and pytorch lightning. self.model = self.create_model(setting).to(self.config.device) # The PolicyHead actually does its own backward pass, so we disable # automatic optimization when using it. from .models.output_heads import PolicyHead if isinstance(self.model.output_head, PolicyHead): # Doing the backward pass manually, since there might not be a loss # at each step. self.trainer_options.automatic_optimization = False self.trainer = self.create_trainer(setting) self.setting = setting def fit( self, train_env: Environment[Observations, Actions, Rewards], valid_env: Environment[Observations, Actions, Rewards], ): """Called by the Setting to train the method. Could be called more than once before training is 'over', for instance when training on a series of tasks. Overwrite this to customize training. """ assert self.model is not None, ( "Setting should have been called method.configure(setting=self) " "before calling `fit`!" ) # TODO: Figure out if there is a smarter way to reset the state of the Trainer, # rather than just creating a new one every time. self.trainer = self.create_trainer(self.setting) # NOTE: It doesn't seem sufficient to just do this, since for instance the # early-stopping callback would prevent training on future tasks, since they # have higher validation loss: # self.trainer.current_epoch = 0 success = self.trainer.fit( model=self.model, train_dataloader=train_env, val_dataloaders=valid_env, ) # BUG: After `fit`, it seems like the output head of the model is on the CPU? self.model.to(self.config.device) return success def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: """Get a batch of predictions (actions) for a batch of observations. This gets called by the Setting during the test loop. TODO: There is a mismatch here between the type of the output of this method (`Actions`) and the type of `action_space`: we should either have a `Discrete` action space, and this method should return ints, or this method should return `Actions`, and the `action_space` should be a `TypedDictSpace` or something similar. Either way, `get_actions(obs, action_space) in action_space` should always be `True`. """ self.model.eval() with torch.no_grad(): forward_pass = self.model.forward(observations) actions: Actions = forward_pass.actions action_numpy = actions.actions_np assert action_numpy in action_space, (action_numpy, action_space) return actions def create_model(self, setting: SettingType) -> BaseModel[SettingType]: """Creates the BaseModel (a LightningModule) for the given Setting. You could extend this to customize which model is used depending on the setting. TODO: As @oleksost pointed out, this might allow the creation of weird 'frankenstein' methods that are super-specific to each setting, without really having anything in common. Args: setting (SettingType): An experimental setting. Returns: BaseModel[SettingType]: The BaseModel that is to be applied to that setting. """ # Create the model, passing the setting, hparams and config. return BaseModel(setting=setting, hparams=self.hparams, config=self.config) def create_trainer(self, setting: SettingType) -> Trainer: """Creates a Trainer object from pytorch-lightning for the given setting. NOTE: At the moment, uses the KNN and VAE callbacks. To use different callbacks, overwrite this method. Args: Returns: Trainer: the Trainer object. """ # We use this here to create loggers! # No need to use this, we can use callbacks = self.configure_callbacks(setting) loggers = [] if setting.wandb and setting.wandb.project: wandb_logger = setting.wandb.make_logger() loggers.append(wandb_logger) trainer = self.trainer_options.make_trainer( config=self.config, callbacks=callbacks, loggers=loggers, ) return trainer def get_experiment_name(self, setting: Setting, experiment_id: str = None) -> str: """Gets a unique name for the experiment where `self` is applied to `setting`. This experiment name will be passed to `orion` when performing a run of Hyper-Parameter Optimization. Parameters ---------- - setting : Setting The `Setting` onto which this method will be applied. This method will be used when - experiment_id: str, optional A custom hash to append to the experiment name. When `None` (default), a unique hash will be created based on the values of the Setting's fields. Returns ------- str The name for the experiment. """ if not experiment_id: setting_dict = setting.to_dict() # BUG: Some settings have non-string keys/value or something? from sequoia.utils.utils import flatten_dict d = flatten_dict(setting_dict) experiment_id = compute_identity(size=5, **d) assert isinstance(setting.dataset, str), "assuming that dataset is a str for now." return f"{self.get_name()}-{setting.get_name()}_{setting.dataset}_{experiment_id}" def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ return { "hparams": self.hparams.get_orion_space(), "trainer_options": self.trainer_options.get_orion_space(), } def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ # Here we overwrite the corresponding attributes with the new suggested values # leaving other fields unchanged. self.hparams = self.hparams.replace(**new_hparams["hparams"]) # BUG with the `replace` function and Union[int, float] type, it doesn't # preserve the type of the field when serializing/deserializing! self.trainer_options.max_epochs = new_hparams["trainer_options"]["max_epochs"] def hparam_sweep( self, setting: Setting, search_space: Dict[str, Union[str, Dict]] = None, experiment_id: str = None, database_path: Union[str, Path] = None, max_runs: int = None, hpo_algorithm: Union[str, Dict] = "BayesianOptimizer", debug: bool = False, ) -> Tuple[BaseModel.HParams, float]: # Setting max epochs to 1, just to keep runs somewhat short. # NOTE: Now we're actually going to have the max_epochs as a tunable # hyper-parameter, so we're not hard-setting this value anymore. # self.trainer_options.max_epochs = 1 # Call 'configure', so that we create `self.model` at least once, which will # update the hparams.output_head field to be of the right type. This is # necessary in order for the `get_orion_space` to retrieve all the hparams # of the output head. self.configure(setting) return super().hparam_sweep( setting=setting, search_space=search_space, experiment_id=experiment_id, database_path=database_path, max_runs=max_runs, debug=debug or self.config.debug, hpo_algorithm=hpo_algorithm, ) def receive_results(self, setting: Setting, results: Results): """Receives the results of an experiment, where `self` was applied to Setting `setting`, which produced results `results`. """ super().receive_results(setting, results=results) def configure_callbacks(self, setting: SettingType = None) -> List[Callback]: """Create the PytorchLightning Callbacks for this Setting. These callbacks will get added to the Trainer in `create_trainer`. Parameters ---------- setting : SettingType The `Setting` on which this Method is going to be applied. Returns ------- List[Callback] A List of `Callaback` objects to use during training. """ setting = setting or self.setting # TODO: Move this to something like a `configure_callbacks` method in the model, # once PL adds it. # from sequoia.common.callbacks.vae_callback import SaveVaeSamplesCallback return [ EarlyStopping(monitor="val/loss"), # self.hparams.knn_callback, # SaveVaeSamplesCallback(), ] def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type[Setting], Results]: """(WIP): Runs this Method on all its applicable settings. Returns ------- Dict mapping from setting type to the Results produced by this method. """ applicable_settings = self.get_applicable_settings() all_results: Dict[Type[Setting], Results] = {} for setting_type in applicable_settings: setting = setting_type.from_args(argv) results = setting.apply(self) all_results[setting_type] = results print(f"All results for method of type {type(self)}:") print( { method.get_name(): (results.get_metric() if results else "crashed") for method, results in all_results.items() } ) return all_results def __init_subclass__(cls, target_setting: Type[SettingType] = Setting, **kwargs) -> None: """Called when creating a new subclass of Method. Args: target_setting (Type[Setting], optional): The target setting. Defaults to None, in which case the method will inherit the target setting of it's parent class. """ if not is_dataclass(cls): logger.critical( UserWarning( f"The BaseMethod subclass {cls} should be decorated with " f"@dataclass!\n" f"While this isn't strictly necessary for things to work, it is" f"highly recommended, as any dataclass-style class attributes " f"won't have the corresponding command-line arguments " f"generated, which can cause a lot of subtle bugs." ) ) super().__init_subclass__(target_setting=target_setting, **kwargs) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching between tasks. Args: task_id (int, optional): the id of the new task. When None, we are basically being informed that there is a task boundary, but without knowing what task we're switching to. """ self.model.on_task_switch(task_id) def setup_wandb(self, run: Run) -> None: """Called by the Setting when using Weights & Biases, after `wandb.init`. This method is here to provide Methods with the opportunity to log some of their configuration options or hyper-parameters to wandb. NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by this point. Parameters ---------- run : wandb.Run Current wandb Run. """ # TODO: (@lebrice) I think these will probably be set by the wandb logger, # run.config["config"] = self.config.to_dict() # Need to check wether this causes any issues. # run.config["hparams"] = self.hparams.to_dict() # run.config["trainer_config"] = self.trainer_options ================================================ FILE: sequoia/methods/base_method_test.py ================================================ from typing import ClassVar, Dict, Type import pytest import torch from sequoia.common.config import Config from sequoia.conftest import slow from sequoia.methods.trainer import TrainerConfig from sequoia.settings import ( ClassIncrementalSetting, IncrementalRLSetting, Setting, TraditionalRLSetting, ) from sequoia.settings.rl.continual.results import ContinualRLResults from .base_method import BaseMethod from .method_test import MethodTests class TestBaseMethod(MethodTests): Method: ClassVar[Type[BaseMethod]] = BaseMethod method_debug_kwargs: ClassVar[Dict] = {"max_epochs": 1} @classmethod @pytest.fixture(scope="module") def trainer_options(cls, tmp_path_factory) -> TrainerConfig: tmp_path = tmp_path_factory.mktemp("log_dir") return TrainerConfig( # logger=False, max_epochs=1, checkpoint_callback=False, default_root_dir=tmp_path, ) @classmethod @pytest.fixture def method(cls, config: Config, trainer_options: TrainerConfig) -> BaseMethod: """Fixture that returns the Method instance to use when testing/debugging.""" trainer_options.max_epochs = 1 return cls.Method(trainer_options=trainer_options, config=config) def validate_results( self, setting: Setting, method: BaseMethod, results: Setting.Results, ) -> None: assert results assert results.objective # TODO: Set some 'reasonable' bounds on the performance here, depending on the # setting/dataset. @pytest.mark.xfail(reason="TODO: Re-enable once we fix the bugs for BaseMethod in RL.") @slow @pytest.mark.timeout(120) def test_cartpole_state(self, config: Config, trainer_options: TrainerConfig): """Test that the baseline method can learn cartpole (state input)""" # TODO: Actually remove the trainer_config class from the BaseMethod? trainer_options.max_epochs = 1 method = self.Method(config=config, trainer_options=trainer_options) method.hparams.learning_rate = 0.01 setting = TraditionalRLSetting( dataset="CartPole-v0", train_max_steps=5000, nb_tasks=1, test_max_steps=2_000, config=config, ) results: ContinualRLResults = setting.apply(method) print(results.to_log_dict()) # The method should normally get the maximum length (200), but checking with # 100 just to account for randomness. assert results.average_metrics.mean_episode_length > 100.0 @pytest.mark.xfail(reason="TODO: Re-enable once we fix the bugs for BaseMethod in RL.") @slow @pytest.mark.timeout(120) def test_incremental_cartpole_state(self, config: Config, trainer_options: TrainerConfig): """Test that the baseline method can learn cartpole (state input)""" # TODO: Actually remove the trainer_config class from the BaseMethod? trainer_options.max_epochs = 1 method = self.Method(config=config, trainer_options=trainer_options) method.hparams.learning_rate = 0.01 setting = IncrementalRLSetting( dataset="cartpole", train_max_steps=5000, nb_tasks=2, test_max_steps=1000 ) results: ContinualRLResults = setting.apply(method) print(results.to_log_dict()) # The method should normally get the maximum length (200), but checking with # 100 just to account for randomness. assert results.mean_episode_length > 100.0 @pytest.mark.xfail(reason="TODO: Unreliable test.") @pytest.mark.timeout(30) @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda is required.") def test_device_of_output_head_is_correct( self, short_class_incremental_setting: ClassIncrementalSetting, trainer_options: TrainerConfig, config: Config, ): """There is a bug happening where the output head is on CPU while the rest of the model is on GPU. """ trainer_options.max_epochs = 1 method = self.Method(trainer_options=trainer_options, config=config) results = short_class_incremental_setting.apply(method) assert 0.20 <= results.objective def test_weird_pl_bug(): replica_device = None def find_tensor_with_device(tensor: torch.Tensor) -> torch.Tensor: nonlocal replica_device if replica_device is None and tensor.device != torch.device("cpu"): replica_device = tensor.device return tensor from pytorch_lightning.utilities.apply_func import apply_to_collection from sequoia.settings.sl.incremental.objects import ( IncrementalSLObservations, IncrementalSLRewards, ) # TODO: Not quite sure why there is also a `0` in there. input_device = "cuda" inputs = ( ( IncrementalSLObservations( x=torch.rand([32, 3, 28, 28], device=input_device), task_labels=torch.zeros([32], device=input_device), ), IncrementalSLRewards(y=torch.randint(10, [32], device=input_device)), ), 0, ) # from collections.abc import Mapping, Sequence apply_to_collection(inputs, dtype=torch.Tensor, function=find_tensor_with_device) assert replica_device is not None BaseMethodTests = TestBaseMethod ================================================ FILE: sequoia/methods/conftest.py ================================================ import pytest from sequoia.client import SettingProxy from sequoia.common.config import Config from sequoia.settings.sl import ( ClassIncrementalSetting, ContinualSLSetting, DiscreteTaskAgnosticSLSetting, TaskIncrementalSLSetting, ) from sequoia.settings.sl.continual.setting import random_subset @pytest.fixture(scope="session") def short_class_incremental_setting(session_config: Config): setting = ClassIncrementalSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True, ) setting.config = session_config setting.prepare_data() setting.setup() # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) return setting @pytest.fixture(scope="session") def short_continual_sl_setting(session_config: Config): setting = ContinualSLSetting( dataset="mnist", monitor_training_performance=True, ) setting.config = session_config setting.prepare_data() setting.setup() # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) return setting @pytest.fixture(scope="session") def short_discrete_task_agnostic_sl_setting(session_config: Config): setting = DiscreteTaskAgnosticSLSetting( dataset="mnist", monitor_training_performance=True, ) setting.config = session_config setting.prepare_data() setting.setup() # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) return setting @pytest.fixture(scope="session") def short_task_incremental_setting(session_config: Config): setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, monitor_training_performance=True, ) setting.config = session_config setting.prepare_data() setting.setup() # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) return setting @pytest.fixture(scope="session") def short_sl_track_setting(session_config: Config): setting = SettingProxy( ClassIncrementalSetting, "sl_track", # dataset="synbols", # nb_tasks=12, # class_order=class_order, # monitor_training_performance=True, ) setting.config = session_config # TODO: This could be a bit more convenient. setting.data_dir = session_config.data_dir assert setting.config == session_config assert setting.data_dir == session_config.data_dir assert setting.nb_tasks == 12 # For now we'll just shorten the tests by shortening the datasets. samples_per_task = 100 setting.batch_size = 10 setting.setup() # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, samples_per_task) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, samples_per_task) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, samples_per_task) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == setting.nb_tasks assert len(setting.val_datasets) == setting.nb_tasks assert len(setting.test_datasets) == setting.nb_tasks assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == setting.nb_tasks assert len(setting.val_datasets) == setting.nb_tasks assert len(setting.test_datasets) == setting.nb_tasks assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets) return setting ================================================ FILE: sequoia/methods/d3rlpy_methods/__init__.py ================================================ ================================================ FILE: sequoia/methods/d3rlpy_methods/base.py ================================================ from typing import ClassVar, Type, Union import gym import numpy as np try: from d3rlpy.algos import * from d3rlpy.dataset import MDPDataset except ImportError as err: raise RuntimeError(f"You need to have `d3rlpy` installed to use these methods.") from err from gym import Space from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics from sequoia import Actions, Environment, Method, Observations, Rewards from sequoia.settings.offline_rl.setting import OfflineRLSetting class OfflineRLWrapper(gym.Wrapper): def __init__(self, env): super().__init__(env) self.observation_space = env.observation_space.x def reset(self): observation = super().reset() return observation.x def step(self, action): observation, reward, done, info = super().step(action) return observation.x, reward.y, done, info class BaseOfflineRLMethod(Method, target_setting=OfflineRLSetting): Algo: ClassVar[Type[AlgoBase]] = AlgoBase def __init__( self, train_steps: int = 1_000_000, train_steps_per_epoch=1_000_000, test_steps=1_000, scorers: dict = None, use_gpu: bool = False, **kwargs, ): super().__init__() self.train_steps = train_steps self.train_steps_per_epoch = train_steps_per_epoch self.test_steps = test_steps self.scorers = scorers self.offline_metrics = None self.use_gpu = use_gpu self.kwargs = kwargs self.algo = None def configure(self, setting: OfflineRLSetting) -> None: super().configure(setting) self.setting = setting self.algo = type(self).Algo(use_gpu=self.use_gpu, **self.kwargs) def fit( self, train_env: Union[Environment[Observations, Actions, Rewards], MDPDataset], valid_env: Union[Environment[Observations, Actions, Rewards], MDPDataset], ) -> None: """ Fit self.algo on training and evaluation environment Works for both gym environments and d3rlpy datasets """ if isinstance(self.setting, OfflineRLSetting): self.offline_metrics = self.algo.fit( train_env, eval_episodes=valid_env, n_steps=self.train_steps, n_steps_per_epoch=self.train_steps_per_epoch, scorers=self.scorers, ) else: train_env = RecordEpisodeStatistics(OfflineRLWrapper(train_env)) valid_env = RecordEpisodeStatistics(OfflineRLWrapper(valid_env)) self.algo.fit_online(env=train_env, eval_env=valid_env, n_steps=self.train_steps) def get_actions(self, obs: Union[np.ndarray, Observations], action_space: Space) -> np.ndarray: """ Return actions predicted by self.algo for given observation and action space """ if isinstance(obs, Observations): obs = obs.x obs = np.expand_dims(obs, axis=0) action = np.asarray(self.algo.predict(obs)).squeeze(axis=0) return action """ D3RLPY Methods: target OfflineRL and TraditionalRL assumptions """ class DQNMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DQN class DoubleDQNMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DoubleDQN class DDPGMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DDPG class TD3Method(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = TD3 class SACMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = SAC class DiscreteSACMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DiscreteSAC class CQLMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = CQL class DiscreteCQLMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DiscreteCQL class BEARMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = BEAR class AWRMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = AWR class DiscreteAWRMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DiscreteAWR class BCMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = BC class DiscreteBCMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DiscreteBC class BCQMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = BCQ class DiscreteBCQMethod(BaseOfflineRLMethod): Algo: ClassVar[Type[AlgoBase]] = DiscreteBCQ ================================================ FILE: sequoia/methods/d3rlpy_methods/base_test.py ================================================ import pytest from d3rlpy.constants import ActionSpace from sequoia import TraditionalRLSetting from sequoia.methods.d3rlpy_methods.base import * from sequoia.settings.offline_rl.setting import OfflineRLSetting class BaseOfflineRLMethodTests: Method: ClassVar[Type[BaseOfflineRLMethod]] @pytest.fixture def method(self): return self.Method(train_steps=1, train_steps_per_epoch=1) @pytest.mark.parametrize("dataset", OfflineRLSetting.available_datasets) def test_offlinerl(self, method, dataset: str): setting_offline = OfflineRLSetting(dataset=dataset) # # Check for mismatch if isinstance(setting_offline.env.action_space, gym.spaces.Box): if method.algo.get_action_type() not in {ActionSpace.CONTINUOUS, ActionSpace.BOTH}: pytest.skip("This setting requires continuous action space algorithm") elif isinstance(setting_offline.env.action_space, gym.spaces.discrete.Discrete): if method.algo.get_action_type() not in {ActionSpace.DISCRETE, ActionSpace.BOTH}: pytest.skip("This setting requires discrete action space algorithm") else: pytest.skip("Invalid setting action space") results = setting_offline.apply(method) # Difficult to set a meaningful threshold for 1 step fit assert isinstance(results.objective, float) @pytest.mark.parametrize("dataset", TraditionalRLSetting.available_datasets) def test_traditionalrl(self, method, dataset): # BC is a strictly offline method if isinstance(method, (BCMethod, BCQMethod, DiscreteBCMethod, DiscreteBCQMethod)): pytest.skip("This method only works on OfflineRLSetting") setting_online = TraditionalRLSetting(dataset=dataset, test_max_steps=10) # # Check for mismatch if isinstance(setting_online.action_space, gym.spaces.Box): if method.algo.get_action_type() != ActionSpace.CONTINUOUS: pytest.skip("This setting requires continuous action space algorithm") elif isinstance(setting_online.action_space, gym.spaces.discrete.Discrete): if method.algo.get_action_type() != ActionSpace.DISCRETE: pytest.skip("This setting requires discrete action space algorithm") else: pytest.skip("Invalid setting action space") results = setting_online.apply(method) # Difficult to set a meaningful threshold for 1 step fit assert isinstance(results.objective, (int, float)) class TestDQNMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DQNMethod class TestDoubleDQNMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DoubleDQNMethod class TestDDPGMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DDPGMethod class TestTD3Method(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = TD3Method class TestSACMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = SACMethod class TestDiscreteSACMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteSACMethod class TestCQLMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = CQLMethod class TestDiscreteCQLMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteCQLMethod class TestBEARMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = BEARMethod class TestAWRMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = AWRMethod class TestDiscreteAWRMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteAWRMethod class TestBCMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = BCMethod class TestDiscreteBCMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteBCMethod class TestBCQMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = BCQMethod class TestDiscreteBCQMethod(BaseOfflineRLMethodTests): Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteBCQMethod ================================================ FILE: sequoia/methods/ewc_method.py ================================================ """Defines the EWC method, as a subclass of the BaseMethod. Likewise, defines the `EwcModel`, which is a very simple subclass of the `BaseModel`, adding in the Ewc auxiliary task (`EWCTask`). For a more detailed view of exactly how the EwcTask calculates its loss, see the `sequoia.methods.aux_tasks.ewc.EwcTask`. """ import warnings from dataclasses import dataclass from typing import Optional from gym.utils import colorize from simple_parsing import ArgumentParser, mutable_field from sequoia.common.config import Config from sequoia.methods import register_method from sequoia.methods.aux_tasks.ewc import EWCTask from sequoia.methods.base_method import BaseMethod, BaseModel from sequoia.methods.trainer import TrainerConfig from sequoia.settings import Setting, TaskIncrementalRLSetting, IncrementalSLSetting from sequoia.settings.assumptions.incremental import IncrementalAssumption class EwcModel(BaseModel): """Modified version of the BaseModel, which adds the EWC auxiliary task.""" @dataclass class HParams(BaseModel.HParams): """Hyper-parameters of the `EwcModel`.""" # Hyper-parameters related to the EWC auxiliary task. ewc: EWCTask.Options = mutable_field(EWCTask.Options) def __init__(self, setting: Setting, hparams: "EwcModel.HParams", config: Config): super().__init__(setting=setting, hparams=hparams, config=config) self.hp: EwcModel.HParams self.add_auxiliary_task(EWCTask(options=self.hp.ewc)) def get_loss(self, forward_pass, rewards=None, loss_name=""): return super().get_loss(forward_pass, rewards=rewards, loss_name=loss_name) @register_method @dataclass class EwcMethod(BaseMethod, target_setting=IncrementalSLSetting): """Subclass of the BaseMethod, which adds the EWCTask to the `BaseModel`. This Method is applicable to any CL setting (RL or SL) where there are clear task boundaries, regardless of if the task labels are given or not. """ hparams: EwcModel.HParams = mutable_field(EwcModel.HParams) def __init__( self, hparams: EwcModel.HParams = None, config: Config = None, trainer_options: TrainerConfig = None, **kwargs, ): super().__init__(hparams=hparams, config=config, trainer_options=trainer_options, **kwargs) def configure(self, setting: IncrementalAssumption): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ super().configure(setting) if setting.phases == 1: warnings.warn( RuntimeWarning( colorize( "Disabling the EWC portion of this Method entirely, as there " "is only one phase of training in this setting (i.e. `fit` is " "only called once).", "red", ) ) ) # We could also just disable the ewc task (after super().configure(setting)) self.model.tasks["ewc"].disable() def on_task_switch(self, task_id: Optional[int]): super().on_task_switch(task_id) def create_model(self, setting: Setting) -> EwcModel: """Create the Model to use for the given Setting. In this case, we want to return an `EwcModel` (our customized version of the BaseModel). Parameters ---------- setting : Setting The experimental Setting this Method will be applied to. Returns ------- EwcModel The Model that will be trained and used for evaluation. """ return EwcModel(setting=setting, hparams=self.hparams, config=self.config) def demo(): """Runs the EwcMethod on a simple setting, just to check that it works fine.""" # Adding arguments for each group directly: parser = ArgumentParser(description=__doc__) EwcMethod.add_argparse_args(parser, dest="method") parser.add_arguments(Config, "config") args = parser.parse_args() method = EwcMethod.from_argparse_args(args, dest="method") config: Config = args.config task_schedule = { 0: {"gravity": 10, "length": 0.2}, 1000: {"gravity": 100, "length": 1.2}, # 2000: {"gravity": 10, "length": 0.2}, } setting = TaskIncrementalRLSetting( dataset="cartpole", train_task_schedule=task_schedule, test_task_schedule=task_schedule, # max_steps=1000, ) # from sequoia.settings import TaskIncrementalSLSetting, ClassIncrementalSetting # setting = ClassIncrementalSetting(dataset="mnist", nb_tasks=5) # setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=5) results = setting.apply(method, config=config) print(results.summary()) if __name__ == "__main__": demo() ================================================ FILE: sequoia/methods/ewc_method_test.py ================================================ """ TODO: Tests for the EWC Method. """ from functools import partial from typing import ClassVar, Type import numpy as np import pytest from torch import Tensor from sequoia.common import Loss from sequoia.common.config import Config from sequoia.conftest import slow from sequoia.methods.trainer import TrainerConfig from sequoia.settings.rl import IncrementalRLSetting, TaskIncrementalRLSetting, TraditionalRLSetting from sequoia.settings.sl import ( ClassIncrementalSetting, MultiTaskSLSetting, TaskIncrementalSLSetting, TraditionalSLSetting, ) from .base_method_test import TestBaseMethod as BaseMethodTests from .ewc_method import EwcMethod, EwcModel class TestEWCMethod(BaseMethodTests): Method: ClassVar[Type[Method]] = EwcMethod @classmethod @pytest.fixture def method(cls, config: Config, trainer_options: TrainerConfig) -> EwcMethod: """Fixture that returns the Method instance to use when testing/debugging.""" trainer_options.max_epochs = 1 return cls.Method(trainer_options=trainer_options, config=config) @slow @pytest.mark.timeout(300) def test_task_incremental_mnist(self, monkeypatch): # TODO: Change this to use the 'short task incremental setting'. setting = TaskIncrementalSLSetting(dataset="mnist", monitor_training_performance=True) total_ewc_losses_per_task = np.zeros(setting.nb_tasks) _training_step = EwcModel.training_step def wrapped_training_step(self: EwcModel, batch, batch_idx: int, *args, **kwargs): step_results = _training_step(self, batch, batch_idx=batch_idx, *args, **kwargs) loss_object: Loss = step_results["loss_object"] if "ewc" in loss_object.losses: ewc_loss_obj = loss_object.losses["ewc"] ewc_loss = ewc_loss_obj.total_loss if isinstance(ewc_loss, Tensor): ewc_loss = ewc_loss.detach().cpu().numpy() total_ewc_losses_per_task[self.current_task] += ewc_loss return step_results monkeypatch.setattr(EwcModel, "training_step", wrapped_training_step) _fit = EwcMethod.fit at_all_points_in_time = [] def wrapped_fit(self, train_env, valid_env): print(f"starting task {self.model.current_task}: {total_ewc_losses_per_task}") total_ewc_losses_per_task[:] = 0 _fit(self, train_env, valid_env) at_all_points_in_time.append(total_ewc_losses_per_task.copy()) monkeypatch.setattr(EwcMethod, "fit", wrapped_fit) # _on_epoch_end = EwcModel.on_epoch_end # def fake_on_epoch_end(self, *args, **kwargs): # assert False, f"heyo: {total_ewc_losses_per_task}" # return _on_epoch_end(self, *args, **kwargs) # # monkeypatch.setattr(EwcModel, "on_epoch_end", fake_on_epoch_end) method = EwcMethod(max_epochs=1) results = setting.apply(method) assert (at_all_points_in_time[0] == 0).all() assert at_all_points_in_time[1][1] != 0 assert at_all_points_in_time[2][2] != 0 assert at_all_points_in_time[3][3] != 0 assert at_all_points_in_time[4][4] != 0 assert 0.95 <= results.average_online_performance.objective # TODO: Fix this: Should be getting way better than this, even when just # debugging. assert 0.15 <= results.average_final_performance.objective @pytest.mark.parametrize( "non_cl_setting_fn", [ partial(ClassIncrementalSetting, nb_tasks=1), MultiTaskSLSetting, TraditionalSLSetting, TraditionalRLSetting, partial(IncrementalRLSetting, nb_tasks=1), partial(TaskIncrementalRLSetting, nb_tasks=1), ], ) def test_raises_warning_when_applied_to_non_cl_setting(self, non_cl_setting_fn): """When applied onto a non-CL setting like IID or Multi-Task SL (or RL), the EWCMethod should raise a warning, and disable the auxiliary task. """ method = EwcMethod() setting = non_cl_setting_fn() with pytest.warns(RuntimeWarning): method.configure(setting) ================================================ FILE: sequoia/methods/experience_replay.py ================================================ """ Method that uses a replay buffer to prevent forgetting. TODO: Refactor this to be based on the BaseMethod, possibly using an auxiliary task for the Replay. """ from argparse import ArgumentParser, Namespace from collections.abc import Iterable from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple, Type import gym import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torchvision.models as models import tqdm from gym import spaces from torch import Tensor from torchvision.models import ResNet from wandb.wandb_run import Run from sequoia.methods import register_method from sequoia.settings import ClassIncrementalSetting from sequoia.settings.base import Actions, Environment, Method, Observations from sequoia.settings.sl.continual.setting import smart_class_prediction from sequoia.utils import get_logger logger = get_logger(__name__) @register_method @dataclass class ExperienceReplayMethod(Method, target_setting=ClassIncrementalSetting): """Simple method that uses a replay buffer to reduce forgetting.""" def __init__( self, learning_rate: float = 1e-3, buffer_capacity: int = 200, max_epochs_per_task: int = 10, weight_decay: float = 1e-6, seed: int = None, ): self.learning_rate = learning_rate self.weight_decay = weight_decay self.buffer_capacity = buffer_capacity self.net: ResNet self.buffer: Optional[Buffer] = None self.optim: torch.optim.Optimizer self.task: int = 0 self.rng = np.random.default_rng(seed) self.seed = seed if seed: torch.manual_seed(seed) torch.set_deterministic(True) self.epochs_per_task: int = max_epochs_per_task self.early_stop_patience: int = 2 self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") def configure(self, setting: ClassIncrementalSetting): self.setting = setting # create the model self.net = models.resnet18(pretrained=False) self.net.fc = nn.Linear(512, setting.action_space.n) if torch.cuda.is_available(): self.net = self.net.to(device=self.device) # Set drop_last to True, to avoid getting a batch of size 1, which makes # batchnorm raise an error. setting.drop_last = True image_space: spaces.Box = setting.observation_space["x"] # Create the buffer. if self.buffer_capacity: self.buffer = Buffer( capacity=self.buffer_capacity, input_shape=image_space.shape, extra_buffers={"t": torch.LongTensor}, rng=self.rng, ).to(device=self.device) # Create the optimizer. self.optim = torch.optim.Adam( self.net.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay, ) def fit(self, train_env: Environment, valid_env: Environment): self.net.train() # Simple example training loop, not using the validation loader. best_val_loss = np.inf best_epoch = 0 for epoch in range(self.epochs_per_task): train_pbar = tqdm.tqdm(train_env, desc=f"Training Epoch {epoch}") postfix = {} obs: ClassIncrementalSetting.Observations rew: ClassIncrementalSetting.Rewards for i, (obs, rew) in enumerate(train_pbar): self.optim.zero_grad() obs = obs.to(device=self.device) x = obs.x # FIXME: Batch norm will cause a crash if we pass x with batch_size==1! fake_batch = False if x.shape[0] == 1: # Pretend like this has batch_size of 2 rather than just 1. x = x.tile([2, *(1 for _ in x.shape[1:])]) x[1] += 1 # Just so the two samples aren't identical, otherwise # maybe the batch norm std would be nan or something. fake_batch = True logits = self.net(x) if fake_batch: logits = logits[:1] # Drop the 'fake' second item. if rew is None: # If our online training performance is being measured, we might # need to provide actions before we can get the corresponding # rewards (image labels in this case). y_pred = logits.argmax(1) rew = train_env.send(y_pred) rew = rew.to(device=self.device) y = rew.y loss = F.cross_entropy(logits, y) postfix["loss"] = loss.detach().item() if self.task > 0 and self.buffer: b_samples = self.buffer.sample(x.size(0)) b_logits = self.net(b_samples["x"]) loss_replay = F.cross_entropy(b_logits, b_samples["y"]) loss += loss_replay postfix["replay loss"] = loss_replay.detach().item() loss.backward() self.optim.step() train_pbar.set_postfix(postfix) # Only add new samples to the buffer (only during first epoch). if self.buffer and epoch == 0: self.buffer.add_reservoir({"x": x, "y": y, "t": self.task}) # Validation loop: self.net.eval() torch.set_grad_enabled(False) val_pbar = tqdm.tqdm(valid_env) val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 epoch_val_loss_list: List[float] = [] for i, (obs, rew) in enumerate(val_pbar): obs = obs.to(device=self.device) x = obs.x logits = self.net(x) if rew is None: y_pred = logits.argmax(-1) rew = valid_env.send(y_pred) assert rew is not None rew = rew.to(device=self.device) y = rew.y val_loss = F.cross_entropy(logits, y).item() epoch_val_loss_list.append(val_loss) postfix["validation loss"] = val_loss val_pbar.set_postfix(postfix) torch.set_grad_enabled(True) epoch_val_loss_mean = np.mean(epoch_val_loss_list) if epoch_val_loss_mean < best_val_loss: best_val_loss = epoch_val_loss_mean best_epoch = epoch if epoch - best_epoch > self.early_stop_patience: print(f"Early stopping at epoch {epoch}.") # TODO: Reload the weights from the best epoch. break def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: observations = observations.to(device=self.device) task_labels = observations.task_labels logits = self.net(observations.x) if task_labels is not None: y_pred = smart_class_prediction( logits=logits, task_labels=task_labels, setting=self.setting, train=False, ) else: y_pred = logits.argmax(1) return self.setting.Actions(y_pred=y_pred) def on_task_switch(self, task_id: Optional[int]): print(f"Switching from task {self.task} to task {task_id}") if self.training: self.task = task_id @classmethod def add_argparse_args(cls, parser: ArgumentParser) -> None: """Add the command-line arguments for this Method to the given parser. Parameters ---------- parser : ArgumentParser The ArgumentParser. """ parser.add_argument("--learning_rate", type=float, default=1e-3) parser.add_argument("--weight_decay", type=float, default=1e-6) parser.add_argument("--buffer_capacity", type=int, default=200) parser.add_argument("--max_epochs_per_task", type=int, default=10) parser.add_argument("--seed", type=int, default=None, help="Random seed") @classmethod def from_argparse_args(cls, args: Namespace, dest: str = None): """Extract the parsed command-line arguments from the namespace and return an instance of class `cls`. Parameters ---------- args : Namespace The namespace containing all the parsed command-line arguments. dest : str, optional The , by default None Returns ------- cls An instance of the class `cls`. """ args = args if not dest else getattr(args, dest) return cls( learning_rate=args.learning_rate, buffer_capacity=args.buffer_capacity, max_epochs_per_task=args.max_epochs_per_task, weight_decay=args.weight_decay, seed=args.seed, ) def get_search_space(self, setting: ClassIncrementalSetting) -> Dict: return { "learning_rate": "loguniform(1e-4, 5e-1, default_value=1e-3)", "buffer_capacity": "uniform(1000, 100_000, default_value=10_000, discrete=True)", "weight_decay": "loguniform(1e-12, 1e-3, default_value=1e-6)", "early_stop_patience": "uniform(0, 2, default_value=1, discrete=True)", } def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. NOTE: It is very strongly recommended that you always re-create your model and any modules / components that depend on these hyper-parameters inside the `configure` method! (Otherwise these new hyper-parameters will not be used in the next run) Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ # Here we overwrite the corresponding attributes with the new suggested values # leaving other fields unchanged. # NOTE: These new hyper-paramers will be used in the next run in the sweep, # since each call to `configure` will create a new Model. self.learning_rate = new_hparams["learning_rate"] self.weight_decay = new_hparams["weight_decay"] self.buffer_capacity = new_hparams["buffer_capacity"] def setup_wandb(self, run: Run) -> None: """Called by the Setting when using Weights & Biases, after `wandb.init`. This method is here to provide Methods with the opportunity to log some of their configuration options or hyper-parameters to wandb. NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by this point. Parameters ---------- run : wandb.Run Current wandb Run. """ run.config.update( dict( learning_rate=self.learning_rate, weight_decay=self.weight_decay, buffer_capacity=self.buffer_capacity, epochs_per_task=self.epochs_per_task, seed=self.seed, ) ) class Buffer(nn.Module): def __init__( self, capacity: int, input_shape: Tuple[int, ...], extra_buffers: Dict[str, Type[torch.Tensor]] = None, rng: np.random.RandomState = None, ): super().__init__() self.rng = rng or np.random.RandomState() bx = torch.zeros([capacity, *input_shape], dtype=torch.float) by = torch.zeros([capacity], dtype=torch.long) self.register_buffer("bx", bx) self.register_buffer("by", by) self.buffers = ["bx", "by"] extra_buffers = extra_buffers or {} for name, dtype in extra_buffers.items(): tmp = dtype(capacity).fill_(0) self.register_buffer(f"b{name}", tmp) self.buffers += [f"b{name}"] self.current_index = 0 self.n_seen_so_far = 0 self.is_full = 0 # (@lebrice) args isn't defined here: # self.to_one_hot = lambda x : x.new(x.size(0), args.n_classes).fill_(0).scatter_(1, x.unsqueeze(1), 1) self.arange_like = lambda x: torch.arange(x.size(0)).to(x.device) self.shuffle = lambda x: x[torch.randperm(x.size(0))] @property def x(self): return self.bx[: self.current_index] @property def y(self): raise NotImplementedError("Can't make y one-hot, dont have n_classes.") return self.to_one_hot(self.by[: self.current_index]) def add_reservoir(self, batch: Dict[str, Tensor]) -> None: n_elem = batch["x"].size(0) # add whatever still fits in the buffer place_left = max(0, self.bx.size(0) - self.current_index) if place_left: offset = min(place_left, n_elem) for name, data in batch.items(): buffer = getattr(self, f"b{name}") if isinstance(data, Iterable): buffer[self.current_index : self.current_index + offset].data.copy_( data[:offset] ) else: buffer[self.current_index : self.current_index + offset].fill_(data) self.current_index += offset self.n_seen_so_far += offset # everything was added if offset == batch["x"].size(0): return x = batch["x"] self.place_left = False indices = ( torch.FloatTensor(x.size(0) - place_left) .to(x.device) .uniform_(0, self.n_seen_so_far) .long() ) valid_indices: Tensor = (indices < self.bx.size(0)).long() idx_new_data = valid_indices.nonzero(as_tuple=False).squeeze(-1) idx_buffer = indices[idx_new_data] self.n_seen_so_far += x.size(0) if idx_buffer.numel() == 0: return # perform overwrite op for name, data in batch.items(): buffer = getattr(self, f"b{name}") if isinstance(data, Iterable): data = data[place_left:] buffer[idx_buffer] = data[idx_new_data] else: buffer[idx_buffer] = data def sample(self, n_samples: int, exclude_task: int = None) -> Dict[str, Tensor]: buffers = {} if exclude_task is not None: assert hasattr(self, "bt") valid_indices = (self.bt != exclude_task).nonzero().squeeze() for buffer_name in self.buffers: buffers[buffer_name] = getattr(self, buffer_name)[valid_indices] else: for buffer_name in self.buffers: buffers[buffer_name] = getattr(self, buffer_name)[: self.current_index] bx = buffers["bx"] if bx.size(0) < n_samples: return buffers else: indices_np = self.rng.choice(bx.size(0), n_samples, replace=False) indices = torch.from_numpy(indices_np).to(self.bx.device) return {k[1:]: v[indices] for (k, v) in buffers.items()} if __name__ == "__main__": ExperienceReplayMethod.main() ================================================ FILE: sequoia/methods/experience_replay_test.py ================================================ from typing import ClassVar, Dict, Type import pytest from sequoia.common.config import Config from sequoia.conftest import slow from sequoia.methods.method_test import MethodTests from sequoia.settings.sl import ClassIncrementalSetting, SLSetting from .experience_replay import ExperienceReplayMethod class TestExperienceReplay(MethodTests): Method: ClassVar[Type[ExperienceReplayMethod]] = ExperienceReplayMethod method_debug_kwargs: ClassVar[Dict] = {"buffer_capacity": 100, "max_epochs_per_task": 1} @classmethod @pytest.fixture def method(cls, config: Config) -> ExperienceReplayMethod: """Fixture that returns the Method instance to use when testing/debugging.""" return cls.Method(**cls.method_debug_kwargs) def validate_results( self, setting: SLSetting, method: ExperienceReplayMethod, results: SLSetting.Results, ) -> None: assert results assert results.objective @slow @pytest.mark.timeout(300) def test_class_incremental_mnist(self, config: Config): method = ExperienceReplayMethod(buffer_capacity=200, max_epochs_per_task=1) setting = ClassIncrementalSetting( dataset="mnist", monitor_training_performance=True, ) results = setting.apply(method, config=config) assert 0.90 <= results.average_online_performance.objective assert 0.70 <= results.final_performance_metrics[0].objective assert 0.70 <= results.final_performance_metrics[1].objective assert 0.70 <= results.final_performance_metrics[2].objective assert 0.70 <= results.final_performance_metrics[3].objective assert 0.70 <= results.final_performance_metrics[4].objective assert 0.80 <= results.average_final_performance.objective ================================================ FILE: sequoia/methods/hat.py ================================================ """ Hard Attention to the Task ``` @inproceedings{serra2018overcoming, title={Overcoming Catastrophic Forgetting with Hard Attention to the Task}, author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros}, booktitle={International Conference on Machine Learning}, pages={4548--4557}, year={2018} } ``` """ from argparse import Namespace from dataclasses import dataclass from typing import Any, Dict, Mapping, NamedTuple, Optional, Tuple, Union import gym import numpy as np import torch import tqdm from numpy import inf from simple_parsing import ArgumentParser from torch import Tensor from wandb.wandb_run import Run from sequoia.common import Config from sequoia.common.hparams import HyperParameters, categorical, log_uniform, uniform from sequoia.common.spaces import Image from sequoia.methods import register_method from sequoia.settings import Environment, Method, Setting from sequoia.settings.sl import TaskIncrementalSLSetting from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards class Masks(NamedTuple): """Named tuple for the masked tensors created in the HATNet.""" gc1: Tensor gc2: Tensor gc3: Tensor gfc1: Tensor gfc2: Tensor class HatNet(torch.nn.Module): """ @inproceedings{serra2018overcoming, title={Overcoming Catastrophic Forgetting with Hard Attention to the Task}, author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros}, booktitle={International Conference on Machine Learning}, pages={4548--4557}, year={2018} } The model is where the model weights are initialized. Just like a classic PyTorch, here the different layers and components of the model are defined. """ def __init__(self, image_space: Image, n_classes_per_task: Dict[int, int], s_hat: int = 50): super().__init__() ncha = image_space.channels size = image_space.width self.n_classes_per_task = n_classes_per_task self.s_hat = s_hat self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8) s = compute_conv_output_size(size, size // 8) s //= 2 self.c2 = torch.nn.Conv2d(64, 128, kernel_size=size // 10) s = compute_conv_output_size(s, size // 10) s //= 2 self.c3 = torch.nn.Conv2d(128, 256, kernel_size=2) s = compute_conv_output_size(s, 2) s //= 2 self.smid = s self.maxpool = torch.nn.MaxPool2d(2) self.relu = torch.nn.ReLU() self.drop1 = torch.nn.Dropout(0.2) self.drop2 = torch.nn.Dropout(0.5) self.fc1 = torch.nn.Linear(256 * self.smid * self.smid, 2048) self.fc2 = torch.nn.Linear(2048, 2048) self.output_layers = torch.nn.ModuleList() n_tasks = len(self.n_classes_per_task) # TODO: (@lebrice) Here I'm 'fixing' this, by making it so each output head has # as many outputs as there are classes in total. It's not super efficient, but # it should work. total_classes = sum(self.n_classes_per_task.values()) for task_index, n_classes_in_task in self.n_classes_per_task.items(): self.output_layers.append(torch.nn.Linear(2048, total_classes)) self.gate = torch.nn.Sigmoid() # All embedding stuff should start with 'e' self.ec1 = torch.nn.Embedding(n_tasks, 64) self.ec2 = torch.nn.Embedding(n_tasks, 128) self.ec3 = torch.nn.Embedding(n_tasks, 256) self.efc1 = torch.nn.Embedding(n_tasks, 2048) self.efc2 = torch.nn.Embedding(n_tasks, 2048) self.flatten = torch.nn.Flatten() self.loss = torch.nn.CrossEntropyLoss() self.current_task: Optional[int] = 0 def forward(self, observations: TaskIncrementalSLSetting.Observations) -> Tuple[Tensor, Masks]: observations.as_list_of_tuples() x = observations.x t = observations.task_labels # BUG: This won't work if task_labels is None (which is the case at # test-time in the ClassIncrementalSetting) masks = self.mask(t, s_hat=self.s_hat) gc1, gc2, gc3, gfc1, gfc2 = masks # Gated h = self.maxpool(self.drop1(self.relu(self.c1(x)))) h = h * gc1.unsqueeze(2).unsqueeze(3) h = self.maxpool(self.drop1(self.relu(self.c2(h)))) h = h * gc2.unsqueeze(2).unsqueeze(3) h = self.maxpool(self.drop2(self.relu(self.c3(h)))) h = h * gc3.unsqueeze(2).unsqueeze(3) h = self.flatten(h) h = self.drop2(self.relu(self.fc1(h))) h = h * gfc1.expand_as(h) h = self.drop2(self.relu(self.fc2(h))) h = h * gfc2.expand_as(h) # Each batch can have elements of more than one Task (in test) # In Task Incremental Learning, each task have it own classification head. y: Optional[Tensor] = None task_masks = {} for task_id in set(t.tolist()): task_mask = t == task_id task_masks[task_id] = task_mask y_pred_t = self.output_layers[task_id](h.clone()) if y is None: y = y_pred_t else: y[task_mask] = y_pred_t[task_mask] assert y is not None return y, masks def mask(self, t: Tensor, s_hat: float) -> Masks: gc1 = self.gate(s_hat * self.ec1(t)) gc2 = self.gate(s_hat * self.ec2(t)) gc3 = self.gate(s_hat * self.ec3(t)) gfc1 = self.gate(s_hat * self.efc1(t)) gfc2 = self.gate(s_hat * self.efc2(t)) return Masks(gc1, gc2, gc3, gfc1, gfc2) def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment ) -> Tuple[Tensor, Dict]: """Shared step used for both training and validation. Parameters ---------- batch : Tuple[Observations, Optional[Rewards]] Batch containing Observations, and optional Rewards. When the Rewards are None, it means that we'll need to provide the Environment with actions before we can get the Rewards (e.g. image labels) back. This happens for example when being applied in a Setting which cares about sample efficiency or training performance, for example. environment : Environment The environment we're currently interacting with. Used to provide the rewards when they aren't already part of the batch, for example when our performance is being monitored during training. Returns ------- Tuple[Tensor, Dict] The Loss tensor, and a dict of metrics to be logged. """ # Since we're training on a Passive environment, we will get both observations # and rewards, unless we're being evaluated based on our training performance, # in which case we will need to send actions to the environments before we can # get the corresponding rewards (image labels) back. observations: Observations = batch[0] rewards: Optional[Rewards] = batch[1] # Get the predictions: logits, _ = self(observations) y_pred = logits.argmax(-1) if rewards is None: # If the rewards in the batch were None, it means we're expected to give # actions before we can get rewards back from the environment. # This happens when the Setting is monitoring our training performance. rewards = environment.send(Actions(y_pred)) assert rewards is not None image_labels = rewards.y loss = self.loss(logits, image_labels) accuracy = (y_pred == image_labels).sum().float() / len(image_labels) metrics_dict = {"accuracy": accuracy} return loss, metrics_dict def compute_conv_output_size( Lin: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1 ) -> int: return int(np.floor((Lin + 2 * padding - dilation * (kernel_size - 1) - 1) / float(stride) + 1)) @register_method class HatMethod(Method, target_setting=TaskIncrementalSLSetting): """Hard Attention to the Task ``` @inproceedings{serra2018overcoming, title={Overcoming Catastrophic Forgetting with Hard Attention to the Task}, author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros}, booktitle={International Conference on Machine Learning}, pages={4548--4557}, year={2018} } ``` """ @dataclass class HParams(HyperParameters): """Hyper-parameters of the Settings.""" # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001) # Batch size batch_size: int = categorical(16, 32, 64, 128, default=128) # weight/importance of the task embedding to the gate function s_hat: float = uniform(1.0, 100.0, default=50.0) # Maximum number of training epochs per task max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True) def __init__(self, hparams: HParams = None): self.hparams: HatMethod.HParams = hparams or self.HParams() self.early_stopping_patience = 2 # We will create those when `configure` will be called, before training. self.model: HatNet self.optimizer: torch.optim.Optimizer def configure(self, setting: TaskIncrementalSLSetting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ setting.batch_size = self.hparams.batch_size assert ( setting.increment == setting.test_increment ), "Assuming same number of classes per task for training and testing." n_classes_per_task = { i: setting.num_classes_in_task(i, train=True) for i in range(setting.nb_tasks) } image_space: Image = setting.observation_space["x"] self.model = HatNet( image_space=image_space, n_classes_per_task=n_classes_per_task, s_hat=self.hparams.s_hat, ) self.optimizer = torch.optim.Adam( self.model.parameters(), lr=self.hparams.learning_rate, ) def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): """ Train loop Different Settings can return elements from tasks in an other way, be it class incremental, task incremental, etc. Batch can have information about en environment, rewards, input, task labels, etc. And we call the forward training function of our method, independent of the settings """ # configure() will have been called by the setting before we get here, best_val_loss = inf best_epoch = 0 for epoch in range(self.hparams.max_epochs_per_task): self.model.train() print(f"Starting epoch {epoch}") # Training loop: with tqdm.tqdm(train_env) as train_pbar: postfix = {} train_pbar.set_description(f"Training Epoch {epoch}") for i, batch in enumerate(train_pbar): loss, metrics_dict = self.model.shared_step( batch, environment=train_env, ) self.optimizer.zero_grad() loss.backward() self.optimizer.step() postfix.update(metrics_dict) train_pbar.set_postfix(postfix) # Validation loop: self.model.eval() torch.set_grad_enabled(False) with tqdm.tqdm(valid_env) as val_pbar: postfix = {} val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 for i, batch in enumerate(val_pbar): batch_val_loss, metrics_dict = self.model.shared_step( batch, environment=valid_env, ) epoch_val_loss += batch_val_loss postfix.update(metrics_dict, val_loss=epoch_val_loss) val_pbar.set_postfix(postfix) torch.set_grad_enabled(True) if epoch_val_loss < best_val_loss: best_val_loss = epoch_val_loss best_epoch = i elif epoch - best_epoch > self.early_stopping_patience: print(f"Early stopping at epoch {epoch}") break def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: """Get a batch of predictions (aka actions) for these observations.""" with torch.no_grad(): logits, _ = self.model(observations) # Get the predicted classes y_pred = logits.argmax(dim=-1) return self.target_setting.Actions(y_pred) def on_task_switch(self, task_id: Optional[int]): # This method gets called if task boundaries are known in the current # setting. Furthermore, if task labels are available, task_id will be # the index of the new task. If not, task_id will be None. # TODO: Does this method actually work when task_id is None? self.model.current_task = task_id @classmethod def add_argparse_args(cls, parser: ArgumentParser) -> None: parser.add_arguments(cls.HParams, dest="hparams") # You can also add arguments as usual: # parser.add_argument("--foo", default=123) @classmethod def from_argparse_args(cls, args: Namespace) -> "HatMethod": hparams: HatMethod.HParams = args.hparams # foo: int = args.foo method = cls(hparams=hparams) return method def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ return self.hparams.get_orion_space() def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ # Here we overwrite the corresponding attributes with the new suggested values # leaving other fields unchanged. # NOTE: These new hyper-paramers will be used in the next run in the sweep, # since each call to `configure` will create a new Model. self.hparams = self.hparams.replace(**new_hparams) def setup_wandb(self, run: Run) -> None: """Called by the Setting when using Weights & Biases, after `wandb.init`. This method is here to provide Methods with the opportunity to log some of their configuration options or hyper-parameters to wandb. NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by this point. Parameters ---------- run : wandb.Run Current wandb Run. """ run.config["hparams"] = self.hparams.to_dict() if __name__ == "__main__": # Example: Evaluate a Method on a single CL setting: parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) """ We must define 3 main components: 1.- Setting: It is the continual learning scenario that we are working, SL or RL, TI or CI Each settings has it own parameters that can be customized. 2.- Model: Is the parameters and layers of the model, just like in PyTorch. We can use a predefined model or create your own 3.- Method: It is how we are going to use what the settings give us to train our model. Same as before, we can define our own or use pre-defined Methods. """ # Add arguments for the Method, the Setting, and the Config. # (Config contains options like the log_dir, the data_dir, etc.) HatMethod.add_argparse_args(parser, dest="method") parser.add_arguments(TaskIncrementalSLSetting, dest="setting") parser.add_arguments(Config, "config") args = parser.parse_args() # Create the Method from the args, and extract the Setting, and the Config: method: HatMethod = HatMethod.from_argparse_args(args, dest="method") setting: TaskIncrementalSLSetting = args.setting config: Config = args.config # Apply the method to the setting, optionally passing in a Config, # producing Results. results = setting.apply(method, config=config) print(results.summary()) print(f"objective: {results.objective}") ================================================ FILE: sequoia/methods/method_test.py ================================================ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import ClassVar, Dict, Type, TypeVar import pytest from sequoia.common.config import Config from sequoia.conftest import config, session_config from sequoia.settings import RLSetting, Setting, SLSetting from sequoia.settings.base import Method from sequoia.settings.sl.continual.setting import random_subset def key_fn(setting_class: Type[Setting]): # order tests in terms of their 'depth' in the tree, and break ties arbitrarily # based on the name. return (len(setting_class.parents()), setting_class.__name__) def make_setting_type_fixture(method_type: Type[Method]) -> pytest.fixture: """Create a parametrized fixture that will go through all the applicable settings for a given method. """ def setting_type(self, request): setting_type = request.param return setting_type setting_types = set(method_type.get_applicable_settings()) settings_to_remove = set([Setting, SLSetting, RLSetting]) # NOTE: Need to make a deterministic ordering of settings, otherwise we can't # parallelize tests with pytest-xdist setting_types = sorted(list(setting_types - settings_to_remove), key=key_fn) return pytest.fixture( params=setting_types, scope="module", )(setting_type) MethodType = TypeVar("MethodType", bound=Method) class MethodTests(ABC): """Base class that can be extended to generate tests for a method. The main test of interest is `test_debug`. """ Method: ClassVar[Type[MethodType]] setting_type: pytest.fixture # Kwargs to pass when contructing the Settings. setting_kwargs: ClassVar[Dict] = {} method_debug_kwargs: ClassVar[Dict] = {} def __init_subclass__(cls, method: Type[MethodType] = None): """Dynamically generates a `setting_type` fixture on the subclass, which will be parametrized by the settings that the Method is applicable to. """ super().__init_subclass__() if not method and not hasattr(cls, "Method"): raise RuntimeError( "Need to either pass `method` when subclassing or set " "a 'Method' class attribute." ) cls.Method = cls.Method or method cls.setting_type: pytest.fixture = make_setting_type_fixture(cls.Method) @classmethod @abstractmethod @pytest.fixture def method(cls, config: Config) -> MethodType: """Fixture that returns the Method instance to use when testing/debugging. Needs to be implemented when creating a new test class (to generate tests for a new method). """ return cls.Method(**cls.method_debug_kwargs) @abstractmethod def validate_results( self, setting: Setting, method: MethodType, results: Setting.Results, ) -> None: assert results assert results.objective assert results.objective is not None print(results.summary()) # NOTE: Need to re-define these here, just so external packages, which maybe aren't # in the "scope" of `sequoia/conftest.py` can also use them: # Dropping the `self` argument by making those static methods on the class. session_config: pytest.fixture = staticmethod(session_config) config: pytest.fixture = staticmethod(config) @pytest.fixture(scope="module") def setting(self, setting_type: Type[Setting], session_config: Config): # TODO: Fix this test setup, nb_tasks should be something low like 2, and # perhaps use max_episode_steps to limit episode length if issubclass(setting_type, SLSetting): setting_kwargs = dict( nb_tasks=5, config=session_config, ) setting_kwargs.setdefault("monitor_training_performance", True) # TODO: Do we also want to parameterize the dataset? or is it too much? setting_kwargs.update(self.setting_kwargs) setting = setting_type( **setting_kwargs, ) assert setting.dataset, setting_kwargs setting.config = session_config setting.batch_size = 10 setting.prepare_data() setting.setup() nb_tasks = 5 samples_per_task = 50 # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, samples_per_task) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, samples_per_task) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, samples_per_task) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == nb_tasks assert len(setting.val_datasets) == nb_tasks assert len(setting.test_datasets) == nb_tasks assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == nb_tasks assert len(setting.val_datasets) == nb_tasks assert len(setting.test_datasets) == nb_tasks assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets) assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets) else: # RL setting: setting_kwargs = dict( nb_tasks=2, train_max_steps=1_000, test_max_steps=1_000, # train_steps_per_task=2_000, # test_steps_per_task=1_000, config=session_config, ) # TODO: Do we also want to parameterize the dataset? or is it too much? setting_kwargs.update(self.setting_kwargs) setting = setting_type( **setting_kwargs, ) yield setting def test_debug(self, method: MethodType, setting: Setting, config: Config): """Apply the Method onto a setting, and validate the results.""" results: Setting.Results = setting.apply(method, config=config) self.validate_results(setting=setting, method=method, results=results) @dataclass class NewSetting(Setting): pass @dataclass class NewMethod(Method, target_setting=NewSetting): def fit(self, train_env, valid_env): pass def get_actions(self, observations, action_space): return action_space.sample() def test_passing_arg_to_class_constructor_works(): assert NewMethod.target_setting is NewSetting assert NewMethod().target_setting is NewSetting @pytest.mark.xfail(reason="Not sure this is necessary.") def test_cant_change_target_setting(): with pytest.raises(AttributeError): NewMethod.target_setting = NewSetting with pytest.raises(AttributeError): NewMethod().target_setting = NewSetting def test_target_setting_is_inherited(): @dataclass class NewMethod2(NewMethod): pass assert NewMethod2.target_setting is NewSetting @dataclass class SettingA(Setting): pass @dataclass class SettingA1(SettingA): pass @dataclass class SettingA2(SettingA): pass @dataclass class SettingB(Setting): pass class MethodA(Method, target_setting=SettingA): def fit(self, train_env, valid_env): pass def get_actions(self, observations, action_space): return action_space.sample() class MethodB(Method, target_setting=SettingB): def fit(self, train_env, valid_env): pass def get_actions(self, observations, action_space): return action_space.sample() class CoolGeneralMethod(Method, target_setting=Setting): def fit(self, train_env, valid_env): pass def get_actions(self, observations, action_space): return action_space.sample() def test_method_is_applicable_to_setting(): """Test the mechanism for determining if a method is applicable for a given setting. Uses the mock hierarchy created above: - Setting - SettingA - SettingA1 - SettingA2 - SettingB - Method - MethodA (target_setting: SettingA) - MethodB (target_setting: SettingA) TODO: if we ever end up registering the method classes when declaring them, then we will need to check that this dummy test hierarchy doesn't actually show up in the real setting options. """ # A Method designed for `SettingA` ISN'T applicable on the root node # `Setting`: assert not MethodA.is_applicable(Setting) # A Method designed for `SettingA` IS applicable on the target node, and all # nodes below it in the tree: assert MethodA.is_applicable(SettingA) assert MethodA.is_applicable(SettingA1) assert MethodA.is_applicable(SettingA2) # A Method designed for `SettingA` ISN'T applicable on some other branch in # the tree: assert not MethodA.is_applicable(SettingB) # Same for Method designed for `SettingB` assert MethodB.is_applicable(SettingB) assert not MethodB.is_applicable(Setting) assert not MethodB.is_applicable(SettingA) assert not MethodB.is_applicable(SettingA1) assert not MethodB.is_applicable(SettingA2) def test_is_applicable_also_works_on_instances(): assert MethodA().is_applicable(SettingA) assert MethodA.is_applicable(SettingA()) assert MethodA().is_applicable(SettingA()) assert not MethodA().is_applicable(SettingB) assert not MethodA.is_applicable(SettingB()) assert not MethodA().is_applicable(SettingB()) ================================================ FILE: sequoia/methods/models/__init__.py ================================================ # from .actor_critic_agent import ActorCritic # from .agent import Agent from .base_model import BaseModel, Model, available_encoders, available_optimizers from .forward_pass import ForwardPass from .output_heads import ClassificationHead, OutputHead, RegressionHead ================================================ FILE: sequoia/methods/models/base_model/__init__.py ================================================ """ This module defines the `BaseModel` used by the `BaseMethod`. Output heads are available for both Supervised and Reinforcement Learning, and can be found in `sequoia.methods.models.output_heads`. Instead of defining the `Model` in one large file, it is instead split into a base class (`Model`, defined in `model.py`) on top of which a few "mixins" are added, each of which adds additional functionality: - [SemiSupervisedModel](self_supervised_model.py): Adds support for semi-supervised (partially labeled or un-labeled) training, by splitting up partially labeled batches into a fully labeled sub-batch and a fully unlabeled sub-batch. - [MultiHeadModel](multihead_model.py): Adds support for: - multi-head prediction: Using a dedicated output head for each task when task labels are available - Mixed batches (data coming from more than one task within the same batch) - TODO: Task inference: When task labels aren't available, perform some task inference in order to choose which output head to use. - [SelfSupervisedModel](self_supervised_model.py): Adds methods for adding self-supervised losses to the model using different Auxiliary Tasks. The `BaseModel` is then formed by inheriting from each of these mixins. """ from .base_model import BaseModel # TODO: Maybe the naming of these could be a bit better: Model seems more 'general' than BaseModel. from .model import Model, available_encoders, available_optimizers from .multihead_model import MultiHeadModel from .self_supervised_model import SelfSupervisedModel from .semi_supervised_model import SemiSupervisedModel ================================================ FILE: sequoia/methods/models/base_model/base_model.py ================================================ """ Example/Template of a Model to be used as part of a Method. You can use this as a base class when creating your own models, or you can start from scratch, whatever you like best. """ from dataclasses import dataclass from typing import ClassVar, Dict, Generic, Optional, Tuple, Type, TypeVar import numpy as np import torch from simple_parsing import choice, mutable_field from torch import Tensor, nn, optim from torch.optim.optimizer import Optimizer from torchvision import models as tv_models from sequoia.common.config import Config from sequoia.common.hparams import categorical, log_uniform from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask from sequoia.methods.models.output_heads import OutputHead, PolicyHead from sequoia.methods.models.simple_convnet import SimpleConvNet from sequoia.settings import Environment, Observations, Rewards, Setting from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.utils.logging_utils import get_logger from .model import ForwardPass from .multihead_model import MultiHeadModel from .self_supervised_model import SelfSupervisedModel from .semi_supervised_model import SemiSupervisedModel torch.autograd.set_detect_anomaly(True) logger = get_logger(__name__) SettingType = TypeVar("SettingType", bound=IncrementalAssumption) class BaseModel(SemiSupervisedModel, MultiHeadModel, SelfSupervisedModel, Generic[SettingType]): """Base model LightningModule (nn.Module extended by pytorch-lightning) This model splits the learning task into a representation-learning problem and a downstream task (output head) applied on top of it. The most important method to understand is the `get_loss` method, which is used by the [train/val/test]_step methods which are called by pytorch-lightning. """ @dataclass class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams, MultiHeadModel.HParams): """HParams of the Model.""" # NOTE: All the fields below were just copied from the BaseHParams class, just # to improve visibility a bit. # Class variables that hold the available optimizers and encoders. # NOTE: These don't get parsed from the command-line. available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = { "sgd": optim.SGD, "adam": optim.Adam, "rmsprop": optim.RMSprop, } # Which optimizer to use. optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam) available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = { "vgg16": tv_models.vgg16, "resnet18": tv_models.resnet18, "resnet34": tv_models.resnet34, "resnet50": tv_models.resnet50, "resnet101": tv_models.resnet101, "resnet152": tv_models.resnet152, "alexnet": tv_models.alexnet, "densenet": tv_models.densenet161, # TODO: Add the self-supervised pl modules here! "simple_convnet": SimpleConvNet, } # Which encoder to use. encoder: Type[nn.Module] = choice( available_encoders, default=SimpleConvNet, # # TODO: Only considering these two for now when performing an HPO sweep. # probabilities={"resnet18": 0., "simple_convnet": 1.0}, ) # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3) # L2 regularization term for the model weights. weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6) # Batch size to use during training and evaluation. batch_size: Optional[int] = None # Number of hidden units (before the output head). # When left to None (default), the hidden size from the pretrained # encoder model will be used. When set to an integer value, an # additional Linear layer will be placed between the outputs of the # encoder in order to map from the encoder's output size H_e # to this new hidden size `new_hidden_size`. new_hidden_size: Optional[int] = None # Retrain the encoder from scratch or start from pretrained weights. train_from_scratch: bool = False # Wether we should keep the weights of the encoder frozen. freeze_pretrained_encoder_weights: bool = False # Hyper-parameters of the output head. output_head: OutputHead.HParams = mutable_field(OutputHead.HParams) # Wether the output head should be detached from the representations. # In other words, if the gradients from the downstream task should be # allowed to affect the representations. detach_output_head: bool = False def __init__(self, setting: SettingType, hparams: HParams, config: Config): super().__init__(setting=setting, hparams=hparams, config=config) self.save_hyperparameters({"hparams": self.hp.to_dict(), "config": self.config.to_dict()}) logger.debug(f"setting of type {type(self.setting)}") logger.debug(f"Observation space: {self.observation_space}") logger.debug(f"Action/Output space: {self.action_space}") logger.debug(f"Reward/Label space: {self.reward_space}") if self.config.debug and self.config.verbose: logger.debug("Config:") logger.debug(self.config.dumps(indent="\t")) logger.debug("Hparams:") logger.debug(self.hp.dumps(indent="\t")) for task_name, task in self.tasks.items(): logger.debug("Auxiliary tasks:") assert isinstance( task, AuxiliaryTask ), f"Task {task} should be a subclass of {AuxiliaryTask}." if task.coefficient != 0: logger.debug(f"\t {task_name}: {task.coefficient}") logger.info( f"Enabling the '{task_name}' auxiliary task (coefficient of " f"{task.coefficient})" ) task.enable() from pytorch_lightning.loggers import WandbLogger self.logger: WandbLogger def on_fit_start(self): super().on_fit_start() # NOTE: We could use this to log stuff to wandb. # NOTE: The Setting already logs itself in the `wandb.config` dict. def forward(self, observations: Setting.Observations) -> ForwardPass: # type: ignore """Forward pass of the model. For the given observations, creates a `ForwardPass`, a dict-like object which will hold the observations, the representations and the output head predictions. NOTE: Base implementation is in `model.py`. Parameters ---------- observations : Setting.Observations Observations from one of the environments of a Setting. Returns ------- ForwardPass A dict-like object which holds the observations, representations, and output head predictions (actions). See the `ForwardPass` class for more info. """ # The observations should come from a batched environment. If they are not, we # add a batch dimension, which we will then remove. assert isinstance(observations.x, (Tensor, np.ndarray)) # Check if the observations are batched or not. not_batched = not self._are_batched(observations) if not_batched: observations = observations.with_batch_dimension() forward_pass = super().forward(observations) # Simplified this for now, but we could add more flexibility later. assert isinstance(forward_pass, ForwardPass) # If the original observations didn't have a batch dimension, # Remove the batch dimension from the results. if not_batched: forward_pass = forward_pass.remove_batch_dimension() return forward_pass def create_output_head(self, task_id: Optional[int]) -> OutputHead: """Create an output head for the current action and reward spaces. NOTE: This assumes that the input, action and reward spaces don't change between tasks. Parameters ---------- task_id : Optional[int] ID of the task associated with this new output head. Can be `None`, which is interpreted as saying that either that task labels aren't available, or that this output head will be used for all tasks. Returns ------- OutputHead The new output head for the given task. """ # NOTE: Actual implementation is in `model.py`. This is added here just for # convenience when extending the baseline model. return super().create_output_head(task_id=task_id) def output_head_type(self, setting: SettingType) -> Type[OutputHead]: """Return the type of output head we should use in a given setting.""" # NOTE: Implementation is in `model.py`. return super().output_head_type(setting) @property def automatic_optimization(self) -> bool: return not isinstance(self.output_head, PolicyHead) def training_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment = None, dataloader_idx: int = None, optimizer_idx: int = None, ) -> ForwardPass: return super().training_step( batch, batch_idx=batch_idx, environment=environment or self.setting.train_env, dataloader_idx=dataloader_idx, optimizer_idx=optimizer_idx, ) def validation_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment = None, dataloader_idx: int = None, ) -> ForwardPass: return super().validation_step( batch, batch_idx=batch_idx, environment=environment or self.setting.val_env, dataloader_idx=dataloader_idx, ) def test_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment = None, dataloader_idx: int = None, ) -> ForwardPass: return super().test_step( batch, batch_idx=batch_idx, environment=environment or self.setting.test_env, dataloader_idx=dataloader_idx, ) def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment, phase: str, dataloader_idx: int = None, optimizer_idx: int = None, ) -> ForwardPass: return super().shared_step( batch, batch_idx=batch_idx, environment=environment, phase=phase, dataloader_idx=dataloader_idx, optimizer_idx=optimizer_idx, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching between tasks. Args: task_id (int, optional): the id of the new task. When None, we are basically being informed that there is a task boundary, but without knowing what task we're switching to. """ return super().on_task_switch(task_id) ================================================ FILE: sequoia/methods/models/base_model/model.py ================================================ """Base for the model used by the `BaseMethod`. This model is basically just an encoder and an output head. Both of these can be switched out/customized as needed. """ import dataclasses from dataclasses import dataclass from typing import Any, ClassVar, Dict, Generic, List, Optional, Tuple, Type, TypeVar, Union import gym import numpy as np import torch import torchvision.models as tv_models from gym import Space, spaces from gym.spaces.utils import flatdim from pytorch_lightning import LightningModule from simple_parsing import choice, mutable_field from simple_parsing.helpers.hparams import HyperParameters from simple_parsing.helpers.serialization import register_decoding_fn from torch import Tensor, nn, optim from torch.optim.optimizer import Optimizer # type: ignore from sequoia.common.config import Config from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support from sequoia.common.hparams import HyperParameters, categorical, log_uniform from sequoia.common.loss import Loss from sequoia.common.spaces import Image from sequoia.methods.models.output_heads import OutputHead from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.settings.base import Environment from sequoia.settings.base.setting import Actions, Observations, Rewards from sequoia.settings.rl import ContinualRLSetting, RLSetting from sequoia.settings.sl import SLSetting from sequoia.utils.logging_utils import get_logger from sequoia.utils.pretrained_utils import get_pretrained_encoder from ..fcnet import FCNet from ..forward_pass import ForwardPass from ..output_heads import ( ActorCriticHead, ClassificationHead, OutputHead, PolicyHead, RegressionHead, ) from ..output_heads.rl.episodic_a2c import EpisodicA2C from ..simple_convnet import SimpleConvNet logger = get_logger(__name__) SettingType = TypeVar("SettingType", bound=IncrementalAssumption) available_optimizers: Dict[str, Type[Optimizer]] = { "sgd": optim.SGD, "adam": optim.Adam, "rmsprop": optim.RMSprop, } available_encoders: Dict[str, Type[nn.Module]] = { "vgg16": tv_models.vgg16, "resnet18": tv_models.resnet18, "resnet34": tv_models.resnet34, "resnet50": tv_models.resnet50, "resnet101": tv_models.resnet101, "resnet152": tv_models.resnet152, "alexnet": tv_models.alexnet, "densenet": tv_models.densenet161, # TODO: Add the self-supervised pl modules here! "simple_convnet": SimpleConvNet, } class Model(LightningModule, Generic[SettingType]): """Basic Model to be used by a Method. Based on the `LightningModule` (nn.Module extended by pytorch-lightning). This Model can be trained on either Supervised or Reinforcement Learning environments. This model splits the learning task into a representation-learning problem and a downstream task (output head) applied on top of it. The most important method to understand is the `get_loss` method, which is used by the [train/val/test]_step methods which are called by pytorch-lightning. """ @dataclass class HParams(HyperParameters): """HParams of the Model.""" # Class variable versions of the above dicts, for easier subclassing. # NOTE: These don't get parsed from the command-line. available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = available_optimizers.copy() available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = available_encoders.copy() # Learning rate of the optimizer. learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3) # L2 regularization term for the model weights. weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6) # Which optimizer to use. optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam) # Use an encoder architecture from the torchvision.models package. encoder: Type[nn.Module] = categorical( available_encoders, default=tv_models.resnet18, # TODO: Only using these two by default when performing a sweep. probabilities={"resnet18": 0.5, "simple_convnet": 0.5}, ) # Batch size to use during training and evaluation. batch_size: Optional[int] = None # Number of hidden units (before the output head). # When left to None (default), the hidden size from the pretrained # encoder model will be used. When set to an integer value, an # additional Linear layer will be placed between the outputs of the # encoder in order to map from the pretrained encoder's output size H_e # to this new hidden size `new_hidden_size`. new_hidden_size: Optional[int] = None # Retrain the encoder from scratch. train_from_scratch: bool = False # Wether we should keep the weights of the pretrained encoder frozen. freeze_pretrained_encoder_weights: bool = False # Settings for the output head. # TODO: This could be overwritten in a subclass to do classification or # regression or RL, etc. output_head: OutputHead.HParams = mutable_field(OutputHead.HParams) # Wether the output head should be detached from the representations. # In other words, if the gradients from the downstream task should be # allowed to affect the representations. detach_output_head: bool = False # Which algorithm to use for the output head when in an RL setting. # TODO: Run the PolicyHead in the following conditions: # - Compare the big backward pass vs many small ones # - Try to have it learn from pixel input, if possible # - Try to have it learn on a multi-task RL setting, # TODO: Finish the ActorCritic and EpisodicA2C heads. rl_output_head_algo: Type[OutputHead] = choice( { "reinforce": PolicyHead, "a2c_online": ActorCriticHead, "a2c_episodic": EpisodicA2C, }, default=EpisodicA2C, ) def __init__(self, setting: SettingType, hparams: HParams, config: Config): super().__init__() self.setting: SettingType = setting self.hp: Model.HParams = hparams self.Observations: Type[Observations] = setting.Observations self.Actions: Type[Actions] = setting.Actions self.Rewards: Type[Rewards] = setting.Rewards # Choose what type of output head to use depending on the kind of # Setting. self.OutputHead: Type[OutputHead] = self.output_head_type(setting) self.observation_space: gym.Space = setting.observation_space self.action_space: gym.Space = setting.action_space self.reward_space: gym.Space = setting.reward_space self.input_shape = self.observation_space.x.shape self.reward_shape = self.reward_space.shape self.config: Config = config # NOTE: do NOT set the `datamodule` property, otherwise the trainer will ignore # the passed train/val/test dataloader from the Setting. # self.datamodule: LightningDataModule = setting # (Testing) Setting this attribute is supposed to help with ddp/etc # training in pytorch-lightning. Not 100% sure. # self.example_input_array = torch.rand(self.batch_size, *self.input_shape) # Create the encoder and the output head. # Space of our encoder representations. self.representation_space: gym.Space observing_state = not isinstance(setting.observation_space.x, Image) if isinstance(setting, ContinualRLSetting) and observing_state: # ISSUE # 62: Need to add a dense network instead of no encoder, and # change the PolicyHead to have only one layer. # Only pass the image, not the task labels to the encoder (for now). input_dims = flatdim(self.observation_space["x"]) output_dims = self.hp.new_hidden_size or 128 self.encoder = FCNet( in_features=input_dims, out_features=output_dims, hidden_layers=3, hidden_neurons=[256, 128, output_dims], activation=nn.ReLU, ) self.representation_space = add_tensor_support( spaces.Box(low=-np.inf, high=np.inf, shape=[output_dims]) ) self.hidden_size = output_dims else: self.encoder, self.hidden_size = self.make_encoder() # TODO: Check that the outputs of the encoders are actually # flattened. I'm not sure they all are, which case the samples # wouldn't match with this space. self.representation_space = spaces.Box(-np.inf, np.inf, (self.hidden_size,), np.float32) logger.info(f"Moving encoder to device {self.config.device}") self.encoder = self.encoder.to(self.config.device) self.representation_space = add_tensor_support(self.representation_space) # Upgrade the type of hparams for the output head based on the setting, if # needed. if not isinstance(self.hp.output_head, self.OutputHead.HParams): self.hp.output_head = self.hp.output_head.upgrade(target_type=self.OutputHead.HParams) # Then, create the 'default' output head. self.output_head: OutputHead = self.create_output_head(task_id=0) def make_encoder(self) -> Tuple[nn.Module, int]: """Creates an Encoder model and returns the number of output dimensions. Returns: Tuple[nn.Module, int]: the encoder and the hidden size. TODO: Could instead return its output space, in case we didn't necessarily want to flatten the representations (e.g. for image segmentation tasks). """ # Get the chosen type of encoder encoder_type: Type[nn.Module] = self.hp.encoder # This does a few things: # 1. Instantiate the model (with pretrained weights if desired) # 2. Infer the output size of the model # 3. Remove the output fully-connected layer, if present. encoder, hidden_size = get_pretrained_encoder( encoder_model=encoder_type, pretrained=not self.hp.train_from_scratch, freeze_pretrained_weights=self.hp.freeze_pretrained_encoder_weights, new_hidden_size=self.hp.new_hidden_size, ) return encoder, hidden_size def forward(self, observations: IncrementalAssumption.Observations) -> ForwardPass: """Forward pass of the Model. Returns a ForwardPass object (acts like a dict of Tensors.) """ # If there's any additional 'input preprocessing' to do, do it here. # NOTE (@lebrice): This is currently done this way so that we don't have # to pass transforms to the settings from the method side. observations = self.preprocess_observations(observations) # Encode the observation to get representations. assert observations.x.device == self.device representations = self.encode(observations) # Pass the observations and representations to the output head to get # the 'action' (prediction). if self.hp.detach_output_head: representations = representations.detach() actions = self.output_head(observations=observations, representations=representations) # NOTE: Need to put a `rewards` field in this forward_pass, so we can pass it # to the training_step_end method, which will calculate and aggregate the loss forward_pass = ForwardPass( observations=observations, representations=representations, actions=actions, rewards=None, ) return forward_pass def encode(self, observations: Observations) -> Tensor: """Encodes a batch of samples `x` into a hidden vector. Args: observations (Union[Tensor, Observation]): Tensor of Observation containing a batch of samples (before preprocess_observations). Returns: Tensor: The hidden vector / embedding for that sample, with size [B, `self.hidden_size`]. """ # Here in this base model the encoder only takes the 'x' from the # observations. x = torch.as_tensor(observations.x, device=self.device, dtype=self.dtype) assert x.device == self.device encoder_parameters = list(self.encoder.parameters()) encoder_device = encoder_parameters[0].device if encoder_parameters else self.device # BUG: WHen using the EWCTask, there seems to be some issues related to which # device the model is stored on. if encoder_device != self.device: x = x.to(encoder_device) # self.encoder = self.encoder.to(self.device) h_x = self.encoder(x) if encoder_device != self.device: h_x = h_x.to(self.device) if isinstance(h_x, list) and len(h_x) == 1: # Some pretrained encoders sometimes give back a list with one tensor. (?) h_x = h_x[0] if not isinstance(h_x, Tensor): h_x = torch.as_tensor(h_x, device=self.device, dtype=self.dtype) return h_x def create_output_head(self, task_id: Optional[int]) -> OutputHead: """Create an output head for the current action and reward spaces. NOTE: This assumes that the input, action and reward spaces don't change between tasks. Parameters ---------- task_id : Optional[int] ID of the task associated with this new output head. Can be `None`, which is interpreted as saying that either that task labels aren't available, or that this output head will be used for all tasks. Returns ------- OutputHead The new output head for the given task. """ # NOTE: This assumes that the input, action and reward spaces don't change # between tasks. # TODO: Maybe add something like `setting.get_action_space(task_id)` input_space: Space = self.representation_space action_space: Space = self.action_space reward_space: Space = self.reward_space hparams: OutputHead.HParams = self.hp.output_head # NOTE: self.OutputHead is the type of output head used for the current setting. # NOTE: Could also use a name for the output head using the task id, for example output_head_name = None # Use the name defined on the output head. output_head = self.OutputHead( input_space=input_space, action_space=action_space, reward_space=reward_space, hparams=hparams, name=output_head_name, ).to(self.device) # Do not add the output head's parameters to the optimizer of the whole model, # if it already has an `optimizer` attribute of its own. (NOTE: this isn't the # case in practice so far) add_to_optimizer = not getattr(output_head, "optimizer", None) if add_to_optimizer: # Add the new parameters to the Optimizer, if it already exists. # If we don't yet have a Trainer, the Optimizer hasn't been created # yet. Once it is created though, it will get the parameters of this output # head from `self.parameters()` is passed to its constructor, since the # output head will be stored in `self.output_heads`. if self.trainer: optimizer: Optimizer = self.optimizers() assert isinstance(optimizer, Optimizer) optimizer.add_param_group({"params": output_head.parameters()}) return output_head def output_head_type(self, setting: SettingType) -> Type[OutputHead]: """Return the type of output head we should use in a given setting.""" if isinstance(setting, RLSetting): if not isinstance(setting.action_space, spaces.Discrete): raise NotImplementedError("Only support discrete actions for now.") assert issubclass(self.hp.rl_output_head_algo, OutputHead) return self.hp.rl_output_head_algo assert isinstance(setting, SLSetting) if isinstance(setting.action_space, spaces.Discrete): # Discrete actions: i.e. classification problem. if isinstance(setting.reward_space, spaces.Discrete): # Classification problem: Discrete action, Discrete rewards (labels). return ClassificationHead # Reinforcement learning problem: Discrete action, float rewards. # TODO: There might be some RL environments with discrete # rewards, right? For instance CartPole is, on-paper, a discrete # reward setting, since its always 1. if isinstance(setting.action_space, spaces.Box): # Regression problem: For now there is only RL that has such a # space. return RegressionHead raise NotImplementedError(f"Unsupported action space: {setting.action_space}") def training_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment = None, dataloader_idx: int = None, optimizer_idx: int = None, ) -> ForwardPass: return self.shared_step( batch, batch_idx=batch_idx, environment=environment or self.setting.train_env, phase="train", dataloader_idx=dataloader_idx, optimizer_idx=optimizer_idx, ) def validation_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment = None, dataloader_idx: int = None, ) -> ForwardPass: return self.shared_step( batch, batch_idx=batch_idx, environment=environment or self.setting.val_env, phase="val", dataloader_idx=dataloader_idx, ) def test_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment = None, dataloader_idx: int = None, ) -> ForwardPass: return self.shared_step( batch, batch_idx=batch_idx, environment=environment or self.setting.test_env, phase="test", dataloader_idx=dataloader_idx, ) def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment, phase: str, dataloader_idx: int = None, optimizer_idx: int = None, ) -> ForwardPass: """Main logic of the "forward pass". This is used as part of `training_step`, `validation_step` and `test_step`. See the PL docs for `training_step` for more info. NOTE: The prediction / environment interaction / loss calculation has been moved into the `shared_step_end` method for DP to also work. """ # Split the batch into observations and (maybe) rewards. observations: Observations rewards: Optional[Rewards] if isinstance(batch, tuple) and len(batch) == 2: observations, rewards = batch else: assert isinstance(batch, self.Observations), batch observations, rewards = batch, None # Get the forward pass results, containing: # - "observation": the augmented/transformed/processed observation. # - "representations": the representations for the observations. # - "actions": The actions (predictions) forward_pass: ForwardPass = self(observations) if rewards is not None: forward_pass = dataclasses.replace(forward_pass, rewards=rewards) return forward_pass def training_step_end(self, step_outputs: Union[Loss, List[Loss]]) -> Loss: loss_object: Loss = self.shared_step_end( step_outputs=step_outputs, phase="train", environment=self.setting.train_env ) loss = loss_object.loss if not isinstance(loss, Tensor) or not loss.requires_grad: # NOTE: There might be no loss at some steps, because for instance # we haven't reached the end of an episode in an RL setting. return None # NOTE In RL, we can only update the model's weights on steps where the output # head has as loss, because the output head has buffers of tensors whose grads # would become invalidated if we performed the optimizer step. if loss.requires_grad and not self.automatic_optimization: output_head_loss = loss_object.losses.get(self.output_head.name) update_model = output_head_loss is not None and output_head_loss.requires_grad optimizer = self.optimizers() self.manual_backward(loss, optimizer, retain_graph=not update_model) if update_model: optimizer.step() optimizer.zero_grad() # BUG: Need to return this dict, otherwise the optimizer closure in the DP # accelerator fails (it only expects to get `dict` or `Tensor` values for # `training_step_output` in `_process_training_step_output`) # return loss # NOTE: the 'hidden' key isn't currently used, but it could be in the future if # we added support for BBPT, i.e. recurrent policies or output heads, etc. return {"loss": loss, "hidden": loss_object.tensors.get("hidden")} def validation_step_end(self, step_outputs: Union[ForwardPass, List[ForwardPass]]) -> Loss: return self.shared_step_end( step_outputs=step_outputs, phase="val", environment=self.setting.val_env ) def test_step_end(self, step_outputs: Union[ForwardPass, List[ForwardPass]]) -> Loss: return self.shared_step_end( step_outputs=step_outputs, phase="test", environment=self.setting.test_env ) def shared_step_end( self, step_outputs: Union[ForwardPass, List[ForwardPass]], phase: str, environment: Environment, ) -> Loss: """Called with the outputs of each replica's `[train/validation/test]_step`: - Sends the Actions from each worker to the environment to obtain rewards, if necessary; - Calculates the loss, given the merged forward pass and the rewards/labels; - Aggregates the losses/metrics from each replica, logs the relevant values, and returns the aggregated losses and metrics (a single Loss object). """ forward_pass: ForwardPass if isinstance(step_outputs, list): forward_pass = ForwardPass.concatenate(step_outputs) else: forward_pass = step_outputs # get the actions from the forward pass: actions = forward_pass.actions rewards: Optional[Rewards] = forward_pass.rewards if rewards is None: # Get the reward from the environment (the dataloader). if self.config.debug and self.config.render: environment.render("human") # import matplotlib.pyplot as plt # plt.waitforbuttonpress(10) assert isinstance(actions, Actions), actions rewards = environment.send(actions) assert rewards is not None # BUG: Rewards is array of [None]s in TraditionalSL and MultiTask SL! assert isinstance(rewards, Rewards), rewards # Now that we have the rewards, we calculate the loss. loss: Loss = self.get_loss(forward_pass, rewards, loss_name=phase) loss_tensor: Tensor = loss.loss if loss_tensor == 0.0: return loss loss_pbar_dict = loss.to_pbar_message() for key, value in loss_pbar_dict.items(): assert not isinstance(value, dict), "shouldn't be nested at this point!" self.log(key, value, prog_bar=self.config.debug, logger=False) logger.debug(f"{key}: {value}") loss_log_dict = loss.to_log_dict(verbose=self.config.verbose) for key, value in loss_log_dict.items(): assert not isinstance(value, dict), "shouldn't be nested at this point!" self.log(key, value, prog_bar=False, logger=True) return loss def split_batch(self, batch: Any) -> Tuple[Observations, Optional[Rewards]]: """Splits the batch into the observations and the rewards. Uses the types defined on the setting that this model is being applied on (which were copied to `self.Observations` and `self.Actions`) to figure out how many fields each type requires. TODO: This is slightly confusing, should probably get rid of this. """ observations: Observations rewards: Optional[Rewards] if isinstance(batch, self.Observations): observations, rewards = batch, None else: assert isinstance(batch, (tuple, list)) and len(batch) == 2 observations, rewards = batch assert isinstance(observations, self.Observations), ( observations, type(observations), self.Observations, ) # Move the observations to the right device, and convert numpy arrays to # tensors. observations = observations.torch(device=self.device) if rewards is not None: rewards = rewards.torch(device=self.device) return observations, rewards def get_loss( self, forward_pass: ForwardPass, rewards: Rewards = None, loss_name: str = "" ) -> Loss: """Gets a Loss given the results of the forward pass and the reward. Args: forward_pass (Dict[str, Tensor]): Results of the forward pass. reward (Tensor, optional): The reward that resulted from the action chosen in the forward pass. Defaults to None. loss_name (str, optional): The name for the resulting Loss. Defaults to "". Returns: Loss: a Loss object containing the loss tensor, associated metrics and sublosses. This could look a bit like this, for example: ``` action = forward_pass["action"] predicted_reward = forward_pass["predicted_reward"] nce = self.loss_fn(predicted_reward, reward) loss = Loss(loss_name, loss=nce) return loss ``` """ assert loss_name # Create an 'empty' Loss object with the given name, so that we always # return a Loss object, even when `y` is None and we can't the loss from # the output_head. total_loss = Loss(name=loss_name) if rewards: assert rewards.y is not None # TODO: If we decide to re-organize the forward pass object to also # contain the predictions of the self-supervised tasks, (atm they # perform their 'forward pass' in their get_loss functions) # then we could change 'actions' to be a dict, and index the # dict with the 'name' of each output head, like so: # actions_of_head = forward_pass.actions[self.output_head.name] # rewards_of_head = forward_pass.rewards[self.output_head.name] # For now though, we only have one "prediction" in the actions: actions = forward_pass.actions # So far we only use 'y' from the rewards in the output head. supervised_loss = self.output_head_loss(forward_pass, actions=actions, rewards=rewards) total_loss += supervised_loss return total_loss def output_head_loss( self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards ) -> Loss: """Gets the Loss of the output head.""" # TODO: The rewards can still contain just numpy arrays, keeping it so for now. assert actions.device == self.device # == rewards.device (would be None) return self.output_head.get_loss( forward_pass, actions=actions, rewards=rewards, ) def preprocess_observations(self, observations: Observations) -> Observations: assert isinstance(observations, self.Observations) # TODO: Make sure this also works in the supervised setting. # Convert all numpy arrays to tensors if possible. # TODO: Make sure this still works in settings without task labels ( # None in numpy arrays) observations = observations.torch(device=self.device) return observations def preprocess_rewards(self, reward: Rewards) -> Rewards: return reward def configure_optimizers(self): optimizer_class: Type[Optimzier] = self.hp.optimizer options = { "lr": self.hp.learning_rate, "weight_decay": self.hp.weight_decay, } return optimizer_class( self.parameters(), lr=self.hp.learning_rate, weight_decay=self.hp.weight_decay, ) @property def batch_size(self) -> int: return self.hp.batch_size @batch_size.setter def batch_size(self, value: int) -> None: self.hp.batch_size = value @property def learning_rate(self) -> float: return self.hp.learning_rate @learning_rate.setter def learning_rate(self, value: float) -> None: self.hp.learning_rate = value def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching between tasks. Args: task_id (Optional[int]): the Id of the task. """ def shared_modules(self) -> Dict[str, nn.Module]: """Returns any trainable modules in `self` that are shared across tasks. By giving this information, these weights can then be used in regularization-based auxiliary tasks like EWC, for example. Returns ------- Dict[str, nn.Module]: Dictionary mapping from name to the shared modules, if any. """ shared_modules: Dict[str, nn.Module] = nn.ModuleDict() if self.encoder: shared_modules["encoder"] = self.encoder if self.output_head: shared_modules["output_head"] = self.output_head return shared_modules # def summarize(self, mode: str = ModelSummary.MODE_DEFAULT) -> ModelSummary: # model_summary = ModelSummary(self, mode=mode) # log.debug("\n" + str(model_summary)) # return model_summary def _are_batched(self, observations: IncrementalAssumption.Observations) -> bool: """Returns wether these observations are batched.""" assert isinstance(self.observation_space, spaces.Dict) # if observations.task_labels is not None: # if isinstance(observations.task_labels, int): # return True # assert isinstance(observations.task_labels, (np.ndarray, Tensor)) # assert False, observations.shapes # return observations.task_labels.shape and observations.task_labels.shape[0] x_space: spaces.Box = self.observation_space["x"] if isinstance(x_space, Image) or len(x_space.shape) == 4: return observations.x.ndim == 4 if not isinstance(x_space, spaces.Box): raise NotImplementedError( f"Don't know how to tell if obs space {x_space} is batched, only " f"support Box spaces for the observation's 'x' for now." ) # self.observation_space *should* usually reflect the shapes of individual # (non-batched) observations. return observations.x.ndim == len(x_space.shape) + 1 # Registering this handler for decoding the type of output head to use (a field in the # hparams) from a dictionary. register_decoding_fn(Type[OutputHead], lambda v: v) ================================================ FILE: sequoia/methods/models/base_model/multihead_model.py ================================================ from dataclasses import dataclass, replace from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np import torch import torch.nn.functional as F from torch import Tensor, nn from sequoia.common import Batch, Config, Loss from sequoia.settings import Actions, Environment, Observations, Rewards from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.utils.generic_functions import concatenate, get_slice, stack from sequoia.utils.logging_utils import get_logger from ..forward_pass import ForwardPass from ..output_heads import OutputHead from .model import Model, SettingType logger = get_logger(__name__) class MultiHeadModel(Model[SettingType]): """Mixin that adds multi-head prediction to the Model when task labels are available. """ @dataclass class HParams(Model.HParams): """Hyperparameters specific to a multi-head model.""" # Wether to create one output head per task. multihead: Optional[bool] = None def __init__(self, setting: SettingType, hparams: HParams, config: Config): super().__init__(setting=setting, hparams=hparams, config=config) # Dictionary of output heads! self.output_heads: Dict[str, OutputHead] = nn.ModuleDict() self.hp: MultiHeadModel.HParams self.setting: SettingType # TODO: Add an optional task inference mechanism # See https://github.com/lebrice/Sequoia/issues/49 self.task_inference_module: Optional[nn.Module] = None self.previous_task: Optional[int] = None self.current_task: Optional[int] = None self.previous_task_labels: Optional[Sequence[int]] = None if setting.task_labels_at_train_time: # NOTE: Not sure if this could cause an issue when setting is a SettingProxy starting_task_id = 0 # setting.current_task_id else: starting_task_id = None self.output_heads[str(starting_task_id)] = self.output_head def output_head_loss( self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards ) -> Loss: """TODO: Need to then re-split stuff (undo the work we did in forward) to get a loss per output head? """ # Asks each output head for its contribution to the loss. observations: IncrementalAssumption.Observations = forward_pass.observations task_labels = observations.task_labels if isinstance(task_labels, Tensor): task_labels = task_labels.cpu().numpy() batch_size = forward_pass.batch_size assert batch_size is not None if task_labels is None: if self.task_inference_module: # TODO: Predict the task ids using some kind of task # inference mechanism. task_labels = self.task_inference_module(forward_pass) else: raise NotImplementedError( "Multihead model doesn't have access to task labels and " "doesn't have a task inference module!" ) # TODO: Maybe use the last trained output head, by default? # TODO: Check if this is still necessary if self.previous_task_labels is None: self.previous_task_labels = task_labels # Default behaviour: use the (only) output head. if not self.hp.multihead: return self.output_head.get_loss( forward_pass, actions=actions, rewards=rewards, ) # The sum of all the losses from all the output heads. total_loss = Loss(self.output_head.name) task_switched_in_env = task_labels != self.previous_task_labels # This `done` attribute isn't added in supervised settings. episode_ended = getattr(observations, "done", np.zeros(batch_size, dtype=bool)) # TODO: Remove all this useless conversion from Tensors to ndarrays if isinstance(episode_ended, Tensor): episode_ended = episode_ended.cpu().numpy() # logger.debug(f"Task labels: {task_labels}, task switched in env: {task_switched_in_env}, episode ended: {episode_ended}") done_set_to_false_temporarily_indices = [] if any(episode_ended & task_switched_in_env): # In the environments where there was a task switch to a different task and # where some episodes ended, we need to first get the corresponding output # head losses from these environments first. if self.batch_size in {None, 1}: # If the batch size is 1, this is a little bit simpler to deal with. previous_task: int = self.previous_task_labels[0].item() from sequoia.methods.models.output_heads.rl import PolicyHead previous_output_head = self.output_heads[str(previous_task)] assert isinstance( previous_output_head, PolicyHead ), "todo: assuming that this only happends in RL currently." # We want the loss from that output head, but we don't want to # re-compute it below! env_index_in_previous_batch = 0 # breakpoint() logger.debug( f"Getting a loss from the output head for task {previous_task}, that was used for the last task." ) env_episode_loss = previous_output_head.get_episode_loss( env_index_in_previous_batch, done=True ) # logger.debug(f"Loss from that output head: {env_episode_loss}") # Add this end-of-episode loss to the total loss. # breakpoint() # BUG: This can sometimes (rarely) be None! Need to better understand # why this is happening. if env_episode_loss is None: logger.warning( RuntimeWarning( f"BUG: Env {env_index_in_previous_batch} gave back a loss " f"of `None`, when we expected a loss from that output head " f"for task id {previous_task}." ) ) else: total_loss += env_episode_loss # We call on_episode_end so the output head can clear the relevant # buffers. Note that get_episode_loss(env_index, done=True) doesn't # clear the buffers, it just calculates a loss. previous_output_head.on_episode_end(env_index_in_previous_batch) # Set `done` to `False` for that env, to prevent the output head for the # new task from seeing the first observation in the episode as the last. observations.done[env_index_in_previous_batch] = False # FIXME: If we modify that entry in-place, then even after this method # returns, the change will persist.. Therefore we just save the indices # that we altered, and reset them before returning. done_set_to_false_temporarily_indices.append(env_index_in_previous_batch) else: raise NotImplementedError( "TODO: The BaseModel doesn't yet support having multiple " "different tasks within the same batch in RL. " ) # IDEA: Need to somehow pass the indices of which env to take care of to # each output head, so they can create / clear buffers only when needed. assert task_labels is not None all_task_indices: Dict[int, Tensor] = get_task_indices(task_labels) # Get the loss from each output head: if len(all_task_indices) == 1: # If everything is in the same task (only one key), no need to split/merge # stuff, so it's a bit easier: task_id: int = task_labels[0].item() self.setup_for_task(task_id) # task_output_head = self.output_heads[str(task_id)] total_loss += super().output_head_loss(forward_pass, actions=actions, rewards=rewards) # total_loss += self.output_head.get_loss( # forward_pass, actions=actions, rewards=rewards, # ) else: # Split off the input batch, do a forward pass for each sub-task. # (could be done in parallel but whatever.) # TODO: Also, not sure if this will play well with DP, DDP, etc. for task_id, task_indices in all_task_indices.items(): # Make a partial observation without the task labels, so that # super().forward will use the current output head. logger.debug( f"Getting output head loss for " f"{len(task_indices)/batch_size:.0%} of the batch which " f"has task_id of '{task_id}'." ) self.setup_for_task(task_id) task_loss = super().output_head_loss( forward_pass=get_slice(forward_pass, task_indices), actions=get_slice(actions, task_indices), rewards=get_slice(rewards, task_indices), ) # NOTE: useful for debugging, but shouldn't be enabled normally. # task_loss.name += f"(task {task_id})" logger.debug(f"Task {task_id} loss: {task_loss}") total_loss += task_loss self.previous_task_labels = task_labels # FIXME: Reset the 'done' to True, if we manually set it to False. for index in done_set_to_false_temporarily_indices: observations.done[index] = True return total_loss def on_before_zero_grad(self, optimizer): super().on_before_zero_grad(optimizer) from sequoia.methods.models.output_heads.rl import PolicyHead for task_id_string, output_head in self.output_heads.items(): if isinstance(output_head, PolicyHead): output_head.detach_all_buffers() def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int, environment: Environment, phase: str, dataloader_idx: int = None, optimizer_idx: int = None, ) -> Dict: assert phase if dataloader_idx is not None: logger.debug( "TODO: We were indirectly given a task id with the " "dataloader_idx. Ignoring for now, as we're trying to avoid " "this (the task labels should be given for each example " "anyway). " ) dataloader_idx = None return super().shared_step( batch=batch, batch_idx=batch_idx, environment=environment, phase=phase, dataloader_idx=dataloader_idx, optimizer_idx=optimizer_idx, ) def on_task_switch(self, task_id: Optional[int]): """Called when switching between tasks. Args: task_id (int, optional): the id of the new task. When None, we are basically being informed that there is a task boundary, but without knowing what task we're switching to. NOTE: You can check wether this task switch is occuring at train or test time using `self.training`. """ logger.info(f"Switching from task {self.current_task} -> {task_id}.") # TODO: Move these to the base model perhaps? (In case there is ever a # re-ordering of the mixins that make up the BaseModel) super().on_task_switch(task_id) self.previous_task = self.current_task self.current_task = task_id if task_id is not None and self.hp.multihead: # Switch the output head to use. self.output_head = self.get_or_create_output_head(task_id) def shared_modules(self) -> Dict[str, nn.Module]: """Returns any trainable modules in `self` that are shared across tasks. By giving this information, these weights can then be used in regularization-based auxiliary tasks like EWC, for example. This dict contains the encoder and output head, by default, as well as any shared modules in the auxiliary tasks. When using only multiple output heads (i.e. when `self.hp.multihead` is `True`), then we remove the output head from the dict before returning it. Returns ------- Dict[str, nn.Module]: Dictionary mapping from name to the shared modules, if any. """ shared_modules = super().shared_modules() if self.hp.multihead: shared_modules.pop("output_head") return shared_modules def load_state_dict( self, state_dict: Union[Dict[str, Tensor], Dict[str, Tensor]], strict: bool = True, ): if self.hp.multihead: # TODO: Figure out exactly where/when/how pytorch-lightning is # trying to load the model from, because there are some keys # missing (['output_heads.1.output.weight', 'output_heads.1.output.bias']) # For now, we're just gonna pretend it's not a problem, I guess? strict = False missing_keys, unexpected_keys = super().load_state_dict(state_dict=state_dict, strict=False) # TODO: Double-check that this makes sense and works properly. if self.hp.multihead and unexpected_keys: for i in range(self.setting.nb_tasks): # Try to load the output head weights logger.info(f"Creating a new output head for task {i}") new_output_head = self.create_output_head(self.setting, task_id=i) # FIXME: TODO: This is wrong. We should create all the # output heads if they aren't already created, and then try to # load the state_dict again. new_output_head.load_state_dict( {k: state_dict[k] for k in unexpected_keys}, strict=False, ) key = str(i) self.output_heads[key] = new_output_head.to(self.device) if missing_keys or unexpected_keys: logger.debug(f"Missing keys: {missing_keys}, unexpected keys: {unexpected_keys}") return missing_keys, unexpected_keys def get_or_create_output_head(self, task_id: int) -> nn.Module: """Retrieves or creates a new output head for the given task index. Also stores it in the `output_heads`, and adds its parameters to the optimizer. """ task_output_head: nn.Module assert self.hp.multihead, "This should get called when model isnt multi-headed!" if str(task_id) in self.output_heads.keys(): task_output_head = self.output_heads[str(task_id)] else: logger.info(f"Creating a new output head for task {task_id}.") # NOTE: This also takes care to add the output head's parameters to the # optimizer. task_output_head = self.create_output_head(task_id=task_id) self.output_heads[str(task_id)] = task_output_head return task_output_head def forward(self, observations: IncrementalAssumption.Observations) -> ForwardPass: """Smart forward pass with multi-head predictions and task inference. This forward pass can handle three different scenarios, depending on the contents of `observations.task_labels`: 1. Base case: task labels are present, and all examples are from the same task. - Perform the 'usual' forward pass (e.g. `super().forward(observations)`). 2. Task labels are present, and the batch contains a mix of samples from different tasks: - Create slices of the batch for each task, where all items in each 'sub-batch' come from the same task. - Perform a forward pass for each task, by calling `forward` recursively with the sub-batch for each task as an argument (Case 1). 3. Task labels are *not* present. Perform some type of task inference, using the `task_inference_forward_pass` method. Check its docstring for more info. Parameters ---------- observations : Observations Observations from an environment. As of right now, all Settings produce observations with (at least) the two following attributes: - x: Tensor (the images/inputs) - task_labels: Optional[Tensor] (The task labels, when available, else None) Returns ------- Tensor The outputs, which in this case are the classification logits. All three cases above produce the same kind of outputs. """ # TODO: Shouldn't have to do this here, since we have the @auto_move_data dec... # observations = observations.to(self.device) task_ids: Optional[Tensor] = observations.task_labels if isinstance(task_ids, np.ndarray) and task_ids.dtype == np.object: task_ids = task_ids.tolist() if len(task_ids) == 1: task_ids = task_ids[0] if task_ids is None: # Run the forward pass with task inference turned on. return self.task_inference_forward_pass(observations) task_ids = torch.as_tensor(task_ids, device=self.device, dtype=int) task_ids_present_in_batch = torch.unique(task_ids) if len(task_ids_present_in_batch) > 1: # Case 2: The batch contains data from more than one task. return self.split_forward_pass(observations) # Base case: "Normal" forward pass, where all items come from the same task. # - Setup the model for this task, however you want, and then do a forward pass, # as you normally would. # NOTE: If you want to reuse this cool multi-headed forward pass in your # own model, these lines here are what you'd want to change. task_id: int = task_ids_present_in_batch.item() if task_id != self.current_task and self.hp.multihead: # Setup the model for this task. For now we just switch the output head. self.output_head = self.get_or_create_output_head(task_id) return super().forward(observations) def setup_for_task(self, task_id: int) -> None: if task_id is not None and self.hp.multihead: # Setup the model for this task. For now we just switch the output head. self.output_head = self.get_or_create_output_head(task_id) def split_forward_pass(self, observations: Observations) -> ForwardPass: """Perform a forward pass for a batch of observations from different tasks. This is called in `forward` when there is more than one unique task label in the batch. This will call `forward` for each task id present in the batch, passing it a slice of the batch, in which all items are from that task. NOTE: This cannot cause recursion problems, because `forward`(d=2) will be called with a bach of items, all of which come from the same task. This makes it so `split_forward_pass` cannot then be called again. Parameters ---------- observations : Observations Observations, in which the task labels might not all be the same. Returns ------- Tensor The outputs/logits from each task, re-assembled into a single batch, with the task ordering from `observations` preserved. """ assert observations.task_labels is not None assert self.hp.multihead, "Can only use split forward pass with multiple heads." # We have task labels. task_labels = observations.task_labels if isinstance(task_labels, Tensor): task_labels = task_labels.cpu().numpy() # Get the indices of the items from each task. all_task_indices_dict: Dict[int, np.ndarray] = get_task_indices(task_labels) if len(all_task_indices_dict) == 1: # No need to split the input, since everything is from the same task. task_id: int = task_labels[0].item() self.setup_for_task(task_id) return self.forward(observations) # Placeholder for the predicitons for each item in the batch. # NOTE: We put each item in the batch in this list and then stack the results. batch_size = len(task_labels) task_outputs: List[Batch] = [None for _ in range(batch_size)] for task_id, task_indices in all_task_indices_dict.items(): # Take a slice of the observations, in which all items come from this task. task_observations = get_slice(observations, task_indices) # Perform a "normal" forward pass (Base case). task_output = self.forward(task_observations) # Store the outputs for the items from this task in the list. for i, index in enumerate(task_indices): task_outputs[index] = get_slice(task_output, i) # Stack the results. assert all(item is not None for item in task_outputs) merged_outputs = concatenate(task_outputs) return merged_outputs def task_inference_forward_pass(self, observations: Observations) -> Tensor: """Forward pass with a simple form of task inference.""" # We don't have access to task labels (`task_labels` is None). # --> Perform a simple kind of task inference: # 1. Perform a forward pass with each task's output head; # 2. Merge these predictions into a single prediction somehow. assert observations.task_labels is None or all(observations.task_labels == None) # NOTE: This assumes that the observations are batched. # These are used below to indicate the shape of the different tensors. B = observations.x.shape[0] T = n_known_tasks = len(self.output_heads) N = self.action_space.n # Tasks encountered previously and for which we have an output head. known_task_ids: list[int] = list(range(n_known_tasks)) assert known_task_ids # Placeholder for the predictions from each output head for each item in the # batch task_outputs = [None for _ in known_task_ids] # [T, B, N] # Get the forward pass for each task. for task_id in known_task_ids: # Create 'fake' Observations for this forward pass, with 'fake' task labels. # NOTE: We do this so we can call `self.forward` and not get an infinite # recursion. task_labels = torch.full([B], task_id, device=self.device, dtype=int) task_observations = replace(observations, task_labels=task_labels) # Setup the model for task `task_id`, and then do a forward pass. task_forward_pass = self.forward(task_observations) task_outputs[task_id] = task_forward_pass # 'Merge' the predictions from each output head using some kind of task # inference. assert all(item is not None for item in task_outputs) # Stack the predictions (logits) from each output head. stacked_forward_pass: ForwardPass = stack(task_outputs, dim=1) logits_from_each_head = stacked_forward_pass.actions.logits assert logits_from_each_head.shape == (B, T, N), (logits_from_each_head.shape, (B, T, N)) # Normalize the logits from each output head with softmax. # Example with batch size of 1, output heads = 2, and classes = 4: # logits from each head: [[[123, 456, 123, 123], [1, 1, 2, 1]]] # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]] probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1) assert probs_from_each_head.shape == (B, T, N) # Simple kind of task inference: # For each item in the batch, use the class that has the highest probability # accross all output heads. max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1) assert max_probs_across_heads.shape == (B, N) assert chosen_head_per_class.shape == (B, N) # Example (continued): # max probs across heads: [[0.2, 0.6, 0.4, 0.2]] # chosen output heads per class: [[1, 0, 1, 1]] # Determine which output head has highest "confidence": max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1) assert max_prob_value.shape == (B,) assert most_probable_class.shape == (B,) # Example (continued): # max_prob_value: [0.6] # max_prob_class: [1] # A bit of boolean trickery to get what we need, which is, for each item, the # index of the output head that gave the most confident prediction. mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=self.device) chosen_output_head_per_item = chosen_head_per_class[mask] assert mask.shape == (B, N) assert chosen_output_head_per_item.shape == (B,) # Example (continued): # mask: [[False, True, False, True]] # chosen_output_head_per_item: [0] # Create a bool tensor to select items associated with the chosen output head. selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=self.device) assert selected_mask.shape == (B, T) # Select the logits using the mask: selected_forward_pass = stacked_forward_pass[selected_mask] assert selected_forward_pass.actions.logits.shape == (B, N) return selected_forward_pass from typing import Dict, Tuple, TypeVar Dataclass = TypeVar("Dataclass", bound=Batch) def get_task_indices( task_labels: Union[List[Optional[int]], np.ndarray, Tensor] ) -> Dict[Optional[int], Union[np.ndarray, Tensor]]: """Given an array-like of task labels, gives back a dictionary mapping from task id to an array-like of indices for the corresponding indices in the batch. Parameters ---------- task_labels : Union[np.ndarray, Tensor] [description] Returns ------- Dict[Optional[int], Union[np.ndarray, Tensor]] Dictionary mapping from task index (int or None) to an ndarray or Tensor (depending on the type of `task_labels`) of indices corresponding to the indices in `task_labels` that correspond to that task. """ all_task_indices: Dict[Optional[int], Union[np.ndarray, Tensor]] = {} if task_labels is None: return {} output_type = np.asarray assert isinstance(task_labels, (np.ndarray, Tensor)) if isinstance(task_labels, Tensor): assert task_labels.ndim == 1 or task_labels.size() == 1, task_labels task_labels = task_labels.reshape(-1) else: assert task_labels.ndim == 1 or task_labels.size == 1, task_labels task_labels = task_labels.reshape(-1) unique_task_labels = list(set(task_labels.tolist())) batch_size = len(task_labels) # Get the indices for each task. for task_id in unique_task_labels: if isinstance(task_labels, np.ndarray): task_indices = np.arange(batch_size)[task_labels == task_id] else: assert isinstance(task_labels, Tensor), task_labels task_indices = torch.arange(batch_size, device=task_labels.device)[ task_labels == task_id ] all_task_indices[task_id] = task_indices return all_task_indices # TODO: Remove this, currently unused. def cleanup_task_labels( task_labels: Optional[Sequence[Optional[int]]], ) -> Optional[np.ndarray]: """'cleans up' the task labels, by returning either None or an integer numpy array. TODO: Not clear why we really have to do this in the first place. The point is, if we wanted to allow only a fraction of task labels for instance, then we have to deal with np.ndarrays with `object` dtypes. Parameters ---------- task_labels : Optional[Sequence[Optional[int]]] Some sort of array of task ids, or None. Returns ------- Optional[np.ndarray] None if there are no task ids, or an integer numpy array if there are. Raises ------ NotImplementedError If only a portion of the task labels are available. """ if isinstance(task_labels, np.ndarray): if task_labels.dtype == object: if all(task_labels == None): task_labels = None elif not any(task_labels == None): task_labels = torch.as_tensor(task_labels.astype(int)) else: raise NotImplementedError(f"TODO: Only given a portion of task labels?") # IDEA: Maybe set task_id to -1 in those cases, and return an int # ndarray as well? if task_labels is None: return None assert isinstance(task_labels, (np.ndarray, Tensor)), task_labels if not task_labels.shape: task_labels = task_labels.reshape([1]) if isinstance(task_labels, Tensor): task_labels = task_labels.cpu().numpy() if task_labels is not None: task_labels = task_labels.astype(int) assert task_labels is None or isinstance(task_labels, np.ndarray) return task_labels ================================================ FILE: sequoia/methods/models/base_model/multihead_model_test.py ================================================ """Tests for the class-incremental version of the Model class. """ # from sequoia.conftest import config from collections import defaultdict from typing import Dict, List, Optional, Tuple, Type import numpy as np import pytest import torch from continuum import ClassIncremental from continuum.datasets import MNIST from continuum.tasks import TaskSet from gym import spaces from torch import Tensor, nn from sequoia.common import Loss from sequoia.common.config import Config from sequoia.methods.base_method import BaseMethod from sequoia.methods.models.forward_pass import ForwardPass from sequoia.methods.models.output_heads.rl.episodic_a2c import EpisodicA2C from sequoia.settings import ClassIncrementalSetting, RLSetting, TraditionalRLSetting from sequoia.settings.rl import IncrementalRLSetting from .base_model import BaseModel from .multihead_model import MultiHeadModel, OutputHead, get_task_indices @pytest.fixture() def mixed_samples(config: Config): """Fixture that produces some samples from each task.""" dataset = MNIST(config.data_dir, download=True, train=True) datasets: List[TaskSet] = ClassIncremental(dataset, nb_tasks=5) n_samples_per_task = 10 indices = list(range(10)) samples_per_task: Dict[int, Tensor] = { i: tuple(map(torch.as_tensor, taskset.get_samples(indices))) for i, taskset in enumerate(datasets) } return samples_per_task class MockOutputHead(OutputHead): def __init__(self, *args, Actions: Type, task_id: int = -1, **kwargs): super().__init__(*args, **kwargs) self.task_id = task_id self.Actions = Actions self.name = f"task_{task_id}" def forward(self, observations, representations) -> Tensor: # type: ignore """This mock forward just creates an action that is related to the observation and the task id for this output head. """ x: Tensor = observations.x assert (observations.task_labels == self.task_id).all() h_x = representations # actions = torch.stack([h_i.mean() * self.task_id for h_i in h_z]) # actions = torch.stack([x_i.mean() * self.task_id for x_i in x]) actions = [x_i.mean() * self.task_id for x_i in x] actions = torch.stack(actions) fake_logits = torch.rand([actions.shape[0], self.action_space.n]) from sequoia.methods.models.output_heads.classification_head import ClassificationOutput # assert issubclass(ClassificationOutput, self.Actions) # TODO: Ideally self.Actions would already be a subclass of ClassificationActions! # return self.Actions(y_pred=actions, logits=fake_logits) return ClassificationOutput(y_pred=actions, logits=fake_logits) def get_loss(self, forward_pass, actions, rewards): return Loss(self.name, 0.0) # def mock_output_task(self: MultiHeadModel, x: Tensor, h_x: Tensor) -> Tensor: # return self.output_head(x) # def mock_encoder(self: MultiHeadModel, x: Tensor) -> Tensor: # return x.new_ones(self.hp.hidden_size) @pytest.mark.parametrize( "indices", [ slice(0, 10), # all the same task (0) slice(0, 20), # 10 from task 0, 10 from task 1 slice(0, 30), # 10 from task 0, 10 from task 1, 10 from task 2 slice(0, 50), # 10 from each task. ], ) def test_multiple_tasks_within_same_batch( mixed_samples: Dict[int, Tuple[Tensor, Tensor, Tensor]], indices: slice, monkeypatch, config: Config, ): """TODO: Write out a test that checks that when given a batch with data from different tasks, and when the model is multiheaded, it will use the right output head for each image. """ # Get a mixed batch xs, ys, ts = map(torch.cat, zip(*mixed_samples.values())) xs = xs[indices] ys = ys[indices] ts = ts[indices].int() obs = ClassIncrementalSetting.Observations(x=xs, task_labels=ts) setting = ClassIncrementalSetting() model = MultiHeadModel( setting=setting, hparams=MultiHeadModel.HParams(batch_size=30, multihead=True), config=config, ) class MockEncoder(nn.Module): def forward(self, x: Tensor): return x.new_ones([x.shape[0], model.hidden_size]) mock_encoder = MockEncoder() model.encoder = mock_encoder for i in range(5): model.output_heads[str(i)] = MockOutputHead( input_space=spaces.Box(0, 1, [model.hidden_size]), action_space=spaces.Discrete(2), Actions=setting.Actions, task_id=i, ) model.output_head = model.output_heads["0"] forward_pass = model(obs) y_preds = forward_pass["y_pred"] assert y_preds.shape == ts.shape assert torch.all(y_preds == ts * xs.view([xs.shape[0], -1]).mean(1)) def test_multitask_rl_bug_without_PL(monkeypatch): """TODO: on_task_switch is called on the new observation, but we need to produce a loss for the output head that we were just using! """ # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at # each episode. max_episode_steps = 5 setting = TraditionalRLSetting( dataset="cartpole", batch_size=1, nb_tasks=2, train_max_steps=100, max_episode_steps=max_episode_steps, add_done_to_observations=True, ) assert setting.stationary_context # setting = RLSetting.load_benchmark("monsterkong") config = Config(debug=True, verbose=True, seed=123) config.seed_everything() model = BaseModel( setting=setting, hparams=MultiHeadModel.HParams( multihead=True, output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True), ), config=config, ) # TODO: Maybe add some kind of "hook" to check which losses get returned when? model.train() # from pytorch_lightning import Trainer # trainer = Trainer(fast_dev_run=True) # trainer.fit(model, train_dataloader=setting.train_dataloader()) # trainer.setup(model, stage="fit") # from pytorch_lightning import Trainer optimizer = torch.optim.Adam(model.parameters(), lr=0.01) episodes = 0 max_episodes = 5 # Dict mapping from step to loss at that step. losses: Dict[int, Loss] = {} with setting.train_dataloader() as env: env.seed(123) # env = TimeLimit(env, max_episode_steps=max_episode_steps) # Iterate over the environment, which yields one observation at a time: for step, obs in enumerate(env): assert isinstance(obs, RLSetting.Observations) if step == 0: assert not any(obs.done) start_task_label = obs["task_labels"][0] stored_steps_in_each_head_before = { task_key: output_head.num_stored_steps(0) for task_key, output_head in model.output_heads.items() } forward_pass: ForwardPass = model.forward(observations=obs) rewards = env.send(forward_pass.actions) loss: Loss = model.get_loss( forward_pass=forward_pass, rewards=rewards, loss_name="debug" ) stored_steps_in_each_head_after = { task_key: output_head.num_stored_steps(0) for task_key, output_head in model.output_heads.items() } # if step == 5: # assert False, (loss, stored_steps_in_each_head_before, stored_steps_in_each_head_after) if any(obs.done): assert loss.loss != 0.0, step assert loss.loss.requires_grad # Backpropagate the loss, update the models, etc etc. loss.loss.backward() model.on_after_backward() optimizer.step() model.on_before_zero_grad(optimizer) optimizer.zero_grad() # TODO: Need to let the model know than an update is happening so it can clear # buffers etc. episodes += sum(obs.done) losses[step] = loss else: assert loss.loss == 0.0 # TODO: print( f"Step {step}, episode {episodes}: x={obs.x}, done={obs.done}, reward={rewards} task labels: {obs.task_labels}, loss: {loss.losses.keys()}: {loss.loss}" ) if episodes > max_episodes: break # assert False, losses @pytest.mark.xfail(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.") def test_multitask_rl_bug_with_PL(monkeypatch, config: Config): """ """ # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at # each episode. cpu_config = config # cpu_config = Config(device="cpu", num_workers=0) setting = TraditionalRLSetting( dataset="cartpole", batch_size=1, num_workers=0, nb_tasks=2, train_max_steps=200, test_max_steps=200, max_episode_steps=5, add_done_to_observations=True, config=cpu_config, ) assert setting.train_max_steps == 200 assert setting.test_max_steps == 200 assert setting.stationary_context # setting = RLSetting.load_benchmark("monsterkong") cpu_config.seed_everything() model = BaseModel( setting=setting, hparams=MultiHeadModel.HParams( multihead=True, output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True), ), config=cpu_config, ).to(device=config.device) # TODO: Maybe add some kind of "hook" to check which losses get returned when? model.train() assert not model.automatic_optimization # Import this and use it to create the Trainer, rather than creating the Trainer # directly, so we don't get the same bug (due to with_is_last in PL) from the # DataConnector. from sequoia.methods.base_method import TrainerConfig # NOTE: We only do this so that the Model has a self.trainer attribute and so the # model.training_step below can be used: if config.device.type == "cuda": trainer_config = TrainerConfig(fast_dev_run=True) else: trainer_config = TrainerConfig( fast_dev_run=True, gpus=0, distributed_backend=None, ) trainer = trainer_config.make_trainer(config=cpu_config) # Fit in 'fast_dev_run' mode, so just a single batch of train / valid / test data. with setting.train_dataloader() as temp_env: temp_env.seed(123) trainer.fit(model, train_dataloader=temp_env) # NOTE: If we don't clear the buffers, there is a bug because the things that get put # in buffers aren't on the same device as later. model.output_head.clear_all_buffers() optimizer = torch.optim.Adam(model.parameters(), lr=0.01) episodes = 0 max_episodes = 5 # Dict mapping from step to loss at that step. losses: Dict[int, List[Loss]] = defaultdict(list) with setting.train_dataloader() as env: env.seed(123) # TODO: Interesting bug/problem: Since the VectorEnvs always want to reset the # env at the end of the episode, they also also so on the individual envs. # In order to solve that, we need to NOT put any 'ActionLimit' on the inside # envs, but only on the outer env. for step, obs in enumerate(env): assert isinstance(obs, RLSetting.Observations) print(step, env.is_closed()) forward_pass = model.training_step(batch=obs, batch_idx=step) step_results: Optional[Loss] = model.training_step_end([forward_pass]) loss_tensor: Optional[Tensor] = None if step > 0 and step % 5 == 0: # We should get a loss at each episode end: assert all(obs.done), step # Since batch_size == 1 for now. assert step_results is not None, (step, obs.task_labels) loss_tensor = step_results["loss"] loss: Loss = step_results["loss_object"] print(f"Loss at step {step}: {loss}") losses[step].append(loss) else: assert step_results is None print( f"Step {step}, episode {episodes}: x={obs.x}, done={obs.done}, task labels: {obs.task_labels}, loss_tensor: {loss_tensor}" ) if step >= setting.train_max_steps: assert False, "Shouldn't the environment have closed at this point?" for step, step_losses in losses.items(): print(f"Losses at step {step}:") for loss in step_losses: print(f"\t{loss}") # assert False, losses @pytest.mark.parametrize( "input, expected", [ (np.array([0, 0, 0, 0]), {0: np.arange(4)}), (torch.as_tensor([0, 0, 0, 0]), {0: torch.arange(4)}), ( torch.as_tensor([0, 0, 1, 0]), {0: torch.LongTensor([0, 1, 3]), 1: torch.LongTensor([2])}, ), ( np.array([0, 0, 1, None]), {0: np.array([0, 1]), 1: np.array([2]), None: np.array([3])}, ), ], ) def test_get_task_indices(input, expected): actual = get_task_indices(input) assert str(actual) == str(expected) @pytest.mark.parametrize( "indices", [ slice(0, 10), # all the same task (0) slice(0, 20), # 10 from task 0, 10 from task 1 slice(0, 30), # 10 from task 0, 10 from task 1, 10 from task 2 slice(0, 50), # 10 from each task. ], ) def test_task_inference_sl( mixed_samples: Dict[int, Tuple[Tensor, Tensor, Tensor]], indices: slice, config: Config, ): """TODO: Write out a test that checks that when given a batch with data from different tasks, and when the model is multiheaded, it will use the right output head for each image. """ # Get a mixed batch xs, ys, ts = map(torch.cat, zip(*mixed_samples.values())) xs = xs[indices] ys = ys[indices] ts = ts[indices].int() obs = ClassIncrementalSetting.Observations(x=xs, task_labels=None) setting = ClassIncrementalSetting() model = MultiHeadModel( setting=setting, hparams=MultiHeadModel.HParams(batch_size=30, multihead=True), config=config, ) class MockEncoder(nn.Module): def forward(self, x: Tensor): return x.new_ones([x.shape[0], model.hidden_size]) mock_encoder = MockEncoder() model.encoder = mock_encoder for i in range(5): model.output_heads[str(i)] = MockOutputHead( input_space=spaces.Box(0, 1, [model.hidden_size]), action_space=spaces.Discrete(setting.action_space.n), Actions=setting.Actions, task_id=i, ) model.output_head = model.output_heads["0"] forward_pass = model(obs) y_preds = forward_pass.actions.y_pred assert y_preds.shape == ts.shape # TODO: Check that the task inference works by changing the logits to be based on # the assigned task in the Mock output head. # assert torch.all(y_preds == ts * xs.view([xs.shape[0], -1]).mean(1)) @pytest.mark.skip(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.") @pytest.mark.timeout(120) def test_task_inference_rl_easy(config: Config): from sequoia.methods.base_method import BaseMethod method = BaseMethod(config=config) from sequoia.settings.rl import IncrementalRLSetting setting = IncrementalRLSetting( dataset="cartpole", nb_tasks=2, max_episode_steps=20, train_max_steps=200, test_max_steps=200, config=config, ) results = setting.apply(method) assert results # assert False, results.to_log_dict() @pytest.mark.skip(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.") @pytest.mark.timeout(120) def test_task_inference_rl_hard(config: Config): method = BaseMethod(config=config) setting = IncrementalRLSetting( dataset="cartpole", nb_tasks=2, train_max_steps=1000, test_max_steps=1000, config=config, ) results = setting.apply(method) assert results # assert False, results.to_log_dict() from sequoia.methods.base_method import BaseMethod from sequoia.settings.sl import TraditionalSLSetting from sequoia.settings.sl.continual.setting import subset @pytest.mark.timeout(30) def test_task_inference_multi_task_sl(config: Config): setting = TraditionalSLSetting(dataset="mnist", nb_tasks=2, config=config) # TODO: Maybe add this kind of 'max_steps_per_task' argument even in supervised # settings: dataset_length = 1000 # TODO: Shorten the train/test datasets? method = BaseMethod(config=config, max_epochs=1) setting.setup() setting.train_datasets = [ subset(dataset, list(range(dataset_length))) for dataset in setting.train_datasets ] setting.val_datasets = [ subset(dataset, list(range(dataset_length))) for dataset in setting.val_datasets ] setting.test_datasets = [ subset(dataset, list(range(dataset_length))) for dataset in setting.test_datasets ] results = setting.apply(method) assert 0.80 <= results.average_final_performance.objective ================================================ FILE: sequoia/methods/models/base_model/self_supervised_model.py ================================================ """ Base class for a Self-Supervised model. This is meant to be a kind of 'Mixin' that you can use and extend in order to add self-supervised losses to your model. """ import warnings from dataclasses import dataclass from typing import Dict, Optional, TypeVar from torch import Tensor, nn from sequoia.common.config import Config from sequoia.common.loss import Loss from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask from sequoia.settings import Rewards, Setting, SettingType from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import flatten_dict from .model import Model # from sequoia.utils.module_dict import ModuleDict logger = get_logger(__name__) HParamsType = TypeVar("HParamsType", bound="SelfSupervisedModel.HParams") class SelfSupervisedModel(Model[SettingType]): """ Model 'mixin' that adds support for modular, configurable "auxiliary tasks". These auxiliary tasks are used to get a self-supervised loss to train on when labels aren't available. """ @dataclass class HParams(Model.HParams): """Hyperparameters of a Self-Supervised method.""" # vae: Optional[VAEReconstructionTask.Options] = None # ae: Optional[AEReconstructionTask.Options] = None def __init__(self, setting: Setting, hparams: HParams, config: Config): super().__init__(setting, hparams, config) self.hp: SelfSupervisedModel.HParams # Dictionary of auxiliary tasks. self.tasks: Dict[str, AuxiliaryTask] = self.create_auxiliary_tasks() def get_loss( self, forward_pass: Dict[str, Tensor], rewards: Rewards = None, loss_name: str = "", ) -> Loss: # Get the output task loss (the loss of the base model) loss: Loss = super().get_loss(forward_pass, rewards=rewards, loss_name=loss_name) # Add the self-supervised losses from all the enabled auxiliary tasks. for task_name, aux_task in self.tasks.items(): assert task_name, "Auxiliary tasks should have a name!" if aux_task.enabled: # TODO: Auxiliary tasks all share the same 'y' for now, but it # might make more sense to organize this differently. y = rewards.y if rewards else None aux_loss: Loss = aux_task.get_loss(forward_pass, y=y) # Scale the loss by the corresponding coefficient before adding # it to the total loss. loss += aux_task.coefficient * aux_loss.to(self.device) if self.config.debug and self.config.verbose: logger.debug(f"{task_name} loss: {aux_loss.total_loss}") return loss def add_auxiliary_task( self, aux_task: AuxiliaryTask, key: str = None, coefficient: float = None ) -> None: """Adds an auxiliary task to the self-supervised model.""" key = aux_task.name if key is None else key if key in self.tasks: raise RuntimeError(f"There is already an auxiliary task with name {key} in the model!") self.tasks[key] = aux_task.to(self.device) if coefficient is not None: aux_task.coefficient = coefficient elif not aux_task.coefficient: warnings.warn( UserWarning(f"Adding auxiliary task with name {key}, but with coefficient of 0.!") ) if aux_task.coefficient: aux_task.enable() def create_auxiliary_tasks(self) -> Dict[str, AuxiliaryTask]: # Share the relevant parameters with all the auxiliary tasks. # We do this by setting class attributes. # TODO: Make sure that we aren't duplicating all of the model's weights # by setting a class attribute. AuxiliaryTask._model = self AuxiliaryTask.hidden_size = self.hidden_size AuxiliaryTask.input_shape = self.input_shape AuxiliaryTask.encoder = self.encoder AuxiliaryTask.output_head = self.output_head # AuxiliaryTask.preprocessing = self.preprocess_batch tasks: Dict[str, AuxiliaryTask] = nn.ModuleDict() # TODO(@lebrice): Should we create the tasks even if they aren't used, # and then 'enable' them when they are needed? (I'm thinking that maybe # being enable/disable auxiliary tasks when needed might be useful # later?) # if self.hp.vae and self.hp.vae.coefficient: # tasks[VAEReconstructionTask.name] = VAEReconstructionTask(options=self.hp.vae) # if self.hp.ae and self.hp.ae.coefficient: # tasks[AEReconstructionTask.name] = AEReconstructionTask(options=self.hp.ae) # if self.hp.ewc and self.hp.ewc.coefficient: # tasks[EWCTask.name] = EWCTask(options=self.hp.ewc) return tasks def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching between tasks. Args: task_id (int): the Id of the task. """ for task_name, task in self.tasks.items(): if task.enabled: task.on_task_switch(task_id=task_id) super().on_task_switch(task_id=task_id) def shared_modules(self) -> Dict[str, nn.Module]: """Returns any trainable modules in `self` that are shared across tasks. By giving this information, these weights can then be used in regularization-based auxiliary tasks like EWC, for example. For the base model, this returns a dictionary with the encoder, for example. When using auxiliaryt tasks, they also add their shared weights, if any. Returns ------- Dict[str, nn.Module]: Dictionary mapping from name to the shared modules, if any. """ shared_modules = super().shared_modules() for task_name, task in self.tasks.items(): # TODO: What separator to use when dealing with nested dictionaries? I seem # to recall that ModuleDicts don't like some separators. sep = "." task_modules = task.shared_modules() flattened_task_modules = flatten_dict(task_modules, separator=sep) for module_name, module in flattened_task_modules.items(): shared_modules[f"{task_name}{sep}{module_name}"] = module return shared_modules ================================================ FILE: sequoia/methods/models/base_model/self_supervised_model_test.py ================================================ from typing import Dict, List, Tuple, Type import pytest from sequoia.conftest import id_fn, parametrize, slow from sequoia.methods.aux_tasks import AE, EWC, VAE from sequoia.methods.base_method import BaseMethod from sequoia.settings.base import Results, Setting from sequoia.settings.sl import TaskIncrementalSLSetting, TraditionalSLSetting from sequoia.settings.sl.incremental import ClassIncrementalSetting Method = BaseMethod # Use 'Method' as an alias for the actual Method subclass under test. (since at # the moment quite a few tests share some code. # List of datasets that are currently supported for this method. supported_datasets: List[str] = [ "mnist", "fashion_mnist", "cifar10", "cifar100", "kmnist", ] def test_get_applicable_settings(): settings = Method.get_applicable_settings() assert ClassIncrementalSetting in settings assert TaskIncrementalSLSetting in settings assert TraditionalSLSetting in settings @pytest.fixture( scope="module", params=[ {}, {VAE: 1}, {AE: 1}, {EWC: 1}, ], # no aux task. ids=id_fn, ) def method_and_coefficients(request, tmp_path_factory): """Fixture that creates a method to be reused for the tests below as well as return the coefficients for each auxiliary task. """ # Reuse the Method accross all tests below log_dir = tmp_path_factory.mktemp("log_dir") aux_task_coefficients = request.param args = f""" --debug --log_dir_root {log_dir} --default_root_dir {log_dir} --knn_samples 0 --seed 123 --fast_dev_run """ for aux_task_name, coef in aux_task_coefficients.items(): args += f"--{aux_task_name}.coef {coef} " return Method.from_args(args, strict=False), aux_task_coefficients # @parametrize("dataset", get_dataset_params(Method, supported_datasets)) from sequoia.methods.method_test import key_fn @slow @parametrize("setting_type", sorted(Method.get_applicable_settings(), key=key_fn)) def test_fast_dev_run( method_and_coefficients: Tuple[Method, Dict[str, float]], setting_type: Type[Setting], test_dataset: str, ): """Performs a quick run with only one batch of train / val / test data and check that the 'Results' objects are ok. """ method, aux_task_coefficients = method_and_coefficients if test_dataset not in setting_type.available_datasets: pytest.skip(msg=f"dataset {test_dataset} isn't available for this setting.") # Instantiate the setting setting: Setting = setting_type(dataset=test_dataset, nb_tasks=2) results: Results = setting.apply(method) validate_results(results, aux_task_coefficients) def validate_results(results: Results, aux_task_coefficients: Dict[str, float]): """Makes sure that the results make sense for the method being tested. Checks that the Loss object has losses for each 'enabled' auxiliary task. Args: results (Results): A given Results object. """ assert results is not None assert results.hparams is not None assert results.test_loss is not None for loss in results.task_losses: for aux_task_name, coef in aux_task_coefficients.items(): assert aux_task_name in loss.losses aux_task_loss = loss.losses[aux_task_name] assert aux_task_loss.loss >= 0.0 assert aux_task_loss._coefficient == coef ================================================ FILE: sequoia/methods/models/base_model/semi_supervised_model.py ================================================ """ Addon that enables training on semi-supervised batches. NOTE: Not used at the moment, but should work just fine. """ from dataclasses import dataclass from typing import Dict, Optional, Sequence, Union import numpy as np from torch import Tensor # from sequoia.common.callbacks import KnnCallback from sequoia.common.loss import Loss from sequoia.settings import Rewards, SettingType from sequoia.utils.logging_utils import get_logger from .model import Model logger = get_logger(__name__) class SemiSupervisedModel(Model[SettingType]): @dataclass class HParams(Model.HParams): """Hyperparameters of a Self-Supervised method.""" # Adds Options for a KNN classifier callback, which is used to evaluate # the quality of the representations on each task after each training # epoch. # TODO: Debug/test this callback to make sure it still works fine. # knn_callback: KnnCallback = mutable_field(KnnCallback) def get_loss( self, forward_pass: Dict[str, Tensor], rewards: Optional[Rewards] = None, loss_name: str = "", ) -> Loss: """Trains the model on a batch of (potentially partially labeled) data. Args: forward_pass (Dict[str, Tensor]): WIP: The results of the forward pass (processed input, predictions, etc.) rewards (Union[Optional[Tensor], List[Optional[Tensor]]]): Labels associated with the data. Can either be: - None: fully unlabeled batch - Tensor: fully labeled batch - List[Optional[Tensor]]: Partially labeled batch. loss_name (str, optional): Name of the resulting loss object. Defaults to "Train". Returns: Loss: a loss object made from both the unsupervised and supervised losses. """ # TODO: We could also just use '-1' instead as the 'no-label' val: this # would make it a bit simpler than having both numpy arrays and tensors # in the batch y: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = rewards.y if y is None or all(y_i is not None for y_i in y): # Fully labeled/unlabeled batch # NOTE: Tensors can't have None items, so if we get a Tensor that # means that we have all task labels. labeled_ratio = float(y is not None) return super().get_loss(forward_pass, rewards, loss_name=loss_name) is_labeled: np.ndarray = np.asarray([y_i is not None for y_i in y]) # Batch is maybe a mix of labeled / unlabeled data. labeled_y = y[is_labeled] # TODO: Might have to somehow re-order the results based on the indices? # TODO: Join (merge) the metrics? or keep them separate? labeled_forward_pass = {k: v[is_labeled] for k, v in forward_pass.items()} unlabeled_forward_pass = {k: v[~is_labeled] for k, v in forward_pass.items()} labeled_ratio = len(labeled_y) / len(y) logger.debug(f"Labeled ratio: {labeled_ratio}") # Create the 'total' loss for the batch, with the required name. # We will then create two 'sublosses', one named 'unsupervised' and one # named 'supervised', each containing the respective losses and metrics. # TODO: Make sure that this doesn't make it harder to get the metrics # from the Loss object. If it does, then we could maybe just fuse the # labeled and unlabeled losses and metrics, but that might also cause # issues. loss = Loss(name=loss_name) if unlabeled_forward_pass: # TODO: Setting a different loss name for the for this is definitely going to cause trouble! unsupervised_loss = super().get_loss( unlabeled_forward_pass, rewards=None, loss_name="unsupervised", ) loss += unsupervised_loss if labeled_forward_pass: supervised_loss = super().get_loss( labeled_forward_pass, rewards=labeled_y, loss_name="supervised", ) loss += supervised_loss return loss ================================================ FILE: sequoia/methods/models/baseline_model.puml ================================================ @startuml base_model ' !include output_heads.puml package base_model { package model { abstract class Model { + hparams: Model.HParams + encoder: nn.Module + output_head: OutputHead + forward(Observations): ForwardPass + get_loss(ForwardPass, Rewards): Loss + get_actions(observations: Observations, action_space: Space): Actions } ' class Model.HParams extends BaseHParams {} ' class BaseHParams { class Model.HParams { {static} + available_optimizers: Dict[str, Type[Optimizer]] {static} + available_encoders: Dict[str, Type[nn.Module]] + learning_rate: float = 0.001 + weight_decay: float = 1e-6 + optimizer: str = "adam" + encoder: str = "resnet18" + batch_size: Optional[int] + train_from_scratch: bool = False + freeze_pretrained_encoder_weights: bool = False + output_head: OutputHead.HParams + detach_output_head: bool = False } } together { package semi_supervised_model { abstract class SemiSupervisedModel extends Model { + forward(Observations): ForwardPass + get_loss(ForwardPass, Optional[Rewards]): Loss } abstract class SemiSupervisedModel.HParams extends Model.HParams { + knn_callback: KnnCallback note (todo: unused atm) } } package self_supervised_model { abstract class SelfSupervisedModel extends Model { + hparams: SelfSupervisedModel.HParams + tasks: dict[str, AuxiliaryTask] + add_auxiliary_task(task AuxiliaryTask) } abstract class SelfSupervisedModel.HParams extends Model.HParams { + simclr: Optional[SimCLRTask.Options] + vae: Optional[VAEReconstructionTask.Options] + ae: Optional[AEReconstructionTask.Options] + ewc: Optional[EWCTask.Options] } } package multihead_model { abstract class MultiHeadModel extends Model { + output_heads: dict[str, OutputHead] + forward(Observations): ForwardPass + on_task_switch(task_id: Optional[int]) } abstract class MultiHeadModel.HParams extends Model.HParams { + multihead: Optional[bool] } } } package base_model as base_model.base_model { class BaseModel extends SemiSupervisedModel, SelfSupervisedModel, MultiHeadModel { + hparams: BaseModel.HParams } class BaseModel.HParams extends SelfSupervisedModel.HParams, MultiHeadModel.HParams, SemiSupervisedModel.HParams { } } Model "1" *-- "1" OutputHead ' Model *-- Model.HParams ' BaseModel *-- BaseModel.HParams ' SemiSupervisedModel *-- SemiSupervisedModel.HParams ' SelfSupervisedModel *-- SelfSupervisedModel.HParams ' MultiHeadModel *-- MultiHeadModel.HParams SelfSupervisedModel "1" o-- "many" aux_tasks.AuxiliaryTask ' BaseMethod "1" *--> "1" BaseModel : uses MultiHeadModel "1" *-- "many" OutputHead ' MultiHeadModel "1" *-- "1" OutputHead } @enduml ================================================ FILE: sequoia/methods/models/fcnet.py ================================================ """ TODO: Take out the dense network from the OutputHead. """ from dataclasses import dataclass from typing import ClassVar, Dict, List, Optional, Type, Union, overload from torch import nn from sequoia.common.hparams import HyperParameters, categorical, uniform class FCNet(nn.Sequential): """Fully-connected network.""" @dataclass class HParams(HyperParameters): """Hyper-parameters of a fully-connected network.""" available_activations: ClassVar[Dict[str, Type[nn.Module]]] = { "relu": nn.ReLU, "tanh": nn.Tanh, "elu": nn.ELU, # No idea what these do, but hey, they are available! "gelu": nn.GELU, "relu6": nn.ReLU6, } # Number of hidden layers in the output head. hidden_layers: int = uniform(0, 10, default=3) # Number of neurons in each hidden layer of the output head. # If a single value is given, than each of the `hidden_layers` layers # will have that number of neurons. # If `n > 1` values are given, then `hidden_layers` must either be 0 or # `n`, otherwise a RuntimeError will be raised. hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64) activation: Type[nn.Module] = categorical(available_activations, default=nn.Tanh) # Dropout probability. Dropout is applied after each layer. # Set to None or 0 for no dropout. # TODO: Not sure if this is how it's typically used. Need to check. dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2) def __post_init__(self): super().__post_init__() if isinstance(self.activation, str): self.activation = self.available_activations[self.activation.lower()] if isinstance(self.hidden_neurons, int): self.hidden_neurons = [self.hidden_neurons] # no value passed to --hidden_layers if self.hidden_layers == 0: if len(self.hidden_neurons) == 1: # Default Setting: No hidden layers. self.hidden_neurons = [] elif len(self.hidden_neurons) > 1: # Set the number of hidden layers to the number of passed values. self.hidden_layers = len(self.hidden_neurons) elif self.hidden_layers > 0 and len(self.hidden_neurons) == 1: # Duplicate that value for each of the `hidden_layers` layers. self.hidden_neurons *= self.hidden_layers elif self.hidden_layers == 1 and not self.hidden_neurons: self.hidden_layers = 0 if self.hidden_layers != len(self.hidden_neurons): raise RuntimeError( f"Invalid values: hidden_layers ({self.hidden_layers}) != " f"len(hidden_neurons) ({len(self.hidden_neurons)})." ) @overload def __init__(self, in_features: int, out_features: int, hparams: HParams = None): ... @overload def __init__( self, in_features: int, out_features: int, hidden_layers: int = 1, hidden_neurons: List[int] = None, activation: Type[nn.Module] = nn.Tanh, ): ... def __init__(self, in_features: int, out_features: int, hparams: HParams = None, **kwargs): self.in_features = in_features self.out_features = out_features self.hparams = hparams or self.HParams(**kwargs) hidden_layers: List[nn.Module] = [] output_size = out_features assert isinstance(self.hparams.hidden_neurons, list) for i, neurons in enumerate(self.hparams.hidden_neurons): out_features = neurons if self.hparams.dropout_prob: hidden_layers.append(nn.Dropout(p=self.hparams.dropout_prob)) hidden_layers.append(nn.Linear(in_features, out_features)) hidden_layers.append(self.hparams.activation()) in_features = out_features # next input size is output size of prev. super().__init__(nn.Flatten(), *hidden_layers, nn.Linear(in_features, output_size)) # TODO: IDEA: use @singledispatchmethod to add a `forward` implementation # for mapping input space to output space. # def forward(self, input: Any) ================================================ FILE: sequoia/methods/models/forward_pass.py ================================================ """ Typed object that represents the outputs of the forward pass of a model. """ from dataclasses import dataclass from typing import Any, Optional from simple_parsing.helpers.flatten import FlattenedAccess from torch import Tensor from sequoia.common import Batch from sequoia.settings.base.objects import Actions, Observations, Rewards @dataclass(frozen=True) class ForwardPass(Batch, FlattenedAccess): """Typed version of the result of a forward pass through a model. FlattenedAccess is pretty cool, but potentially confusing. We can get any attributes in the children by getting them directly on the parent. So if the `observation` has an `x` attribute, we can get on this object directly with `self.x`, and it will fetch the attribute from the observation. """ observations: Observations representations: Tensor actions: Actions rewards: Optional[Rewards] = None # Note: Might be annoying later if there is a need for subclasses of ForwardPass, # since dataclass fields without a default value can't follow fields that have one. @property def h_x(self) -> Any: return self.representations ================================================ FILE: sequoia/methods/models/output_heads/__init__.py ================================================ from .classification_head import ClassificationHead from .output_head import OutputHead from .regression_head import RegressionHead from .rl import ActorCriticHead, PolicyHead ================================================ FILE: sequoia/methods/models/output_heads/classification_head.py ================================================ from dataclasses import dataclass from typing import ClassVar, Dict, List, Optional, Type, Union import gym import torch from gym import spaces from torch import LongTensor, Tensor, nn from sequoia.common import ClassificationMetrics, Loss from sequoia.common.hparams import categorical, uniform from sequoia.settings import Actions, Observations, Rewards from ..fcnet import FCNet from ..forward_pass import ForwardPass from .output_head import OutputHead # TODO: This is based on 'Actions' which is currently basically the same for all settings # However, there should probably have a different `Action` class on a # IncrementalSLSetting("mnist") vs IncrementalSLSetting("some_regression_dataset")! # IDEA: What if Settings were actually meta-classes, where the 'instances' were for a # particular choice of dataset? (e.g. `IncrementalSLSetting("mnist")` -> ) # This would maybe look a bit like the 'fully compositional' approach as well? @dataclass(frozen=True) class ClassificationOutput(Actions): """Typed dict-like class that represents the 'forward pass'/output of a classification head, which correspond to the 'actions' to be sent to the environment, in the general formulation. """ y_pred: Union[LongTensor, Tensor] logits: Tensor @property def action(self) -> LongTensor: return self.y_pred @property def y_pred_log_prob(self) -> Tensor: """returns the log probabilities for the chosen actions/predictions.""" return self.logits[:, self.y_pred] @property def y_pred_prob(self) -> Tensor: """returns the log probabilities for the chosen actions/predictions.""" return self.probabilities[self.y_pred] @property def probabilities(self) -> Tensor: """Returns the normalized probabilies for each class, i.e. the softmax-ed version of `self.logits`. """ return self.logits.softmax(-1) class ClassificationHead(OutputHead): @dataclass class HParams(FCNet.HParams, OutputHead.HParams): """Hyper-parameters of the OutputHead used for classification.""" # NOTE: These hparams were basically copied over from FCNet.HParams, just so its a # bit more visible. available_activations: ClassVar[Dict[str, Type[nn.Module]]] = { "relu": nn.ReLU, "tanh": nn.Tanh, "elu": nn.ELU, # No idea what these do, but hey, they are available! "gelu": nn.GELU, "relu6": nn.ReLU6, } # Number of hidden layers in the output head. hidden_layers: int = uniform(0, 3, default=0) # Number of neurons in each hidden layer of the output head. # If a single value is given, than each of the `hidden_layers` layers # will have that number of neurons. # If `n > 1` values are given, then `hidden_layers` must either be 0 or # `n`, otherwise a RuntimeError will be raised. hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64) activation: Type[nn.Module] = categorical(available_activations, default=nn.Tanh) # Dropout probability. Dropout is applied after each layer. # Set to None or 0 for no dropout. # TODO: Not sure if this is how it's typically used. Need to check. dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2) def __init__( self, input_space: gym.Space, action_space: gym.Space, reward_space: gym.Space = None, hparams: "ClassificationHead.HParams" = None, name: str = "classification", ): super().__init__( input_space=input_space, action_space=action_space, reward_space=reward_space, hparams=hparams, name=name, ) self.hparams: ClassificationHead.HParams assert isinstance(action_space, spaces.Discrete) output_size = action_space.n self.dense = FCNet( in_features=self.input_size, out_features=output_size, hparams=self.hparams, ) # if output_size == 2: # # TODO: Should we be using this loss instead? # self.loss_fn = nn.BCEWithLogitsLoss() self.loss_fn = nn.CrossEntropyLoss() def forward(self, observations: Observations, representations: Tensor) -> ClassificationOutput: # TODO: This should probably take in a dict and return a dict, or something like that? # TODO: We should maybe convert this to also return a dict instead # of a Tensor, just to be consistent with everything else. This could # also maybe help with having multiple different output heads, each # having a different name and giving back a dictionary of their own # forward pass tensors (if needed) and predictions? logits = self.dense(representations) y_pred = logits.argmax(dim=-1) return ClassificationOutput( logits=logits, y_pred=y_pred, ) def get_loss( self, forward_pass: ForwardPass, actions: ClassificationOutput, rewards: Rewards ) -> Loss: logits: Tensor = actions.logits y_pred: Tensor = actions.y_pred rewards = rewards.to(logits.device) y: Tensor = rewards.y n_classes = logits.shape[-1] # Could remove these: just used for debugging. assert len(y.shape) == 1, y.shape assert not torch.is_floating_point(y), y.dtype assert 0 <= y.min(), y assert y.max() < n_classes, y loss = self.loss_fn(logits, y) assert loss.shape == () metrics = ClassificationMetrics(y_pred=logits, y=y) assert self.name, "Output Heads should have a name!" loss_object = Loss( name=self.name, loss=loss, # NOTE: we're passing the tensors to the Loss object because we let # it create the Metrics for us automatically. metrics={self.name: metrics}, ) return loss_object ================================================ FILE: sequoia/methods/models/output_heads/output_head.py ================================================ """ Abstract base class for an output head of the BaseModel. """ import dataclasses from abc import ABC, abstractmethod from dataclasses import dataclass from typing import ClassVar, List, Sequence, Type import gym import numpy as np from gym import spaces from gym.spaces.utils import flatdim from torch import Tensor, nn from torch.nn import Flatten # type: ignore from torch.optim.optimizer import Optimizer from sequoia.common.hparams import HyperParameters from sequoia.common.loss import Loss from sequoia.settings import Actions, Rewards, Setting from sequoia.utils import Parseable, get_logger from ..forward_pass import ForwardPass logger = get_logger(__name__) class OutputHead(nn.Module, ABC): """Module for the output head of the model. This output head is meant for classification, but you could inherit from it and customize it for doing something different like RL or reconstruction, for instance. """ # TODO: Rename this to 'output' and create some ClassificationHead, # RegressionHead, ValueHead, etc. subclasses with the corresponding names. name: ClassVar[str] = "classification" # Reference to the optimizer of the BaseModel. base_model_optimizer: ClassVar[Optimizer] @dataclass class HParams(HyperParameters, Parseable): """Hyperparameters of the output head.""" def __init__( self, input_space: gym.Space, action_space: gym.Space, reward_space: gym.Space = None, hparams: "OutputHead.HParams" = None, name: str = "", ): super().__init__() self.input_space = input_space self.action_space = action_space self.reward_space = reward_space or spaces.Box(-np.inf, np.inf, ()) self.input_size = flatdim(input_space) self.hparams = hparams or self.HParams() if not isinstance(self.hparams, self.HParams): # Upgrade the hparams to the right type, if needed. self.hparams = self.upgrade_hparams() self.name = name or type(self).name def make_dense_network( self, in_features: int, hidden_neurons: Sequence[int], out_features: int, activation: Type[nn.Module] = nn.ReLU, ): hidden_layers: List[nn.Module] = [] output_size = out_features for i, neurons in enumerate(hidden_neurons): out_features = neurons hidden_layers.append(nn.Linear(in_features, out_features)) hidden_layers.append(activation()) in_features = out_features # next input size is output size of prev. return nn.Sequential(nn.Flatten(), *hidden_layers, nn.Linear(in_features, output_size)) @abstractmethod def forward( self, observations: Setting.Observations, representations: Tensor ) -> Setting.Actions: """Given the observations and their representations, produce "actions". Parameters ---------- observations : Observations Object containing the input examples. representations : Any The results of encoding the input examples. Returns ------- Actions An object containing the action to take, and which can be used to calculate the loss later on. """ @abstractmethod def get_loss(self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards) -> Loss: """Given the forward pass,(a dict-like object that includes the observations, representations and actions, the actions produced by this output head and the resulting rewards, returns a Loss to use. """ def clear_all_buffers(self) -> None: """Optional method that gets called when using multiple output heads, to prevent keeping stale gradients around after the model that produced them gets updated during training. """ def upgrade_hparams(self): """Upgrades the hparams at `self.hparams` to the right type for this output head (`type(self).HParams`), filling in any missing values by parsing them from the command-line. Returns ------- type(self).HParams Hparams of the type `self.HParams`, with the original values preserved and any new values parsed from the command-line. """ # NOTE: This (getting the wrong hparams class) could happen for # instance when parsing a BaseMethod from the command-line, the # default type of hparams on the method is BaseModel.HParams, # whose `output_head` field doesn't have the right type exactly. current_hparams = self.hparams.to_dict() # TODO: If a value is not at its current default, keep it. default_hparams = self.HParams() missing_fields = [ f.name for f in dataclasses.fields(self.HParams) if f.name not in current_hparams or current_hparams[f.name] == getattr(type(self.hparams)(), f.name, None) or current_hparams[f.name] == getattr(default_hparams, f.name) ] logger.warning( RuntimeWarning( f"Upgrading the hparams from type {type(self.hparams)} to " f"type {self.HParams}. This will try to fetch the values for " f"the missing fields {missing_fields} from the command-line. " ) ) # Get the missing values if self.hparams._argv: return self.HParams.from_args(argv=self.hparams._argv, strict=False) hparams = self.HParams.from_args(argv=self.hparams._argv, strict=False) for missing_field in missing_fields: current_hparams[missing_field] = getattr(hparams, missing_field) return self.HParams(**current_hparams) ================================================ FILE: sequoia/methods/models/output_heads/regression_head.py ================================================ from dataclasses import dataclass from typing import List import gym from gym import spaces from torch import Tensor, nn from sequoia.common import Loss, RegressionMetrics from sequoia.settings import Actions, Observations, Rewards from sequoia.utils.utils import prod from ..fcnet import FCNet from ..forward_pass import ForwardPass from .output_head import OutputHead class RegressionHead(OutputHead): """Output head used for regression problems.""" @dataclass class HParams(FCNet.HParams, OutputHead.HParams): """Hyper-parameters of the regression output head.""" def __init__( self, input_space: gym.Space, action_space: gym.Space, reward_space: gym.Space = None, hparams: OutputHead.HParams = None, name: str = "regression", ): assert isinstance(action_space, spaces.Box) if len(action_space.shape) > 1: raise NotImplementedError( f"TODO: Regression head doesn't support output shapes that are " f"more than 1d for atm, (output space: {action_space})." ) # TODO: Add support for something like a "decoder head" (maybe as a # subclass of RegressionHead)? super().__init__( input_space=input_space, action_space=action_space, reward_space=reward_space, hparams=hparams, name=name, ) assert isinstance(action_space, spaces.Box) output_size = prod(action_space.shape) hidden_layers: List[nn.Module] = [] in_features = self.input_size for i, neurons in enumerate(self.hparams.hidden_neurons): out_features = neurons hidden_layers.append(nn.Linear(in_features, out_features)) hidden_layers.append(nn.ReLU()) in_features = out_features # next input size is output size of prev. self.dense = nn.Sequential( nn.Flatten(), *hidden_layers, nn.Linear(in_features, output_size) ) self.loss_fn = nn.MSELoss() def forward(self, observations: Observations, representations: Tensor) -> Actions: y_pred = self.dense(representations) return Actions(y_pred) def get_loss(self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards) -> Loss: actions: Actions = forward_pass.actions y_pred: Tensor = actions.y_pred y: Tensor = rewards.y loss = self.loss_fn(y_pred, y) metrics = RegressionMetrics(y_pred=y_pred, y=y) assert self.name, "Output Heads should have a name!" loss = Loss( name=self.name, loss=loss, # NOTE: we're passing the tensors to the Loss object because we let # it create the Metrics for us automatically. metrics={self.name: metrics}, ) return loss ================================================ FILE: sequoia/methods/models/output_heads/rl/__init__.py ================================================ from .actor_critic_head import ActorCriticHead from .policy_head import PolicyHead ================================================ FILE: sequoia/methods/models/output_heads/rl/actor_critic_head.py ================================================ """ An output head for RL based on Advantage Actor Critic. NOTE: This is the 'online' version of an Advantage Actor Critic, based on the following blog: https://medium.com/deeplearningmadeeasy/advantage-actor-critic-a2c-implementation-944e98616b """ from dataclasses import dataclass from typing import Optional, Tuple import torch from gym import spaces from gym.spaces.utils import flatdim from torch import Tensor, nn from sequoia.common import Loss from sequoia.settings import ContinualRLSetting from sequoia.utils import get_logger from ...forward_pass import ForwardPass from ..classification_head import ClassificationHead from .policy_head import Categorical, PolicyHeadOutput logger = get_logger(__name__) class ActorCriticHead(ClassificationHead): @dataclass class HParams(ClassificationHead.HParams): """Hyper-parameters of the Actor-Critic head.""" gamma: float = 0.95 learning_rate: float = 1e-3 def __init__( self, input_space: spaces.Space, action_space: spaces.Discrete, reward_space: spaces.Box, hparams: "ActorCriticHead.HParams" = None, name: str = "actor_critic", ): assert isinstance(action_space, spaces.Discrete), "Only support discrete space for now." super().__init__( input_space=input_space, action_space=action_space, reward_space=reward_space, hparams=hparams, name=name, ) if not isinstance(self.hparams, self.HParams): self.hparams = self.upgrade_hparams() action_dims = flatdim(action_space) # Critic takes in state-action pairs? or just state? self.critic_input_dims = self.input_size # self.critic_input_dims = self.input_size + action_dims self.critic_output_dims = 1 self.critic = nn.Sequential( # Lambda(concat_obs_and_action), nn.Flatten(), nn.Linear(self.critic_input_dims, 32), nn.ReLU(), nn.Linear(32, self.critic_output_dims), ) self.actor_input_dims = self.input_size self.actor_output_dims = action_dims self.actor = nn.Sequential( nn.Flatten(), nn.Linear(self.actor_input_dims, 32), nn.ReLU(), nn.Linear(32, self.actor_output_dims), ) self._current_state: Optional[Tensor] = None self._previous_state: Optional[Tensor] = None self._step = 0 self.optimizer = torch.optim.Adam(self.actor.parameters(), lr=self.hparams.learning_rate) self.optimizer_critic = torch.optim.Adam( self.critic.parameters(), lr=self.hparams.learning_rate ) def forward( self, observations: ContinualRLSetting.Observations, representations: Tensor ) -> PolicyHeadOutput: # NOTE: Here we could probably use either as the 'state': # state = observations.x # state = representations representations = representations.float() if len(representations.shape) != 2: representations = representations.reshape([-1, self.actor_input_dims]) self._previous_state = self._current_state self._current_state = representations # TODO: Actually implement the actor-critic forward pass. # predicted_reward = self.critic([state, action]) # Do we want to detach the representations? or not? logits = self.actor(representations) # The policy is the distribution over actions given the current state. action_dist = Categorical(logits=logits) if action_dist.has_rsample: sample = action_dist.rsample() else: sample = action_dist.sample() actions = PolicyHeadOutput( y_pred=sample, logits=logits, action_dist=action_dist, ) return actions def get_loss( self, forward_pass: ForwardPass, actions: PolicyHeadOutput, rewards: ContinualRLSetting.Rewards, ) -> Loss: action_dist: Categorical = actions.action_dist rewards = rewards.to(device=actions.device) env_reward = torch.as_tensor(rewards.y, device=actions.device) observations: ContinualRLSetting.Observations = forward_pass.observations done = observations.done assert done is not None, "Need the end-of-episode signal!" done = torch.as_tensor(done, device=actions.device) assert self._current_state is not None if self._previous_state is None: # Only allow this once! assert self._step == 0 self._previous_state = self._current_state self._step += 1 # TODO: Need to detach something here, right? advantage: Tensor = ( env_reward + (~done) * self.hparams.gamma * self.critic(self._current_state) - self.critic(self._previous_state) # detach previous representations? ) total_loss = Loss(self.name) if self.training: self.optimizer_critic.zero_grad() critic_loss_tensor = (advantage**2).mean() critic_loss = Loss("critic", loss=critic_loss_tensor) if self.training: critic_loss_tensor.backward() self.optimizer_critic.step() total_loss += critic_loss.detach() if self.training: self.optimizer.zero_grad() actor_loss_tensor = -action_dist.log_prob(actions.action) * advantage.detach() actor_loss_tensor = actor_loss_tensor.mean() actor_loss = Loss("actor", loss=actor_loss_tensor) if self.training: actor_loss_tensor.backward() self.optimizer.step() total_loss += actor_loss.detach() return total_loss def concat_obs_and_action(observation_action: Tuple[Tensor, Tensor]) -> Tensor: observation, action = observation_action batch_size = observation.shape[0] observation = observation.reshape([batch_size, -1]) action = action.reshape([batch_size, -1]) return torch.cat([observation, action], dim=-1) ================================================ FILE: sequoia/methods/models/output_heads/rl/episodic_a2c.py ================================================ """ TODO: IDEA: Similar to ActorCriticHead, but episodic, i.e. only gives a Loss at the end of the episode, rather than at each step. """ from dataclasses import dataclass from typing import ClassVar, Deque, List, Optional import numpy as np import torch from gym import spaces from torch import Tensor, nn from torch.nn import functional as F from sequoia.common import Loss from sequoia.common.hparams import categorical, uniform from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings import ContinualRLSetting from sequoia.settings.base import Rewards from sequoia.utils import get_logger from .policy_head import PolicyHead, PolicyHeadOutput, normalize logger = get_logger(__name__) @dataclass(frozen=True) class A2CHeadOutput(PolicyHeadOutput): """Output produced by the A2C output head.""" # The value estimate coming from the critic. value: Tensor class EpisodicA2C(PolicyHead): """Advantage-Actor-Critic output head that produces a loss only at end of episode. TODO: This could actually produce a loss every N steps, rather than just at the end of the episode. """ name: ClassVar[str] = "episodic_a2c" @dataclass class HParams(PolicyHead.HParams): """Hyper-parameters of the episodic A2C output head.""" # Wether to normalize the advantages for each episode. normalize_advantages: bool = categorical(True, False, default=False) actor_loss_coef: float = uniform(0.1, 1, default=0.5) critic_loss_coef: float = uniform(0.1, 1, default=0.5) entropy_loss_coef: float = uniform(0, 1, default=0.1) # Maximum norm of the policy gradient. max_policy_grad_norm: Optional[float] = None # The discount factor. gamma: float = uniform(0.9, 0.999, default=0.99) def __init__( self, input_space: spaces.Box, action_space: spaces.Discrete, reward_space: spaces.Box, hparams: HParams = None, name: str = "episodic_a2c", ): super().__init__( input_space=input_space, action_space=action_space, reward_space=reward_space, hparams=hparams, name=name, ) self.hparams: EpisodicA2C.HParams # Critic takes in state-action pairs? or just state? self.critic_input_dims = self.input_size # self.critic_input_dims = self.input_size + action_dims self.critic_output_dims = 1 self.critic = self.make_dense_network( in_features=self.critic_input_dims, hidden_neurons=self.hparams.hidden_neurons, out_features=self.critic_output_dims, activation=self.hparams.activation, ) self.actions: List[Deque[A2CHeadOutput]] self._current_state: Optional[Tensor] = None self._previous_state: Optional[Tensor] = None self._step = 0 @property def actor(self) -> nn.Module: return self.dense def forward( self, observations: ContinualRLSetting.Observations, representations: Tensor ) -> A2CHeadOutput: actions: PolicyHeadOutput = super().forward(observations, representations) # TODO: Shouldn't the critic also take the actor's action as an input? value = self.critic(representations) # We just need to add the value to the actions of the PolicyHead. # This works, because `self.actor` :== `self.dense`, which is what's used by # the PolicyHead. actions = A2CHeadOutput( y_pred=actions.y_pred, logits=actions.logits, action_dist=actions.action_dist, value=value, ) return actions def num_stored_steps(self, env_index: int) -> Optional[int]: """Returns the number of steps stored in the buffer for the given environment index. If there are no buffers for the given env, returns None """ if not self.actions or env_index >= len(self.actions): return None return len(self.actions[env_index]) def get_episode_loss(self, env_index: int, done: bool) -> Optional[Loss]: # IDEA: Actually, now that I think about it, instead of detaching the # tensors, we could instead use the critic's 'value' estimate and get a # loss for that incomplete episode using the tensors in the buffer, # rather than detaching them! if not done: return None # TODO: Add something like a 'num_steps_since_update' for each env? (it # would actually be a num_steps_since_backward) # if self.num_steps_since_update? n_stored_steps = self.num_stored_steps(env_index) if n_stored_steps < 5: # For now, we only give back a loss at the end of the episode. # TODO: Test if giving back a loss at each step or every few steps # would work better! logger.warning( RuntimeWarning( f"Returning None as the episode loss, because only have " f"{n_stored_steps} steps stored for that environment." ) ) return None inputs: Tensor actions: A2CHeadOutput rewards: Rewards inputs, actions, rewards = self.stack_buffers(env_index) logits: Tensor = actions.logits action_log_probs: Tensor = actions.action_log_prob values: Tensor = actions.value assert rewards.y is not None episode_rewards: Tensor = rewards.y # target values are calculated backward # it's super important to handle correctly done states, # for those cases we want our to target to be equal to the reward only episode_length = len(episode_rewards) dones = torch.zeros(episode_length, dtype=torch.bool) dones[-1] = bool(done) returns = self.get_returns(episode_rewards, gamma=self.hparams.gamma).type_as(values) advantages = returns - values # Normalize advantage (not present in the original implementation) if self.hparams.normalize_advantages: advantages = normalize(advantages) # Create the Loss to be returned. loss = Loss(self.name) # Policy gradient loss (actor loss) policy_gradient_loss = -(advantages.detach() * action_log_probs).mean() actor_loss = Loss("actor", policy_gradient_loss) loss += self.hparams.actor_loss_coef * actor_loss # Value loss: Try to get the critic's values close to the actual return, # which means the advantages should be close to zero. value_loss_tensor = F.mse_loss(values, returns.reshape(values.shape)) critic_loss = Loss("critic", value_loss_tensor) loss += self.hparams.critic_loss_coef * critic_loss # Entropy loss, to "favor exploration". entropy_loss_tensor = -actions.action_dist.entropy().mean() entropy_loss = Loss("entropy", entropy_loss_tensor) loss += self.hparams.entropy_loss_coef * entropy_loss if done: episode_rewards_array = episode_rewards.reshape([-1]) loss.metric = EpisodeMetrics( n_samples=1, mean_episode_reward=float(episode_rewards_array.sum()), mean_episode_length=len(episode_rewards_array), ) loss.metrics["gradient_usage"] = self.get_gradient_usage_metrics(env_index) return loss def optimizer_step(self): # Clip grad norm if desired. if self.hparams.max_policy_grad_norm is not None: original_norm: Tensor = torch.nn.utils.clip_grad_norm_( self.actor.parameters(), self.hparams.max_policy_grad_norm, ) self.loss.metrics["policy_gradient_norm"] = original_norm.item() super().optimizer_step() def compute_returns_and_advantage(self, last_values: Tensor, dones: np.ndarray) -> None: """ TODO: Adapting this snippet from SB3's common/buffers.py RolloutBuffer. Post-processing step: compute the returns (sum of discounted rewards) and GAE advantage. Adapted from Stable-Baselines PPO2. Uses Generalized Advantage Estimation (https://arxiv.org/abs/1506.02438) to compute the advantage. To obtain vanilla advantage (A(s) = R - V(S)) where R is the discounted reward with value bootstrap, set ``gae_lambda=1.0`` during initialization. :param last_values: :param dones: """ buffer_size: int = self.buffer_size dones: np.ndarray = self.dones rewards: np.ndarray = self.rewards values: np.ndarray = self.values gamma: float = self.gamma gae_lambda: float = 1.0 # convert to numpy last_values = last_values.clone().cpu().numpy().flatten() advantages = np.zeros_like(rewards) last_gae_lam = 0 for step in reversed(range(buffer_size)): if step == buffer_size - 1: next_non_terminal = 1.0 - dones next_values = last_values else: next_non_terminal = 1.0 - dones[step + 1] next_values = values[step + 1] delta = rewards[step] + gamma * next_values * next_non_terminal - values[step] last_gae_lam = delta + gamma * gae_lambda * next_non_terminal * last_gae_lam self.advantages[step] = last_gae_lam self.returns = self.advantages + self.values ================================================ FILE: sequoia/methods/models/output_heads/rl/episodic_a2c_test.py ================================================ from functools import partial from typing import Callable, Optional, Sequence import gym import numpy as np import pytest import torch from gym import spaces from gym.spaces.utils import flatdim from gym.vector import SyncVectorEnv from gym.vector.utils import batch_space from torch import Tensor, nn from sequoia.common.gym_wrappers import AddDoneToObservation, ConvertToFromTensors, EnvDataset from sequoia.common.loss import Loss from sequoia.conftest import DummyEnvironment from sequoia.methods.models.forward_pass import ForwardPass from sequoia.settings.rl.continual import ContinualRLSetting from .episodic_a2c import EpisodicA2C from .policy_head import PolicyHead class FakeEnvironment(SyncVectorEnv): def __init__( self, env_fn: Callable[[], gym.Env], batch_size: int, new_episode_length: Callable[[int], int], episode_lengths: Sequence[int] = None, ): super().__init__([env_fn for _ in range(batch_size)]) self.new_episode_length = new_episode_length self.batch_size = batch_size self.episode_lengths = np.array( episode_lengths or [new_episode_length(i) for i in range(self.num_envs)] ) self.steps_left_in_episode = self.episode_lengths.copy() reward_space = spaces.Box(*self.reward_range, shape=()) self.single_reward_space = reward_space self.reward_space = batch_space(reward_space, batch_size) def step(self, actions): self.steps_left_in_episode[:] -= 1 # obs, reward, done, info = super().step(actions) obs = self.observation_space.sample() reward = np.ones(self.batch_size) assert not any(self.steps_left_in_episode < 0) done = self.steps_left_in_episode == 0 info = np.array([{} for _ in range(self.batch_size)]) for env_index, env_done in enumerate(done): if env_done: next_episode_length = self.new_episode_length(env_index) self.episode_lengths[env_index] = next_episode_length self.steps_left_in_episode[env_index] = next_episode_length return obs, reward, done, info @pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)") @pytest.mark.parametrize("batch_size", [1, 2, 5]) def test_with_controllable_episode_lengths(batch_size: int, monkeypatch): """TODO: Test out the EpisodicA2C output head in a very controlled environment, where we know exactly the lengths of each episode. """ env = FakeEnvironment( partial(gym.make, "CartPole-v0"), batch_size=batch_size, episode_lengths=[5, *(10 for _ in range(batch_size - 1))], new_episode_length=lambda env_index: 10, ) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) obs_space = env.single_observation_space x_dim = flatdim(obs_space["x"]) # Create some dummy encoder. encoder = nn.Linear(x_dim, x_dim) representation_space = obs_space["x"] output_head = EpisodicA2C( input_space=representation_space, action_space=env.single_action_space, reward_space=env.single_reward_space, hparams=PolicyHead.HParams( max_episode_window_length=100, min_episodes_before_update=1, accumulate_losses_before_backward=False, ), ) # TODO: Simplify the loss function somehow using monkeypatch so we know exactly what # the loss should be at each step. batch_size = env.batch_size obs = env.reset() step_done = np.zeros(batch_size, dtype=np.bool) for step in range(200): x, obs_done = obs # The done from the obs should always be the same as the 'done' from the 'step' function. assert np.array_equal(obs_done, step_done) representations = encoder(x) observations = ContinualRLSetting.Observations( x=x, done=obs_done, ) actions_obj = output_head(observations, representations) actions = actions_obj.y_pred # TODO: kinda useless to wrap a single tensor in an object.. forward_pass = ForwardPass( observations=observations, representations=representations, actions=actions, ) obs, rewards, step_done, info = env.step(actions) rewards_obj = ContinualRLSetting.Rewards(y=rewards) loss = output_head.get_loss( forward_pass=forward_pass, actions=actions_obj, rewards=rewards_obj, ) print(f"Step {step}") print(f"num episodes since update: {output_head.num_episodes_since_update}") print(f"steps left in episode: {env.steps_left_in_episode}") print(f"Loss for that step: {loss}") if any(obs_done): assert loss != 0.0 if step == 5.0: # Env 0 first episode from steps 0 -> 5 assert loss.loss == 5.0 assert loss.metrics["gradient_usage"].used_gradients == 5.0 assert loss.metrics["gradient_usage"].wasted_gradients == 0.0 elif step == 10: # Envs[1:batch_size], first episode, from steps 0 -> 10 # NOTE: At this point, both envs have reached the required number of episodes. # This means that the gradient usage on the next time any env reaches # an end-of-episode will be one less than the total number of items. assert loss.loss == 10.0 * (batch_size - 1) assert loss.metrics["gradient_usage"].used_gradients == 10.0 * (batch_size - 1) assert loss.metrics["gradient_usage"].wasted_gradients == 0.0 elif step == 15: # Env 0 second episode from steps 5 -> 15 assert loss.loss == 10.0 assert loss.metrics["gradient_usage"].used_gradients == 4 assert loss.metrics["gradient_usage"].wasted_gradients == 6 elif step == 20: # Envs[1:batch_size]: second episode, from steps 0 -> 10 # NOTE: At this point, both envs have reached the required number of episodes. # This means that the gradient usage on the next time any env reaches # an end-of-episode will be one less than the total number of items. assert loss.loss == 10.0 * (batch_size - 1) assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1) assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1) elif step == 25: # Env 0 third episode from steps 5 -> 15 assert loss.loss == 10.0 assert loss.metrics["gradient_usage"].used_gradients == 4 assert loss.metrics["gradient_usage"].wasted_gradients == 6 elif step > 0 and step % 10 == 0: # Same pattern as step 20 above assert loss.loss == 10.0 * (batch_size - 1), step assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1) assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1) elif step > 0 and step % 5 == 0: # Same pattern as step 25 above assert loss.loss == 10.0 assert loss.metrics["gradient_usage"].used_gradients == 4 assert loss.metrics["gradient_usage"].wasted_gradients == 6 else: assert loss.loss == 0.0, step @pytest.mark.parametrize( "batch_size", [ 1, 2, 5, ], ) def test_loss_is_nonzero_at_episode_end(batch_size: int): """Test that when stepping through the env, when the episode ends, a non-zero loss is returned by the output head. """ with gym.make("CartPole-v0") as temp_env: temp_env = AddDoneToObservation(temp_env) obs_space = temp_env.observation_space action_space = temp_env.action_space reward_space = getattr( temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=()) ) env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) head = EpisodicA2C( input_space=obs_space["x"], action_space=action_space, reward_space=reward_space, hparams=EpisodicA2C.HParams(accumulate_losses_before_backward=False), ) head.train() env.seed(123) obs = env.reset() # obs = torch.as_tensor(obs, dtype=torch.float32) done = torch.zeros(batch_size, dtype=bool) info = np.array([{} for _ in range(batch_size)]) loss = None non_zero_losses = 0 encoder = nn.Linear(4, 4) encoder.train() for i in range(100): representations = encoder(obs["x"]) observations = ContinualRLSetting.Observations( x=obs["x"], done=done, # info=info, ) head_output = head.forward(observations, representations=representations) actions = head_output.actions.numpy().tolist() # actions = np.zeros(batch_size, dtype=int).tolist() obs, rewards, done, info = env.step(actions) done = torch.as_tensor(done, dtype=bool) rewards = ContinualRLSetting.Rewards(rewards) assert len(info) == batch_size print(f"Step {i}, obs: {obs}, done: {done}, info: {info}") forward_pass = ForwardPass( observations=observations, representations=representations, actions=head_output, ) loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards) print("loss:", loss) assert observations.done is not None for env_index, env_is_done in enumerate(observations.done): if env_is_done: print(f"Episode ended for env {env_index} at step {i}") assert loss.loss != 0.0 non_zero_losses += 1 break else: print(f"No episode ended on step {i}, expecting no loss.") assert loss is None or loss.loss == 0.0 assert non_zero_losses > 0 @pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)") @pytest.mark.parametrize("batch_size", [1, 2, 5]) def test_loss_is_nonzero_at_episode_end_iterate(batch_size: int): """Test that when *iterating* through the env (active-dataloader style), when the episode ends, a non-zero loss is returned by the output head. """ with gym.make("CartPole-v0") as temp_env: temp_env = AddDoneToObservation(temp_env) obs_space = temp_env.observation_space action_space = temp_env.action_space reward_space = getattr( temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=()) ) env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) head = EpisodicA2C( # observation_space=obs_space, input_space=obs_space["x"], action_space=action_space, reward_space=reward_space, hparams=EpisodicA2C.HParams(accumulate_losses_before_backward=False), ) env.seed(123) non_zero_losses = 0 for i, obs in zip(range(100), env): print(i, obs) x = obs["x"] done = obs[1] representations = x assert isinstance(x, Tensor) assert isinstance(done, Tensor) observations = ContinualRLSetting.Observations( x=x, done=done, # info=info, ) head_output = head.forward(observations, representations=representations) actions = head_output.actions.numpy().tolist() # actions = np.zeros(batch_size, dtype=int).tolist() rewards = env.send(actions) # print(f"Step {i}, obs: {obs}, done: {done}") assert isinstance(representations, Tensor) forward_pass = ForwardPass( observations=observations, representations=representations, actions=head_output, ) rewards = ContinualRLSetting.Rewards(rewards) loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards) print("loss:", loss) for env_index, env_is_done in enumerate(observations.done): if env_is_done: print(f"Episode ended for env {env_index} at step {i}") assert loss.total_loss != 0.0 non_zero_losses += 1 break else: print(f"No episode ended on step {i}, expecting no loss.") assert loss.total_loss == 0.0 assert non_zero_losses > 0 @pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)") @pytest.mark.xfail(reason="TODO: Fix this test") def test_buffers_are_stacked_correctly(monkeypatch): """TODO: Test that when "de-synced" episodes, when fed to the output head, get passed, re-stacked correctly, to the get_episode_loss function. """ batch_size = 5 starting_values = [i for i in range(batch_size)] targets = [10 for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=10 * 2) for start, target in zip(starting_values, targets) ] ) obs = env.reset() assert obs.tolist() == list(range(batch_size)) reward_space = spaces.Box(*env.reward_range, shape=()) output_head = PolicyHead( # observation_space=spaces.Tuple([env.observation_space, # spaces.Box(False, True, [batch_size], np.bool)]), input_space=spaces.Box(0, 1, (1,)), action_space=env.single_action_space, reward_space=reward_space, ) # Set the max window length, for testing. output_head.hparams.max_episode_window_length = 100 obs = initial_obs = env.reset() done = np.zeros(batch_size, dtype=bool) obs = torch.from_numpy(obs) done = torch.from_numpy(done) def mock_get_episode_loss( self: PolicyHead, env_index: int, inputs: Tensor, actions: ContinualRLSetting.Observations, rewards: ContinualRLSetting.Rewards, done: bool, ) -> Optional[Loss]: print(f"Environment at index {env_index}, episode ended: {done}") if done: print(f"Full episode: {inputs}") else: print(f"Episode so far: {inputs}") n_observations = len(inputs) assert inputs.flatten().tolist() == (env_index + np.arange(n_observations)).tolist() if done: # Unfortunately, we don't get the final state, because of how # VectorEnv works atm. assert inputs[-1] == targets[env_index] - 1 monkeypatch.setattr(PolicyHead, "get_episode_loss", mock_get_episode_loss) # perform 10 iterations, incrementing each DummyEnvironment's counter at # each step (action of 1). # Therefore, at first, the counters should be [0, 1, 2, ... batch-size-1]. info = [{} for _ in range(batch_size)] for step in range(10): print(f"Step {step}.") # Wrap up the obs to pretend that this is the data coming from a # ContinualRLSetting. observations = ContinualRLSetting.Observations(x=obs, done=done) # , info=info) # We don't use an encoder for testing, so the representations is just x. representations = obs.reshape([batch_size, 1]) assert observations.task_labels is None actions = output_head(observations.float(), representations.float()) # Wrap things up to pretend like the output head is being used in the # BaseModel: forward_pass = ForwardPass( observations=observations, representations=representations, actions=actions, ) action_np = actions.actions_np obs, rewards, done, info = env.step(action_np) obs = torch.from_numpy(obs) rewards = torch.from_numpy(rewards) done = torch.from_numpy(done) rewards = ContinualRLSetting.Rewards(y=rewards) loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards) # Check the contents of the episode buffers. assert len(output_head.representations) == batch_size for env_index in range(batch_size): # obs_buffer = output_head.observations[env_index] representations_buffer = output_head.representations[env_index] action_buffer = output_head.actions[env_index] reward_buffer = output_head.rewards[env_index] if step >= batch_size: if step + env_index == targets[env_index]: assert len(representations_buffer) == 1 and output_head.done[env_index] == False # if env_index == step - batch_size: continue assert len(representations_buffer) == step + 1 # Check to see that the last entry in the episode buffer for this # environment corresponds to the slice of the most recent # observations/actions/rewards at the index corresponding to this # environment. # observation_tuple = input_buffer[-1] step_action = action_buffer[-1] step_reward = reward_buffer[-1] # assert observation_tuple.x == observations.x[env_index] # assert observation_tuple.task_labels is None # assert observation_tuple.done == observations.done[env_index] # The last element in the buffer should be the slice in the batch # for that environment. assert step_action.y_pred == actions.y_pred[env_index] assert step_reward.y == rewards.y[env_index] if step < batch_size: assert obs.tolist() == (np.arange(batch_size) + step + 1).tolist() # if step >= batch_size: # if step + env_index == targets[env_index]: # assert done # assert False, (obs, rewards, done, info) # loss: Loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards) ================================================ FILE: sequoia/methods/models/output_heads/rl/policy_head.py ================================================ """ Defines a (hopefully general enough) Output Head class to be used by the BaseMethod when applied on an RL setting. NOTE: The training procedure is fundamentally on-policy atm, i.e. the observation is a single state, not a rollout, and the reward is the immediate reward at the current step. Therefore, what we do here is to first split things up and push the observations/actions/rewards into a per-environment buffer, of max length `self.hparams.max_episode_window_length`. These buffers get cleared when starting a new episode in their corresponding environment. The contents of this buffer are then rearranged and presented to the `get_episode_loss` method in order to get a loss for the given episode. The `get_episode_loss` method is also given the environment index, and is passed a boolean `done` that indicates wether the last items in the sequences it received mark the end of the episode. TODO: My hope is that this will allow us to implement RL methods that need a complete episode in order to give a loss to train with, as well as methods (like A2C, I think) which can give a Loss even when the episode isn't over yet. Also, standard supervised learning could be recovered by setting the maximum length of the 'episode buffer' to 1, and consider all observations as final, i.e., when episode length == 1 """ from collections import deque from dataclasses import dataclass from typing import ClassVar, Deque, List, Optional, Sequence, Tuple, TypeVar, Union import numpy as np import torch from gym import spaces from gym.spaces.utils import flatdim from simple_parsing import list_field from torch import Tensor from sequoia.common import Loss from sequoia.common.metrics.rl_metrics import EpisodeMetrics, GradientUsageMetric from sequoia.methods.models.forward_pass import ForwardPass from sequoia.settings.rl.continual import ContinualRLSetting from sequoia.utils.categorical import Categorical from sequoia.utils.generic_functions import stack from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import flag from ..classification_head import ClassificationHead, ClassificationOutput logger = get_logger(__name__) T = TypeVar("T") @dataclass(frozen=True) class PolicyHeadOutput(ClassificationOutput): """WIP: Adds the action pdf to ClassificationOutput.""" # The distribution over the actions, either as a single # (batched) distribution or as a list of distributions, one for each # environment in the batch. action_dist: Categorical @property def y_pred_prob(self) -> Tensor: """returns the probabilities for the chosen actions/predictions.""" return self.action_dist.probs(self.y_pred) @property def y_pred_log_prob(self) -> Tensor: """returns the log probabilities for the chosen actions/predictions.""" return self.action_dist.log_prob(self.y_pred) @property def action_log_prob(self) -> Tensor: return self.y_pred_log_prob @property def action_prob(self) -> Tensor: return self.y_pred_log_prob ## NOTE: Since the gym VectorEnvs actually auto-reset the individual ## environments (and also discard the final state, for some weird ## reason), I added a way to save it into the 'info' dict at the key ## 'final_state'. Assuming that the env this output head gets applied ## on adds the info dict to the observations (using the ## AddInfoToObservations wrapper, for instance), then the 'final' ## observation would be stored in the dict for this environment in ## the Observations object, while the 'observation' you get from step ## is the 'initial' observation of the new episode. class PolicyHead(ClassificationHead): """[WIP] Output head for RL settings. Uses the REINFORCE algorithm to calculate its loss. TODOs/issues: - Only currently works with batch_size == 1 - The buffers are common to training/validation/testing atm.. """ name: ClassVar[str] = "policy" @dataclass class HParams(ClassificationHead.HParams): hidden_layers: int = 0 hidden_neurons: List[int] = list_field() # The discount factor for the Return term. gamma: float = 0.99 # The maximum length of the buffer that will hold the most recent # states/actions/rewards of the current episode. max_episode_window_length: int = 1000 # Minumum number of epidodes that need to be completed in each env # before we update the parameters of the output head. min_episodes_before_update: int = 1 # TODO: Add this mechanism, so that this method could work even when # episodes are very long. max_steps_between_updates: Optional[int] = None # NOTE: Here we have two options: # 1- `True`: sum up all the losses and do one larger backward pass, # and have `retrain_graph=False`, or # 2- `False`: Perform multiple little backward passes, one for each # end-of-episode in a single env, w/ `retain_graph=True`. # Option 1 is maybe more performant, as it might only require # unrolling the graph once, but would use more memory to store all the # intermediate graphs. accumulate_losses_before_backward: bool = flag(True) def __init__( self, input_space: spaces.Space, action_space: spaces.Discrete, reward_space: spaces.Box, hparams: "PolicyHead.HParams" = None, name: str = "policy", ): assert isinstance( input_space, spaces.Box ), f"Only support Tensor (box) input space. (got {input_space})." assert isinstance( action_space, spaces.Discrete ), f"Only support discrete action space (got {action_space})." assert isinstance( reward_space, spaces.Box ), f"Reward space should be a Box (scalar rewards) (got {reward_space})." super().__init__( input_space=input_space, action_space=action_space, reward_space=reward_space, hparams=hparams, name=name, ) logger.debug("New Output head with hparams: " + self.hparams.dumps_json(indent="\t")) self.hparams: PolicyHead.HParams # Type hints for the spaces; self.input_space: spaces.Box self.action_space: spaces.Discrete self.reward_space: spaces.Box # List of buffers for each environment that will hold some items. # TODO: Won't use the 'observations' anymore, will only use the # representations from the encoder, so renaming 'representations' to # 'observations' in this case. # (Should probably come up with another name so this isn't ambiguous). # TODO: Perhaps we should register these as buffers so they get # persisted correclty? But then we also need to make sure that the grad # stuff would work the same way.. self.representations: List[Deque[Tensor]] = [] # self.representations: List[deque] = [] self.actions: List[Deque[PolicyHeadOutput]] = [] self.rewards: List[Deque[ContinualRLSetting.Rewards]] = [] # The actual "internal" loss we use for training. self.loss: Loss = Loss(self.name) self.batch_size: int = 0 self.num_episodes_since_update: np.ndarray = np.zeros(1) self.num_steps_in_episode: np.ndarray = np.zeros(1) self._training: bool = True self.device: Optional[Union[str, torch.device]] = None def create_buffers(self): """Creates the buffers to hold the items from each env.""" logger.debug(f"Creating buffers (batch size={self.batch_size})") logger.debug(f"Maximum buffer length: {self.hparams.max_episode_window_length}") self.representations = self._make_buffers() self.actions = self._make_buffers() self.rewards = self._make_buffers() self.num_steps_in_episode = np.zeros(self.batch_size, dtype=int) self.num_episodes_since_update = np.zeros(self.batch_size, dtype=int) def forward( self, observations: ContinualRLSetting.Observations, representations: Tensor ) -> PolicyHeadOutput: """Forward pass of a Policy head. TODO: Do we actually need the observations here? It is here so we have access to the 'done' from the env, but do we really need it here? or would there be another (cleaner) way to do this? """ if len(representations.shape) < 2: # Flatten the representations. representations = representations.reshape([-1, flatdim(self.input_space)]) # Setup the buffers, which will hold the most recent observations, # actions and rewards within the current episode for each environment. if not self.batch_size: self.batch_size = representations.shape[0] self.create_buffers() representations = representations.float() logits = self.dense(representations) # The policy is the distribution over actions given the current state. action_dist = Categorical(logits=logits) sample = action_dist.sample() actions = PolicyHeadOutput( y_pred=sample, logits=logits, action_dist=action_dist, ) return actions T = TypeVar("T") def to(self: T, device: Optional[Union[int, torch.device]] = None, **kwargs) -> T: result = super().to(device=device, **kwargs) if device is not None: result.device = torch.device(device) return result def get_loss( self, forward_pass: ForwardPass, actions: PolicyHeadOutput, rewards: ContinualRLSetting.Rewards, ) -> Loss: """Given the forward pass, the actions produced by this output head and the corresponding rewards for the current step, get a Loss to use for training. TODO: Replace the `forward_pass` argument with just `observations` and `representations` and provide the right (augmented) observations to the aux tasks. (Need to design that part later). NOTE: If an end of episode was reached in a given environment, we always calculate the losses and clear the buffers before adding in the new observation. """ observations: ContinualRLSetting.Observations = forward_pass.observations representations: Tensor = forward_pass.representations assert self.batch_size, "forward() should have been called before this." if not self.hparams.accumulate_losses_before_backward: # Reset the loss for the current step, if we're not accumulating it. self.loss = Loss(self.name) observations = forward_pass.observations representations = forward_pass.representations assert observations.done is not None, "need the end-of-episode signal" # Calculate the loss for each environment. for env_index, done in enumerate(observations.done): env_loss = self.get_episode_loss(env_index, done=done) if env_loss is not None: self.loss += env_loss if done: # End of episode reached in that env! if self.training: # BUG: This seems to be failing, during testing: # assert env_loss is not None, (self.name) pass self.on_episode_end(env_index) if self.batch_size != forward_pass.batch_size: raise NotImplementedError( "TODO: The batch size changed, because the batch contains different " "tasks. The BaseModel isn't yet applicable in the setup where " "there are multiple different tasks in the same batch in RL. " ) # IDEA: Need to get access to the 'original' env indices (before slicing), # so that even when one more environment is in this task, the other # environment's buffers remain at the same index.. Something like a # remapping of env indices? assert len(representations.shape) == 2, ( f"Need batched representations, with a shape [16, 128] or similar, but " f"representations have shape {representations.shape}." ) self.batch_size = representations.shape[0] self.create_buffers() for env_index in range(self.batch_size): # Take a slice across the first dimension # env_observations = get_slice(observations, env_index) env_representations = representations[env_index] env_actions = actions.slice(env_index) # env_actions = actions[env_index, ...] # TODO: Is this nicer? env_rewards = rewards.slice(env_index) # BUG: Seems to be some issue of things in the buffers not all being on the # same device # assert self.device is not None # # TODO: Should we be storing these tensors in GPU memory though? Not sure if # # this makes sense. # env_representations = move(env_representations, device=self.device) # env_actions = move(env_actions, device=self.device) # env_rewards = move(env_rewards, device=self.device) self.representations[env_index].append(env_representations) self.actions[env_index].append(env_actions) self.rewards[env_index].append(env_rewards) self.num_steps_in_episode += 1 # TODO: # If we want to accumulate the losses before backward, then we just return self.loss # If we DONT want to accumulate the losses before backward, then we do the # 'small' backward pass, and return a detached loss. if self.hparams.accumulate_losses_before_backward: if all(self.num_episodes_since_update >= self.hparams.min_episodes_before_update): # Every environment has seen the required number of episodes. # We return the accumulated loss, so that the model can do the backward # pass and update the weights. returned_loss = self.loss self.loss = Loss(self.name) self.detach_all_buffers() self.num_episodes_since_update[:] = 0 return returned_loss return Loss(self.name) # Perform the backward pass as soon as a loss is available (with # retain_graph=True). if all(self.num_episodes_since_update >= self.hparams.min_episodes_before_update): # Every environment has seen the required number of episodes. # We return the loss for this step, with gradients, to indicate to the # Model that it can perform the backward pass and update the weights. returned_loss = self.loss self.loss = Loss(self.name) self.detach_all_buffers() self.num_episodes_since_update[:] = 0 return returned_loss if self.loss.requires_grad: # Not all environments are done, but we have a Loss from one of them. self.loss.backward(retain_graph=True) # self.loss will be reset at each step in the `forward` method above. return self.loss.detach() # TODO: Why is self.loss non-zero here? if self.loss.loss != 0.0: # BUG: This is a weird edge-case, where at least one env produced # a loss, but that loss doesn't require grad. # This should only happen if the model isn't in training mode, for # instance. # assert not self.training, self.loss # return self.loss pass return self.loss def on_episode_end(self, env_index: int) -> None: self.num_episodes_since_update[env_index] += 1 self.num_steps_in_episode[env_index] = 0 self.clear_buffers(env_index) def get_episode_loss(self, env_index: int, done: bool) -> Optional[Loss]: """Calculate a loss to train with, given the last (up to max_episode_window_length) observations/actions/rewards of the current episode in the environment at the given index in the batch. If `done` is True, then this is for the end of an episode. If `done` is False, the episode is still underway. NOTE: While the Batch Observations/Actions/Rewards objects usually contain the "batches" of data coming from the N different environments, now they are actually a sequence of items coming from this single environment. For more info on how this is done, see the """ inputs: Tensor actions: PolicyHeadOutput rewards: ContinualRLSetting.Rewards if not done: # This particular algorithm (REINFORCE) can't give a loss until the # end of the episode is reached. return None if len(self.actions[env_index]) == 0: logger.error( f"Weird, asked to get episode loss, but there is " f"nothing in the buffer?" ) return None inputs, actions, rewards = self.stack_buffers(env_index) episode_length = actions.batch_size assert len(inputs) == len(actions.y_pred) == len(rewards.y) if episode_length <= 1: # TODO: If the episode has len of 1, we can't really get a loss! logger.error("Episode is too short!") return None log_probabilities = actions.y_pred_log_prob rewards = rewards.y loss_tensor = self.policy_gradient( rewards=rewards, log_probs=log_probabilities, gamma=self.hparams.gamma, ) loss = Loss(self.name, loss_tensor) loss.metric = EpisodeMetrics( n_samples=1, mean_episode_reward=float(rewards.sum()), mean_episode_length=len(rewards), ) # TODO: add something like `add_metric(self, metric: Metrics, name: str=None)` # to `Loss`. loss.metrics["gradient_usage"] = self.get_gradient_usage_metrics(env_index) return loss def get_gradient_usage_metrics(self, env_index: int) -> GradientUsageMetric: """Returns a Metrics object that describes how many of the actions from an episode that are used to calculate a loss still have their graphs, versus ones that don't have them (due to being created before the last model update, and therefore having been detached.) Does this by inspecting the contents of `self.actions[env_index]`. """ episode_actions = self.actions[env_index] n_stored_items = len(self.actions[env_index]) n_items_with_grad = sum(v.logits.requires_grad for v in episode_actions) n_items_without_grad = n_stored_items - n_items_with_grad return GradientUsageMetric( used_gradients=n_items_with_grad, wasted_gradients=n_items_without_grad, ) @staticmethod def get_returns(rewards: Union[Tensor, List[Tensor]], gamma: float) -> Tensor: """Calculates the returns, as the sum of discounted future rewards at each step. """ return discounted_sum_of_future_rewards(rewards, gamma=gamma) @staticmethod def policy_gradient( rewards: List[float], log_probs: Union[Tensor, List[Tensor]], gamma: float = 0.95 ): """Implementation of the REINFORCE algorithm. Adapted from https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63 Parameters ---------- - episode_rewards : List[Tensor] The rewards at each step in an episode - episode_log_probs : List[Tensor] The log probabilities associated with the actions that were taken at each step. Returns ------- Tensor The "vanilla policy gradient" / REINFORCE gradient resulting from that episode. """ return vanilla_policy_gradient(rewards, log_probs, gamma=gamma) @property def training(self) -> bool: return self._training @training.setter def training(self, value: bool) -> None: # logger.debug(f"setting training to {value} on the Policy output head") if hasattr(self, "_training") and value != self._training: before = "train" if self._training else "test" after = "train" if value else "test" logger.debug( f"Clearing buffers, since we're transitioning between from {before}->{after}" ) self.clear_all_buffers() self.batch_size = None self.num_episodes_since_update[:] = 0 self._training = value def clear_all_buffers(self) -> None: if self.batch_size is None: assert not self.rewards assert not self.representations assert not self.actions return for env_id in range(self.batch_size): self.clear_buffers(env_id) self.rewards.clear() self.representations.clear() self.actions.clear() self.batch_size = None def clear_buffers(self, env_index: int) -> None: """Clear the buffers associated with the environment at env_index.""" self.representations[env_index].clear() self.actions[env_index].clear() self.rewards[env_index].clear() def detach_all_buffers(self): if not self.batch_size: assert not self.actions # No buffers to detach! return for env_index in range(self.batch_size): self.detach_buffers(env_index) def detach_buffers(self, env_index: int) -> None: """Detach all the tensors in the buffers for a given environment. We have to do this when we update the model while an episode in one of the enviroment isn't done. """ # detached_representations = map(detach, ) # detached_actions = map(detach, self.actions[env_index]) # detached_rewards = map(detach, self.rewards[env_index]) self.representations[env_index] = self._detach_buffer(self.representations[env_index]) self.actions[env_index] = self._detach_buffer(self.actions[env_index]) self.rewards[env_index] = self._detach_buffer(self.rewards[env_index]) # assert False, (self.representations[0], self.representations[-1]) def _detach_buffer(self, old_buffer: Sequence[Tensor]) -> deque: new_items = self._make_buffer() for item in old_buffer: detached = item.detach() new_items.append(detached) return new_items def _make_buffer(self, elements: Sequence[T] = None) -> Deque[T]: buffer: Deque[T] = deque(maxlen=self.hparams.max_episode_window_length) if elements: buffer.extend(elements) return buffer def _make_buffers(self) -> List[deque]: return [self._make_buffer() for _ in range(self.batch_size)] def stack_buffers(self, env_index: int): """Stack the observations/actions/rewards for this env and return them.""" # episode_observations = tuple(self.observations[env_index]) episode_representations = tuple(self.representations[env_index]) episode_actions = tuple(self.actions[env_index]) episode_rewards = tuple(self.rewards[env_index]) assert len(episode_representations) assert len(episode_actions) assert len(episode_rewards) # BUG: Need to make sure that all tensors are on the same device: # assert self.device is not None # episode_representations = [ # move(item, device=self.device) for item in episode_representations # ] # episode_actions = [ # move(item, device=self.device) for item in episode_actions # ] # episode_rewards = [ # move(item, device=self.device) for item in episode_rewards # ] stacked_inputs = stack(episode_representations) stacked_actions = stack(episode_actions) stacked_rewards = stack(episode_rewards) return stacked_inputs, stacked_actions, stacked_rewards def discounted_sum_of_future_rewards(rewards: Union[Tensor, List[Tensor]], gamma: float) -> Tensor: """Calculates the returns, as the sum of discounted future rewards at each step. """ T = len(rewards) if not isinstance(rewards, Tensor): rewards = torch.as_tensor(rewards) # Construct a reward matrix, with previous rewards masked out (with each # row as a step along the trajectory). reward_matrix = rewards.expand([T, T]).triu() # Get the gamma matrix (upper triangular), see make_gamma_matrix for # more info. gamma_matrix = make_gamma_matrix(gamma, T, device=reward_matrix.device) # Multiplying by the gamma coefficients gives the discounted rewards. discounted_rewards = reward_matrix * gamma_matrix # Summing up over time gives the return at each step. return discounted_rewards.sum(-1) def vanilla_policy_gradient( rewards: Sequence[float], log_probs: Union[Tensor, List[Tensor]], gamma: float = 0.95 ): """Implementation of the REINFORCE algorithm. Adapted from https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63 Parameters ---------- - episode_rewards : Sequence[float] The rewards at each step in an episode - episode_log_probs : List[Tensor] The log probabilities associated with the actions that were taken at each step. Returns ------- Tensor The "vanilla policy gradient" / REINFORCE gradient resulting from that episode. """ if isinstance(log_probs, Tensor): action_log_probs = log_probs else: action_log_probs = torch.stack(log_probs) reward_tensor = torch.as_tensor(rewards).type_as(action_log_probs) returns = PolicyHead.get_returns(reward_tensor, gamma=gamma) # Need both tensors to be 1-dimensional for the dot-product below. action_log_probs = action_log_probs.reshape(returns.shape) policy_gradient = -action_log_probs.dot(returns) return policy_gradient # @torch.jit.script # @lru_cache() def make_gamma_matrix(gamma: float, T: int, device=None) -> Tensor: """ Create an upper-triangular matrix [T, T] with the gamma factors, starting at 1.0 on the diagonal, and decreasing exponentially towards the right. """ gamma_matrix = torch.empty([T, T]).triu_() # Neat indexing trick to fill up the upper triangle of the matrix: rows, cols = torch.triu_indices(T, T) # Precompute all the powers of gamma in range [0, T] all_gammas = gamma ** torch.arange(T) # Put the right value at each entry in the upper triangular matrix. gamma_matrix[rows, cols] = all_gammas[cols - rows] return gamma_matrix.to(device) if device else gamma_matrix def normalize(x: Tensor): return (x - x.mean()) / (x.std() + 1e-9) T = TypeVar("T") def tuple_of_lists(list_of_tuples: List[Tuple[T, ...]]) -> Tuple[List[T], ...]: return tuple(map(list, zip(*list_of_tuples))) def list_of_tuples(tuple_of_lists: Tuple[List[T], ...]) -> List[Tuple[T, ...]]: return list(zip(*tuple_of_lists)) ================================================ FILE: sequoia/methods/models/output_heads/rl/policy_head_test.py ================================================ from functools import partial from typing import Callable, Optional, Sequence import gym import numpy as np import pytest import torch from gym import spaces from gym.spaces.utils import flatdim from gym.vector import SyncVectorEnv from gym.vector.utils import batch_space from torch import Tensor, nn from sequoia.common.gym_wrappers import ( AddDoneToObservation, ConvertToFromTensors, EnvDataset, PixelObservationWrapper, ) from sequoia.common.loss import Loss from sequoia.conftest import DummyEnvironment from sequoia.methods.models.forward_pass import ForwardPass from sequoia.settings.rl.continual import ContinualRLSetting from sequoia.settings.rl.continual.make_env import make_batched_env from .policy_head import PolicyHead class FakeEnvironment(SyncVectorEnv): def __init__( self, env_fn: Callable[[], gym.Env], batch_size: int, new_episode_length: Callable[[int], int], episode_lengths: Sequence[int] = None, ): super().__init__([env_fn for _ in range(batch_size)]) self.new_episode_length = new_episode_length self.batch_size = batch_size self.episode_lengths = np.array( episode_lengths or [new_episode_length(i) for i in range(self.num_envs)] ) self.steps_left_in_episode = self.episode_lengths.copy() reward_space = spaces.Box(*self.reward_range, shape=()) self.single_reward_space = reward_space self.reward_space = batch_space(reward_space, batch_size) def step(self, actions): self.steps_left_in_episode[:] -= 1 # obs, reward, done, info = super().step(actions) obs = self.observation_space.sample() reward = np.ones(self.batch_size) assert not any(self.steps_left_in_episode < 0) done = self.steps_left_in_episode == 0 info = np.array([{} for _ in range(self.batch_size)]) for env_index, env_done in enumerate(done): if env_done: next_episode_length = self.new_episode_length(env_index) self.episode_lengths[env_index] = next_episode_length self.steps_left_in_episode[env_index] = next_episode_length return obs, reward, done, info @pytest.mark.parametrize("batch_size", [2, 5]) def test_with_controllable_episode_lengths(batch_size: int, monkeypatch): """TODO: Test out the PolicyHead in a very controlled environment, where we know exactly the lengths of each episode. """ env = FakeEnvironment( partial(gym.make, "CartPole-v0"), batch_size=batch_size, episode_lengths=[5, *(10 for _ in range(batch_size - 1))], new_episode_length=lambda env_index: 10, ) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) obs_space = env.single_observation_space x_dim = flatdim(obs_space["x"]) # Create some dummy encoder. encoder = nn.Linear(x_dim, x_dim) representation_space = obs_space["x"] output_head = PolicyHead( input_space=representation_space, action_space=env.single_action_space, reward_space=env.single_reward_space, hparams=PolicyHead.HParams( max_episode_window_length=100, min_episodes_before_update=1, accumulate_losses_before_backward=False, ), ) # TODO: Simulating as if the output head were attached to a BaseModel. PolicyHead.base_model_optimizer = torch.optim.Adam(output_head.parameters(), lr=1e-3) # Simplify the loss function so we know exactly what the loss should be at # each step. def mock_policy_gradient( rewards: Sequence[float], log_probs: Sequence[float], gamma: float = 0.95 ) -> Optional[Loss]: log_probs = (log_probs - log_probs.clone()) + 1 # Return the length of the episode, but with a "gradient" flowing back into log_probs. return len(rewards) * log_probs.mean() monkeypatch.setattr(output_head, "policy_gradient", mock_policy_gradient) batch_size = env.batch_size obs = env.reset() step_done = np.zeros(batch_size, dtype=np.bool) for step in range(200): x, obs_done = obs["x"], obs["done"] # The done from the obs should always be the same as the 'done' from the 'step' function. assert np.array_equal(obs_done, step_done) representations = encoder(x) observations = ContinualRLSetting.Observations( x=x, done=obs_done, ) actions_obj = output_head(observations, representations) actions = actions_obj.y_pred # TODO: kinda useless to wrap a single tensor in an object.. forward_pass = ForwardPass( observations=observations, representations=representations, actions=actions, ) obs, rewards, step_done, info = env.step(actions) rewards_obj = ContinualRLSetting.Rewards(y=rewards) loss = output_head.get_loss( forward_pass=forward_pass, actions=actions_obj, rewards=rewards_obj, ) print(f"Step {step}") print(f"num episodes since update: {output_head.num_episodes_since_update}") print(f"steps left in episode: {env.steps_left_in_episode}") print(f"Loss for that step: {loss}") if any(obs_done): assert loss != 0.0 if step == 5.0: # Env 0 first episode from steps 0 -> 5 assert loss.loss == 5.0 assert loss.metrics["gradient_usage"].used_gradients == 5.0 assert loss.metrics["gradient_usage"].wasted_gradients == 0.0 elif step == 10: # Envs[1:batch_size], first episode, from steps 0 -> 10 # NOTE: At this point, both envs have reached the required number of episodes. # This means that the gradient usage on the next time any env reaches # an end-of-episode will be one less than the total number of items. assert loss.loss == 10.0 * (batch_size - 1) assert loss.metrics["gradient_usage"].used_gradients == 10.0 * (batch_size - 1) assert loss.metrics["gradient_usage"].wasted_gradients == 0.0 elif step == 15: # Env 0 second episode from steps 5 -> 15 assert loss.loss == 10.0 assert loss.metrics["gradient_usage"].used_gradients == 4 assert loss.metrics["gradient_usage"].wasted_gradients == 6 elif step == 20: # Envs[1:batch_size]: second episode, from steps 0 -> 10 # NOTE: At this point, both envs have reached the required number of episodes. # This means that the gradient usage on the next time any env reaches # an end-of-episode will be one less than the total number of items. assert loss.loss == 10.0 * (batch_size - 1) assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1) assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1) elif step == 25: # Env 0 third episode from steps 5 -> 15 assert loss.loss == 10.0 assert loss.metrics["gradient_usage"].used_gradients == 4 assert loss.metrics["gradient_usage"].wasted_gradients == 6 elif step > 0 and step % 10 == 0: # Same pattern as step 20 above assert loss.loss == 10.0 * (batch_size - 1), step assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1) assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1) elif step > 0 and step % 5 == 0: # Same pattern as step 25 above assert loss.loss == 10.0 assert loss.metrics["gradient_usage"].used_gradients == 4 assert loss.metrics["gradient_usage"].wasted_gradients == 6 else: assert loss.loss == 0.0, step @pytest.mark.parametrize("batch_size", [1, 2, 5]) def test_loss_is_nonzero_at_episode_end(batch_size: int): """Test that when stepping through the env, when the episode ends, a non-zero loss is returned by the output head. """ with gym.make("CartPole-v0") as temp_env: temp_env = AddDoneToObservation(temp_env) obs_space = temp_env.observation_space action_space = temp_env.action_space reward_space = getattr( temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=()) ) env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) head = PolicyHead( input_space=obs_space.x, action_space=action_space, reward_space=reward_space, hparams=PolicyHead.HParams(accumulate_losses_before_backward=False), ) # TODO: Simulating as if the output head were attached to a BaseModel. PolicyHead.base_model_optimizer = torch.optim.Adam(head.parameters(), lr=1e-3) head.train() env.seed(123) obs = env.reset() # obs = torch.as_tensor(obs, dtype=torch.float32) done = torch.zeros(batch_size, dtype=bool) info = np.array([{} for _ in range(batch_size)]) loss = None non_zero_losses = 0 encoder = nn.Linear(4, 4) encoder.train() for i in range(100): representations = encoder(obs["x"]) observations = ContinualRLSetting.Observations( x=obs["x"], done=done, # info=info, ) head_output = head.forward(observations, representations=representations) actions = head_output.actions.numpy().tolist() # actions = np.zeros(batch_size, dtype=int).tolist() obs, rewards, done, info = env.step(actions) done = torch.as_tensor(done, dtype=bool) rewards = ContinualRLSetting.Rewards(rewards) assert len(info) == batch_size print(f"Step {i}, obs: {obs}, done: {done}, info: {info}") forward_pass = ForwardPass( observations=observations, representations=representations, actions=head_output, ) loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards) print("loss:", loss) assert observations.done is not None for env_index, env_is_done in enumerate(observations.done): if env_is_done: print(f"Episode ended for env {env_index} at step {i}") assert loss.loss != 0.0 non_zero_losses += 1 break else: print(f"No episode ended on step {i}, expecting no loss.") assert loss is None or loss.loss == 0.0 assert non_zero_losses > 0 @pytest.mark.parametrize("batch_size", [1, 2, 5]) def test_done_is_sometimes_True_when_iterating_through_env(batch_size: int): """Test that when *iterating* through the env, done is sometimes 'True'.""" env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=True) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) for i, obs in zip(range(100), env): print(i, obs) _ = env.send(env.action_space.sample()) if any(obs["done"]): break else: pytest.fail(reason="Never encountered done=True!") @pytest.mark.parametrize("batch_size", [1, 2, 5]) def test_loss_is_nonzero_at_episode_end_iterate(batch_size: int): """Test that when *iterating* through the env (active-dataloader style), when the episode ends, a non-zero loss is returned by the output head. """ with gym.make("CartPole-v0") as temp_env: temp_env = AddDoneToObservation(temp_env) obs_space = temp_env.observation_space action_space = temp_env.action_space reward_space = getattr( temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=()) ) env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False) env = AddDoneToObservation(env) env = ConvertToFromTensors(env) env = EnvDataset(env) head = PolicyHead( # observation_space=obs_space, input_space=obs_space["x"], action_space=action_space, reward_space=reward_space, hparams=PolicyHead.HParams(accumulate_losses_before_backward=False), ) env.seed(123) non_zero_losses = 0 for i, obs in zip(range(100), env): print(i, obs) x = obs["x"] done = obs["done"] representations = x assert isinstance(x, Tensor) assert isinstance(done, Tensor) observations = ContinualRLSetting.Observations( x=x, done=done, # info=info, ) head_output = head.forward(observations, representations=representations) actions = head_output.actions.numpy().tolist() # actions = np.zeros(batch_size, dtype=int).tolist() rewards = env.send(actions) # print(f"Step {i}, obs: {obs}, done: {done}") assert isinstance(representations, Tensor) forward_pass = ForwardPass( observations=observations, representations=representations, actions=head_output, ) rewards = ContinualRLSetting.Rewards(rewards) loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards) print("loss:", loss) for env_index, env_is_done in enumerate(observations.done): if env_is_done: print(f"Episode ended for env {env_index} at step {i}") assert loss.total_loss != 0.0 non_zero_losses += 1 break else: print(f"No episode ended on step {i}, expecting no loss.") assert loss.total_loss == 0.0 assert non_zero_losses > 0 @pytest.mark.xfail(reason="TODO: Fix this test") def test_buffers_are_stacked_correctly(monkeypatch): """TODO: Test that when "de-synced" episodes, when fed to the output head, get passed, re-stacked correctly, to the get_episode_loss function. """ batch_size = 5 starting_values = [i for i in range(batch_size)] targets = [10 for i in range(batch_size)] env = SyncVectorEnv( [ partial(DummyEnvironment, start=start, target=target, max_value=10 * 2) for start, target in zip(starting_values, targets) ] ) obs = env.reset() assert obs.tolist() == list(range(batch_size)) reward_space = spaces.Box(*env.reward_range, shape=()) output_head = PolicyHead( # observation_space=spaces.Tuple([env.observation_space, # spaces.Box(False, True, [batch_size], np.bool)]), input_space=spaces.Box(0, 1, (1,)), action_space=env.single_action_space, reward_space=reward_space, ) # Set the max window length, for testing. output_head.hparams.max_episode_window_length = 100 obs = env.reset() done = np.zeros(batch_size, dtype=bool) obs = torch.from_numpy(obs) done = torch.from_numpy(done) def mock_get_episode_loss( self: PolicyHead, env_index: int, inputs: Tensor, actions: ContinualRLSetting.Observations, rewards: ContinualRLSetting.Rewards, done: bool, ) -> Optional[Loss]: print(f"Environment at index {env_index}, episode ended: {done}") if done: print(f"Full episode: {inputs}") else: print(f"Episode so far: {inputs}") n_observations = len(inputs) assert inputs.flatten().tolist() == (env_index + np.arange(n_observations)).tolist() if done: # Unfortunately, we don't get the final state, because of how # VectorEnv works atm. assert inputs[-1] == targets[env_index] - 1 monkeypatch.setattr(PolicyHead, "get_episode_loss", mock_get_episode_loss) # perform 10 iterations, incrementing each DummyEnvironment's counter at # each step (action of 1). # Therefore, at first, the counters should be [0, 1, 2, ... batch-size-1]. info = [{} for _ in range(batch_size)] for step in range(10): print(f"Step {step}.") # Wrap up the obs to pretend that this is the data coming from a # ContinualRLSetting. observations = ContinualRLSetting.Observations(x=obs, done=done) # , info=info) # We don't use an encoder for testing, so the representations is just x. representations = obs.reshape([batch_size, 1]) assert observations.task_labels is None actions = output_head(observations.float(), representations.float()) # Wrap things up to pretend like the output head is being used in the # BaseModel: forward_pass = ForwardPass( observations=observations, representations=representations, actions=actions, ) action_np = actions.actions_np obs, rewards, done, info = env.step(action_np) obs = torch.from_numpy(obs) rewards = torch.from_numpy(rewards) done = torch.from_numpy(done) rewards = ContinualRLSetting.Rewards(y=rewards) _ = output_head.get_loss(forward_pass, actions=actions, rewards=rewards) # Check the contents of the episode buffers. assert len(output_head.representations) == batch_size for env_index in range(batch_size): # obs_buffer = output_head.observations[env_index] representations_buffer = output_head.representations[env_index] action_buffer = output_head.actions[env_index] reward_buffer = output_head.rewards[env_index] if step >= batch_size: if step + env_index == targets[env_index]: assert len(representations_buffer) == 1 and not output_head.done[env_index] # if env_index == step - batch_size: continue assert len(representations_buffer) == step + 1 # Check to see that the last entry in the episode buffer for this # environment corresponds to the slice of the most recent # observations/actions/rewards at the index corresponding to this # environment. # observation_tuple = input_buffer[-1] step_action = action_buffer[-1] step_reward = reward_buffer[-1] # assert observation_tuple.x == observations.x[env_index] # assert observation_tuple.task_labels is None # assert observation_tuple.done == observations.done[env_index] # The last element in the buffer should be the slice in the batch # for that environment. assert step_action.y_pred == actions.y_pred[env_index] assert step_reward.y == rewards.y[env_index] if step < batch_size: assert obs.tolist() == (np.arange(batch_size) + step + 1).tolist() # if step >= batch_size: # if step + env_index == targets[env_index]: # assert done # assert False, (obs, rewards, done, info) # loss: Loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards) @pytest.mark.no_xvfb def test_sanity_check_cartpole_done_vector(): """TODO: Sanity check, make sure that cartpole has done=True at some point when using a BatchedEnv. """ env = make_batched_env("CartPole-v0", batch_size=5, wrappers=[PixelObservationWrapper]) env = AddDoneToObservation(env) obs = env.reset() for i in range(100): obs, rewards, done, info = env.step(env.action_space.sample()) assert all(obs["done"] == done), i if any(done): break else: assert False, "Should have had at least one done=True, over the 100 steps!" ================================================ FILE: sequoia/methods/models/output_heads/rl/wasted_steps_calc.py ================================================ from typing import Callable, List import numpy as np import tqdm as tqdm def get_fraction_of_observations_with_grad( n_envs: int, new_episode_length: Callable[[], int], n_updates: int = 10, min_episodes_before_update: int = 1, ): n_used_steps = 0 n_wasted_steps = 0 # min_episode_length = 0 # max_episode_length = 10 # n_envs = 10 # new_episode_length = lambda: 10 # The starting episode lengths for each env. # new_episode_length = lambda: 10 # episode_lengths = [5, 10] # n_envs = 2 episode_lengths = np.array([new_episode_length() for _ in range(n_envs)]) steps_left_in_episode = episode_lengths.copy() num_finished_episodes = np.zeros(n_envs) for step in tqdm.tqdm(range(n_updates), leave=False): # print(f"Step {step}") steps_since_last_update = np.zeros(n_envs) finished_episodes_since_last_update = np.zeros(n_envs) # Loop over all the envs, until all of them have produced a loss (reached # the end of an episode). while not all(finished_episodes_since_last_update >= min_episodes_before_update): # print(f"Episode lengths: {episode_lengths}") # print(f"Steps left: {steps_left_in_episode}") # print(f"Completed episodes: {num_finished_episodes}") # print(f"Used steps: {n_used_steps}") # print(f"Wasted steps: {n_wasted_steps}") # print(steps_left_in_episode) for env in range(n_envs): if steps_left_in_episode[env] == 0: # Perform the "backward()" for that env. # This will use all steps since the last update (with grads). used = steps_since_last_update[env] n_used_steps += used wasted = episode_lengths[env] - steps_since_last_update # print(f"Step {step}, doing backward for env {env} using {used} steps.") steps_since_last_update[env] = 0 finished_episodes_since_last_update[env] += 1 num_finished_episodes[env] += 1 # Sample the length of the next episode randomly. length_of_next_episode = new_episode_length() steps_left_in_episode[env] = length_of_next_episode else: steps_left_in_episode[env] -= 1 steps_since_last_update[env] += 1 # Perform the "optimizer step" for the model. # This 'wastes' all the prediction tensors (actions) in unfinished episodes # because it would detach them. wasted_per_env = steps_since_last_update n_wasted_steps += int(wasted_per_env.sum()) # print(f"Updating model at step {step}, wasting {wasted_per_env} grads") # exit() # print(f"Ratio of used vs wasted so far: {n_used_steps}/{n_wasted_steps+n_used_steps}") # print(f"n episodes per env: {num_finished_episodes}") total_steps = n_used_steps + n_wasted_steps used_ratio = n_used_steps / total_steps wasted_ratio = n_wasted_steps / total_steps # print(f"Total steps: {total_steps}") # print(f"n_envs: {n_envs}") # print(f"n_updates: {n_updates}") # print(f"Used steps: {n_used_steps} \t{used_ratio:.2%}") # print(f"Wasted steps: {n_wasted_steps} \t{wasted_ratio:.2%}") return n_used_steps, n_wasted_steps if __name__ == "__main__": import matplotlib.pyplot as plt fig: plt.Figure axes: List[plt.Axes] n_updates_per_run: int = 20 fig, axes = plt.subplots(1, 2) import textwrap # x: np.ndarray = np.random.randint(1, 32, size=100) x: np.ndarray = np.arange(63, dtype=int) + 1 min_episodes_before_update = 3 # min_episodes_before_updates = [1, 3, 5] min_episode_length: int = 5 max_episode_length: int = 100 episode_len_dist = f"U[{min_episode_length},{max_episode_length}]" # Normally distributed episode lengths: # episode_length_mean = (max_episode_length + min_episode_length) / 2 episode_length_mean = 50 # episode_length_std = np.sqrt(max_episode_length - episode_length_mean) # episode_len_dist = f"N({episode_length_mean:.1f}, {episode_length_std:.1f})" episode_length_stds = [1.0, 3.0, 5.0, 10.0] episode_len_dist = f"N({episode_length_mean:.1f}, {episode_length_stds})" s = "s" if min_episodes_before_update > 1 else "" fig.suptitle( textwrap.dedent( f"""\ Episode length ~ {episode_len_dist}, Updating model when all envs have finished at least {min_episodes_before_update} episode{s}, {n_updates_per_run} total updates per run. """ ) ) # for min_episodes_before_update in min_episodes_before_updates: for episode_length_std in episode_length_stds: label = f"episode_length_std={episode_length_std:.1f}" # label = f"min_episodes_before_update={min_episodes_before_update}" # new_episode_length = lambda: np.random.randint(min_episode_length, max_episode_length) new_episode_length = lambda: int(np.random.normal(episode_length_mean, episode_length_std)) # x.sort() used_ = [] wasted_ = [] for n_envs in tqdm.tqdm(x, desc="n_envs"): used, wasted = get_fraction_of_observations_with_grad( n_envs=n_envs, new_episode_length=new_episode_length, min_episodes_before_update=min_episodes_before_update, n_updates=n_updates_per_run, ) used_.append(used) wasted_.append(wasted) y_used = np.array(used_) y_wasted = np.array(wasted_) used_ratio = y_used / (y_used + y_wasted) wasted_ratio = 1 - used_ratio axes[0].set_title(f"Percentage of used vs 'wasted' gradients w.r.t. batch size") axes[0].scatter(x, used_ratio, label=label) axes[0].set_ylim(0.0, 1.0) used_per_env = y_used / x / n_updates_per_run axes[1].scatter(x, used_per_env) fig.legend() # xs, ys = x, used_ratio # # zip joins x and y coordinates in pairs # for x_i, y_i in zip(xs, ys): # label = f"({int(x_i)}, {y_i:.2f})" # axes[0].annotate(label, # this is the text # (x_i, y_i), # this is the point to label # textcoords="offset points", # how to position the text # xytext=(0,10), # distance from text to points (x,y) # ha='center') # horizontal alignment can be left, right or center axes[0].set_ylabel("% of used gradients") axes[0].set_xlabel("batch size (number of environments)") axes[1].set_title(f"''Data efficiency'': Average number of used steps per update per env") axes[1].set_xlabel(f"# of environments") axes[1].set_ylabel(f"# of used steps per env") plt.show() ================================================ FILE: sequoia/methods/models/output_heads.puml ================================================ @startuml output_heads package output_heads { package output_head { abstract class OutputHead { + hparams: OutputHead.HParams {abstract} + forward(observations: Observations representations: Tensor): Actions {abstract} + get_loss(ForwardPass, Actions, Rewards) -> Loss } abstract class OutputHead.HParams { + {static} available_activations: ClassVar[Dict[str, Type[nn.Module]]] + hidden_layers: int + hidden_neurons: List[int] + activation: Type[nn.Module] = "tanh" } } package classification { class ClassificationHead implements OutputHead { + forward(Observations representations: Tensor): ClassificationHeadOutput + get_loss(ForwardPass, ClassificationOutput, Rewards): Loss } class ClassificationHead.HParams extends OutputHead.HParams {} class ClassificationHeadOutput extends settings.base.Actions { + y_pred: Tensor + logits: Tensor } } package regression { class RegressionHead implements OutputHead {} } package rl { package policy_head { class PolicyHead extends ClassificationHead { + forward(observations: Observations representations: Tensor): PolicyHeadOutput + hparams: PolicyHead.HParams } class PolicyHead.HParams extends ClassificationHead.HParams { + forward(observations: Observations representations: Tensor): PolicyHeadOutput } class PolicyHeadOutput extends ClassificationHeadOutput { action_dist: Distribution } } package episodic_a2c { class EpisodicA2C extends PolicyHead { + actor: nn.Module + critic: nn.Module + get_episode_loss(Observations, Actions, Rewards, done: bool): Loss } class EpisodicA2C.HParams extends PolicyHead.HParams { + normalize_advantages: bool = False + actor_loss_coef: float = 0.5 + critic_loss_coef: float = 0.5 + entropy_loss_coef: float = 0.1 + max_policy_grad_norm: Optional[float] = None + gamma: float = 0.99 + learning_rate: float = 1e-2 } class A2CHeadOutput extends PolicyHeadOutput { + value: Tensor } } package actor_critic_head { class ActorCriticHead extends ClassificationHead { + hparams: ActorCriticHead.HParams + actor: nn.Module + critic: nn.Module } class ActorCriticHead.HParams extends ClassificationHead.HParams { + gamma: float = 0.95 + learning_rate: float = 1e-3 } } } ' OutputHead *-- OutputHead.HParams ' ClassificationHead *-- ClassificationHead.HParams ' PolicyHead *-- PolicyHead.HParams ' ActorCriticHead *-- ActorCriticHead.HParams ' EpisodicA2C *-- EpisodicA2C.HParams ' OutputHead *-- Actions : outputs ' ClassificationHead *-- ClassificationHeadOutput : outputs ' PolicyHead *-- PolicyHeadOutput : outputs ' EpisodicA2C *-- A2CHeadOutput : outputs } @enduml ================================================ FILE: sequoia/methods/models/simple_convnet.py ================================================ from torch import Tensor, nn class SimpleConvNet(nn.Module): def __init__(self, in_channels: int = 3, n_classes: int = 10): super().__init__() self.features = nn.Sequential( nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=1, bias=False), nn.BatchNorm2d(6), nn.ReLU(inplace=True), nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False), nn.BatchNorm2d(16), nn.ReLU(inplace=True), nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False), nn.BatchNorm2d(16), nn.AdaptiveAvgPool2d(output_size=(8, 8)), # [16, 8, 8] nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False), # [32, 6, 6] nn.BatchNorm2d(32), nn.ReLU(inplace=True), nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False), # [32, 4, 4] nn.BatchNorm2d(32), nn.Flatten(), ) self.fc = nn.Sequential( nn.Flatten(), nn.Linear(512, 120), # NOTE: This '512' is what gets used as the # hidden size of the encoder. nn.ReLU(), nn.Linear(120, 84), nn.ReLU(), nn.Linear(84, n_classes), ) def forward(self, x: Tensor) -> Tensor: return self.fc(self.features(x)) ================================================ FILE: sequoia/methods/models.puml ================================================ @startuml models package models { class ForwardPass extends Batch { + observations: Observations + representations: Tensor + actions: Actions } ' TODO: Idk why, but this doesn't work if placed inside the 'models' package ' above. !include ./models/output_heads.puml !include ./models/base_model.puml } @enduml ================================================ FILE: sequoia/methods/packnet_method.py ================================================ from dataclasses import dataclass from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Type, Union import torch from pytorch_lightning import Callback, LightningModule, Trainer from pytorch_lightning.callbacks import EarlyStopping from simple_parsing.helpers import mutable_field from simple_parsing.helpers.hparams import HyperParameters, uniform from torch import Tensor, nn from sequoia.common.config import Config from sequoia.methods.base_method import BaseMethod, BaseModel from sequoia.methods.trainer import TrainerConfig from sequoia.settings import Setting from sequoia.settings.assumptions import IncrementalAssumption as IncrementalSetting from sequoia.settings.sl import IncrementalSLSetting, TaskIncrementalSLSetting class PackNet(Callback, nn.Module): """PyTorch-Lightning Callback that implements the PackNet algorithm for CL. TODO: Add a citation for the PackNet paper. """ @dataclass class HParams(HyperParameters): """Hyper-parameters of the Packnet callback.""" prune_instructions: Union[float, List[float]] = uniform(0.1, 0.9, default=0.5) train_epochs: int = uniform(1, 5, default=1) fine_tune_epochs: int = uniform(0, 5, default=1) def __init__( self, n_tasks: int, hparams: Optional["PackNet.HParams"] = None, prunable_types: Sequence[Type[nn.Module]] = (nn.Conv2d, nn.Linear), ignore_modules: Sequence[str] = None, ignore_parameters: Sequence[str] = ("bias",), ): """Create the PackNet callback. Parameters ---------- n_tasks : int Number of tasks. hparams : PackNet.HParams Configuration options (hyper-parameters) of the PackNet algorithm. prunable_types : Sequence[Type[nn.Module]], optional The types of nn.Modules to consider for pruning. By default, only consideres layers of types `nn.Conv2d` and `nn.Linear`. ignore_modules : Sequence[str], optional List of flags for module names that should be ignored by PackNet. When one of these values is found within the name of a module, it is ignored. Doesn't ignore any modules by default. parameters_to_ignore : List[str], optional List of flags for parameter names that should be ignored by PackNet. When one of these values is found within the name of a parameter, it is ignored. Defaults to ["bias"]. """ super().__init__() hparams = hparams or self.HParams() self.n_tasks = n_tasks self.prune_instructions = hparams.prune_instructions self.prunable_types = prunable_types or [nn.Conv2d, nn.Linear] self.ignore_modules = list(ignore_modules or []) self.ignore_parameters = list(ignore_parameters or []) # Set up an array of quantiles for pruning procedure if n_tasks: self.config_instructions() self.PATH = None self.epoch_split = (hparams.train_epochs, hparams.fine_tune_epochs) self.current_task = 0 # 3-dimensions: task, layer, parameter mask self.masks: List[Dict[str, Tensor]] = [] self.mode: str = None self.params_dict: dict = None def filtered_parameter_iterator(self, module: nn.Module) -> Iterable[Tuple[str, nn.Parameter]]: """Iterator that, given a module, yields tuples with the full name of the parameters that will be modified by the PackNet callback, as well as the parameters themselves. This is used to remove a bit of boilerplate code in the for loops below. Parameters ---------- module : nn.Module The module to iterate over. Returns ------- Iterable[Tuple[str, nn.Parameter]] An Iterator of tuples containing parameter names ('{mod_name}.{param_name}') and parameters. """ for mod_name, mod in module.named_modules(): if not isinstance(mod, self.prunable_types): continue if any(ignored in mod_name for ignored in self.ignore_modules): continue for param_name, param in mod.named_parameters(): if any(ignored in param_name for ignored in self.ignore_parameters): continue param_full_name = f"{mod_name}.{param_name}" yield param_full_name, param @torch.no_grad() def prune(self, model: nn.Module, prune_quantile: float) -> Dict[str, Tensor]: """Create task-specific mask and prune least relevant weights [extended_summary] Parameters ---------- model : nn.Module The model to be pruned. prune_quantile : float The percentage of weights to prune as a decimal. Returns ------- Dict[str, Tensor] The masks to use to prune the layers of the given model. """ # Calculate Quantile all_prunable_tensors: List[Tensor] = [] for param_full_name, param_layer in self.filtered_parameter_iterator(model): # get fixed weights for this layer (on the same device) prev_mask = torch.zeros_like(param_layer, dtype=torch.bool) for task_masks in self.masks: if param_full_name in task_masks: prev_mask |= task_masks[param_full_name] p = param_layer.masked_select(~prev_mask) if p is not None: all_prunable_tensors.append(p) all_parameters_tensor = torch.cat(all_prunable_tensors, -1) cutoff = torch.quantile(torch.abs(all_parameters_tensor), q=prune_quantile) masks = {} # create mask for this task for param_full_name, param_layer in self.filtered_parameter_iterator(model): # get weight mask for this layer # p prev_mask = torch.zeros_like(param_layer, dtype=torch.bool) for task_masks in self.masks: # TODO: check for bug here # if param_full_name in task_masks: prev_mask |= task_masks[param_full_name] curr_mask = torch.abs(param_layer).ge(cutoff) # q curr_mask &= ~prev_mask # (q & ~p) # Zero non masked weights param_layer *= curr_mask | prev_mask masks[param_full_name] = curr_mask return masks def fine_tune_mask(self, model: nn.Module): """ Zero the gradient of pruned weights this task as well as previously fixed weights Apply this mask before each optimizer step during fine-tuning """ assert len(self.masks) > self.current_task for param_full_name, param in self.filtered_parameter_iterator(model): param.grad *= self.masks[self.current_task][param_full_name] def training_mask(self, model: nn.Module): """ Zero the gradient of only fixed weights for previous tasks Apply this mask after .backward() and before optimizer.step() at every batch of training a new task """ if len(self.masks) == 0: return for param_full_name, param in self.filtered_parameter_iterator(model): # get mask of weights from previous tasks prev_mask = torch.zeros_like(param, dtype=torch.bool) for task_masks in self.masks: # FIXME: Get the mask if it exists, otherwise set one and move on. # if param_full_name not in task_masks: # task_masks[param_full_name] = torch.zeros_like(param, dtype=torch.bool) prev_mask |= task_masks[param_full_name] # zero grad of previous fixed weights # param.grad[prev_mask] = 0. # (NOTE: Equivalent) param.grad *= ~prev_mask def fix_biases(self, model: nn.Module): """ Fix the gradient of prunable bias parameters """ for mod_name, mod in model.named_modules(): if not isinstance(mod, self.prunable_types): continue if any(ignore in mod_name for ignore in self.ignore_modules): continue for name, param_layer in mod.named_parameters(): if "bias" in name: param_layer.requires_grad = False def fix_batch_norm(self, model: nn.Module): """ Fix batch norm gain, bias, running mean and variance """ for mod_name, mod in model.named_modules(): if isinstance(mod, nn.BatchNorm2d): mod.affine = False for param_layer in mod.parameters(): param_layer.requires_grad = False def set_params_dict(self, model: nn.Module): """ Set a dictionary containing all prunable parameters useful for fixing all layers, but may be wasted memory """ # TODO: This dict actually doesn't copy the parameters, it saves references. self.params_dict = dict() for param_full_name, param in self.filtered_parameter_iterator(model): self.params_dict[param_full_name] = param def fix_all_layers(self, model: nn.Module): """ Fix grad of all parameters outside of params_dict """ self.set_params_dict(model) # Not necessary for fixed model # Fix grad of all non-prunable layers in this for mod_name, mod in model.named_modules(): for param_name, param_layer in mod.named_parameters(): key = f"{mod_name}.{param_name}" if key not in self.params_dict: param_layer.requires_grad = False @torch.no_grad() def apply_eval_mask(self, model: nn.Module, task_idx: int): """ Revert to final trained network state and apply mask for given task :param model: the model to apply the eval mask to :param task_idx: the task id to be evaluated (0 - > n_tasks) """ assert len(self.masks) > task_idx for param_full_name, param in self.filtered_parameter_iterator(model): # get indices of all weights from previous masks prev_mask = torch.zeros_like(param, dtype=torch.bool) for task_id in range(0, task_idx + 1): prev_mask |= self.masks[task_id][param_full_name] # zero out all weights that are not in the mask for this task # param[prev_mask] = 0. (NOTE: Equivalent) param *= prev_mask def mask_remaining_params(self, model: nn.Module) -> Dict[str, Tensor]: """ Create mask for remaining parameters """ masks = {} for param_full_name, param in self.filtered_parameter_iterator(model): # Get mask of all weights assigned to previous tasks prev_mask = torch.zeros_like(param, dtype=torch.bool) for task_masks in self.masks: prev_mask |= task_masks[param_full_name] # Create mask of remaining parameters layer_mask = ~prev_mask masks[param_full_name] = layer_mask return masks # self.masks.append(mask) def total_epochs(self) -> int: return self.epoch_split[0] + self.epoch_split[1] def config_instructions(self): """ Create pruning instructions for this task split :return: None """ assert self.n_tasks is not None if not isinstance(self.prune_instructions, list): # if a float is passed in assert 0 < self.prune_instructions < 1 self.prune_instructions = [self.prune_instructions] * (self.n_tasks - 1) assert ( len(self.prune_instructions) == self.n_tasks - 1 ), "Must give prune instructions for every task" def save_final_state(self, model, PATH="model_weights.pth"): """ Save the final weights of the model after training :param model: pl_module :param PATH: The path to weights file """ self.PATH = PATH torch.save(model.state_dict(), PATH) def load_final_state(self, model): """ Load the final state of the model """ device = model.device model.load_state_dict(torch.load(self.PATH)) model = model.to(device) def on_init_end(self, trainer: Trainer): self.mode = "train" def on_after_backward(self, trainer: Trainer, pl_module: LightningModule): if self.mode == "train": self.training_mask(pl_module) elif self.mode == "fine_tune": self.fine_tune_mask(pl_module) def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, *args, **kwargs): super().on_train_epoch_end(trainer, pl_module) if pl_module.current_epoch == self.epoch_split[0] - 1: # Train epochs completed self.mode = "fine_tune" new_masks: Dict[str, Tensor] if self.current_task == self.n_tasks - 1: new_masks = self.mask_remaining_params(pl_module) else: new_masks = self.prune( model=pl_module, prune_quantile=self.prune_instructions[self.current_task], ) self.masks.append(new_masks) def on_fit_end(self, trainer: Trainer, pl_module: LightningModule): self.fix_biases(pl_module) # Fix biases after first task self.fix_batch_norm(pl_module) # Fix batch norm mean, var, and params # TODO: This may cause issues with output heads # self.fix_all_layers(pl_module) # Fix all other layers -> may not be necessary? self.save_final_state(pl_module) self.mode = "train" # TODO: Reset this to IncrementalAssumption after the fixes are made to BaseMethod in RL. @dataclass class PackNetMethod(BaseMethod, target_setting=IncrementalSLSetting): # NOTE: these two fields are also used to create the command-line arguments. # HyperParameters of the method. hparams: BaseModel.HParams = mutable_field(BaseModel.HParams) # Configuration options. config: Config = mutable_field(Config) # Options for the Trainer object. trainer_options: TrainerConfig = mutable_field(TrainerConfig) # Hyper-Parameters of the PackNet callback packnet_hparams: PackNet.HParams = mutable_field(PackNet.HParams) def __init__( self, hparams: BaseModel.HParams = None, config: Config = None, trainer_options: TrainerConfig = None, packnet_hparams: PackNet.HParams = None, **kwargs, ): super().__init__(hparams=hparams, config=config, trainer_options=trainer_options) self.packnet_hparams = packnet_hparams or PackNet.HParams() self.p_net: PackNet # This gets set in configure def configure(self, setting: Setting): # NOTE: super().configure creates the Trainer and calls `configure_callbacks()`, # so we have to create `self.p_net` before calling `super().configure`. # Ignore all the modules that are task-specific when the setting gives task ids: # NOTE: Always ignore the `output_heads` dict, as it contains output heads for # each task. # NOTE: `model.output_heads[]` is the same as `model.output_head`. ignored_modules: List[str] = ["output_heads"] if setting.task_labels_at_test_time: # Also ignore the main output_head. ignored_modules.append("output_head") self.p_net = PackNet( n_tasks=setting.nb_tasks, hparams=self.packnet_hparams, ignore_modules=ignored_modules, ) self.p_net.current_task = -1 self.p_net.config_instructions() super().configure(setting) def fit(self, train_env, valid_env): super().fit(train_env=train_env, valid_env=valid_env) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching between tasks. Args: task_id (int, optional): the id of the new task. When None, we are basically being informed that there is a task boundary, but without knowing what task we're switching to. """ super().on_task_switch(task_id=task_id) if task_id is not None and len(self.p_net.masks) > task_id: self.p_net.load_final_state(model=self.model) self.p_net.apply_eval_mask(task_idx=task_id, model=self.model) self.p_net.current_task = task_id def configure_callbacks(self, setting: TaskIncrementalSLSetting = None) -> List[Callback]: """Create the PyTorch-Lightning Callbacks for this Setting. These callbacks will get added to the Trainer in `create_trainer`. Parameters ---------- setting : SettingType The `Setting` on which this Method is going to be applied. Returns ------- List[Callback] A List of `Callback` objects to use during training. """ callbacks = super().configure_callbacks(setting=setting) assert self.p_net not in callbacks for i in range(len(callbacks)): if isinstance(callbacks[i], EarlyStopping): callbacks.pop(i) print(callbacks) if not setting.stationary_context: callbacks.append(self.p_net) return callbacks def create_trainer(self, setting) -> Trainer: """Creates a Trainer object from pytorch-lightning for the given setting. Returns: Trainer: the Trainer object. """ self.trainer_options.max_epochs = ( self.packnet_hparams.train_epochs + self.packnet_hparams.fine_tune_epochs ) return super().create_trainer(setting) def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ self.hparams = self.hparams.replace(**new_hparams) self.packnet_hparams = self.packnet_hparams.replace(**new_hparams["packnet_hparams"]) def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ hparam_priors: Dict = super().get_search_space(setting=setting) hparam_priors["packnet_hparams"] = self.packnet_hparams.get_orion_space_dict() return hparam_priors ================================================ FILE: sequoia/methods/packnet_method_test.py ================================================ from typing import ClassVar, Type from sequoia.methods.base_method_test import TestBaseMethod as BaseMethodTests from sequoia.methods.packnet_method import PackNetMethod class TestPackNetMethod(BaseMethodTests): Method: ClassVar[Type[PackNetMethod]] = PackNetMethod def validate_results(self, setting, method, results): """Called at the end of each test run to check that the results make sense for the given setting and method. """ super().validate_results(setting, method, results) # TODO: Add checks to make sure that the packnet callback's state makes sense # for the given setting. ================================================ FILE: sequoia/methods/pl_bolts_methods/__init__.py ================================================ """ TODO: Add some of the pytorch lightning bolts models and such as Methods targetting the IID Setting. TODO: Also figure out a way to consider LightningDataModules that aren't Settings as 'IID' settings, so we can get all the methods and models and datamodules from pl_bolts for free. """ ================================================ FILE: sequoia/methods/pl_dqn.py ================================================ # Copyright The PyTorch Lightning team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Deep Reinforcement Learning: Deep Q-network (DQN) The template illustrates using Lightning for Reinforcement Learning. The example builds a basic DQN using the classic CartPole environment. To run the template, just run: `python template/methods/rl/dqn_pl.py` After ~1500 steps, you will see the total_reward hitting the max score of 475+. Open up TensorBoard to see the metrics: `tensorboard --logdir default` References ---------- [1] https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On- Second-Edition/blob/master/Chapter06/02_dqn_pong.py """ import dataclasses from collections import defaultdict, deque from dataclasses import dataclass from typing import ( Any, Callable, Container, Deque, Generic, Iterator, List, Optional, Sequence, SupportsFloat, SupportsInt, Tuple, Type, TypeVar, Union, ) import gym import numpy as np import pytorch_lightning as pl import simple_parsing import torch import torch.nn as nn import torch.optim as optim import tqdm from gym.spaces import Discrete from sequoia.common.spaces.typed_dict import TypedDictSpace from simple_parsing import ArgumentParser, Serializable from torch import Tensor from torch.nn import functional as F from torch.optim.optimizer import Optimizer from torch.utils.data import DataLoader from torch.utils.data.dataset import IterableDataset class DQN(nn.Module): """Simple MLP network.""" def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128): """ Args: obs_size: observation/state size of the environment n_actions: number of discrete actions available in the environment hidden_size: size of hidden layers """ super().__init__() self.net = nn.Sequential( nn.Linear(obs_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, n_actions), ) def forward(self, x: Tensor) -> Tensor: return self.net(torch.as_tensor(x, dtype=torch.float32)) T = TypeVar("T", np.ndarray, Tensor) V = TypeVar("V", np.ndarray, Tensor) @dataclass class Experience(Generic[T]): """Experience for one step.""" state: T action: SupportsInt reward: SupportsFloat done: bool new_state: T @dataclass class ExperienceBatch(Generic[T]): """Experience for more than one step. Note: neighbouring indices can be independant, i.e. this isn't a sequence of actions in an env. """ states: T actions: T rewards: T dones: T new_states: T def __len__(self) -> int: return len(self.dones) def __getitem__(self, index: Union[int, slice]) -> Union[Experience[T], "ExperienceBatch[T]"]: if isinstance(index, int): return Experience( # type: ignore state=self.states[index], action=self.actions[index], reward=self.rewards[index], done=bool(self.dones[index]), new_state=self.new_states[index], ) return ExperienceBatch( states=self.states[index], actions=self.actions[index], rewards=self.rewards[index], dones=self.dones[index], new_states=self.new_states[index], ) @classmethod def stack(cls, items: Sequence["Experience[T]"]) -> "ExperienceBatch[T]": field_names = set(f.name for item in items for f in dataclasses.fields(item)) field_values = defaultdict(list) for item in items: for field_name in field_names: f_value = getattr(item, field_name) field_values[field_name].append(f_value) stack_fn = np.stack if isinstance(items[0].state, np.ndarray) else torch.stack return cls( # type: ignore **{f_name + "s": stack_fn(f_values) for f_name, f_values in field_values.items()} # states=np.concatenate(states), # actions=np.concatenate(actions), # rewards=np.concatenate(rewards, dtype=np.float32), # dones=np.concatenate(dones, dtype=bool), # new_states=np.concatenate(next_states), ) def _map(self, fn: Callable[[T], V]) -> "ExperienceBatch[V]": return type(self)( # type: ignore **{f.name: fn(getattr(self, f.name)) for f in dataclasses.fields(self)} ) def numpy(self) -> "ExperienceBatch[np.ndarray]": def _numpy(v) -> np.ndarray: return v.detach().cpu().numpy() if isinstance(v, Tensor) else np.array(v) return self._map(_numpy) def to(self, device: torch.device = None, **kwargs) -> "ExperienceBatch[Tensor]": return self._map(lambda v: torch.as_tensor(v, device=device, **kwargs)) E = TypeVar("E", bound=Experience) class ReplayBuffer(Generic[T]): """Replay Buffer for storing past experiences allowing the agent to learn from them. >>> buffer = ReplayBuffer(5) """ def __init__(self, capacity: int) -> None: """ Args: capacity: size of the buffer """ self.buffer: Deque[Experience[T]] = deque(maxlen=capacity) def __len__(self) -> int: return len(self.buffer) def append(self, experience: Experience[T]) -> None: """Add experience to the buffer. Args: experience: tuple (state, action, reward, done, new_state) """ self.buffer.append(experience) def sample( self, batch_size: int, ) -> ExperienceBatch[T]: indices = np.random.choice(len(self.buffer), batch_size, replace=False) samples: List[Experience[T]] = [self.buffer[idx] for idx in indices] return ExperienceBatch.stack(samples) class RLDataset(IterableDataset[ExperienceBatch[T]]): """Iterable Dataset containing the buffer which will be updated with new experiences during training. >>> dataset = RLDataset(ReplayBuffer(5)) """ def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None: """ Args: buffer: replay buffer sample_size: number of experiences to sample at a time """ self.buffer = buffer self.sample_size = sample_size def __iter__(self) -> Iterator[Experience[T]]: sampled_experience_batch = self.buffer.sample(self.sample_size) for sampled_experience in sampled_experience_batch: assert isinstance(sampled_experience, Experience), sampled_experience yield sampled_experience class Agent: """Base Agent class handling the interaction with the environment. ```python env = gym.make("CartPole-v1") buffer = ReplayBuffer(10) agent = Agent(env, buffer) ``` """ def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None: """ Args: env: training environment replay_buffer: replay buffer storing experiences """ self.env = env self.replay_buffer = replay_buffer self.reset() self.state = self.env.reset() def reset(self) -> None: """Resets the environment and updates the state.""" self.state = self.env.reset() def get_action(self, state: Tensor, net: nn.Module, epsilon: float) -> int: """Using the given network, decide what action to carry out using an epsilon-greedy policy. Args: net: DQN network epsilon: value to determine likelihood of taking a random action device: current device Returns: action """ if np.random.random() < epsilon: action = self.env.action_space.sample() else: q_values = net(state) _, action = torch.max(q_values, dim=-1) # TODO: Adapt this for batched actions. action = int(action.item()) return action @torch.no_grad() def play_step( self, net: nn.Module, epsilon: float = 0.0, device: Union[str, torch.device] = "cpu", ) -> Tuple[float, bool]: """Carries out a single interaction step between the agent and the environment. Args: net: DQN network epsilon: value to determine likelihood of taking a random action device: current device Returns: reward, done """ state = torch.as_tensor([self.state], device=torch.device(device)) action = self.get_action(state=state, net=net, epsilon=epsilon) # do step in the environment new_state, reward, done, _ = self.env.step(action) exp = Experience( state=self.state, action=action, reward=reward, done=done, new_state=new_state, ) self.replay_buffer.append(exp) self.state = new_state if done: self.state = self.env.reset() return reward, done class DQNLightning(pl.LightningModule): """Basic DQN Model. ```python DQNLightning(env="CartPole-v1") ``` """ @dataclass class HParams(Serializable): # Size of the batches. batch_size: int = 16 # learning rate. lr: float = 1e-2 # Discount factor. gamma: float = 0.99 # Interval at which we update the target network. sync_rate: int = 10 # Capacity of the replay buffer. replay_size: int = 1000 # How many samples do we use to fill our buffer at the start of training. warm_start_steps: int = 1000 # The frame at which epsilon should stop decaying. eps_last_frame: int = 1000 # Starting value of epsilon. eps_start: float = 1.0 # Final value of epsilon eps_end: float = 0.01 # Max length of an episode. episode_length: int = 200 def __init__(self, env: Union[str, gym.Env[np.ndarray, int]], hp: HParams = None) -> None: super().__init__() self.hp = hp or self.HParams() self.save_hyperparameters({"hp": self.hp.to_dict()}) self.env = gym.make(env) if isinstance(env, str) else env from gym.spaces import Box, Discrete self.episode_length: Optional[int] = get_max_episode_length(self.env) if not isinstance(self.env.observation_space, Box): raise RuntimeError( f"Only works on envs with Box observation space, not {self.env.observation_space}." ) if not isinstance(self.env.action_space, Discrete): raise RuntimeError( f"Only works on envs with Discrete action space, not {self.env.action_space}." ) from gym.spaces.utils import flatdim # TODO: Adapt this to also work with image observations. obs_size = flatdim(self.env.observation_space) n_actions = self.env.action_space.n self.net = DQN(obs_size, n_actions) self.target_net = DQN(obs_size, n_actions) self.buffer = ReplayBuffer(self.hp.replay_size) self.agent = Agent(self.env, self.buffer) self.total_reward = 0 self.episode_reward = 0 self.trainer: Optional[pl.Trainer] self.populate(self.hp.warm_start_steps) def populate(self, steps: int = 1000) -> None: """Carries out several random steps through the environment to initially fill up the replay buffer with experiences. Args: steps: number of random steps to populate the buffer with """ for i in range(steps): try: self.agent.play_step(self.net, epsilon=1.0) except gym.error.ClosedEnvironmentError as err: print(f"Unable to add more data to the buffer: env closed after {i} steps.") break def forward(self, x: torch.Tensor) -> torch.Tensor: """Passes in a state `x` through the network and gets the `q_values` of each action as an output. Args: x: environment state Returns: q values """ output = self.net(x) return output def dqn_mse_loss(self, batch: ExperienceBatch[Tensor]) -> torch.Tensor: """Calculates the mse loss using a mini batch from the replay buffer. Args: batch: current mini batch of replay data Returns: loss """ states = batch.states actions = batch.actions rewards = batch.rewards.type(dtype=torch.float32) dones = batch.dones next_states = batch.new_states values: Tensor = self.net(states) state_action_values = values.gather(1, actions.unsqueeze(-1)).squeeze(-1) with torch.no_grad(): next_state_values: Tensor = self.target_net(next_states).max(1)[0] next_state_values[dones] = 0.0 next_state_values = next_state_values.detach() expected_state_action_values = next_state_values * self.hp.gamma + rewards return F.mse_loss(state_action_values, expected_state_action_values) def training_step(self, batch: ExperienceBatch[Tensor], batch_idx: int) -> Optional[Tensor]: """Carries out a single step through the environment to update the replay buffer. Then calculates loss based on the minibatch received. Args: batch: current mini batch of replay data batch_idx: batch index Returns: Training loss and log metrics """ device = batch.states.device epsilon = max( self.hp.eps_end, self.hp.eps_start - (self.global_step + 1) / self.hp.eps_last_frame, ) try: # step through environment with agent reward, done = self.agent.play_step(self.net, epsilon, device) except gym.error.ClosedEnvironmentError: print(f"Environment closed at batch {batch_idx}") assert self.trainer is not None self.trainer.should_stop = True return self.episode_reward += reward # calculates training loss loss = self.dqn_mse_loss(batch) if done: self.total_reward = self.episode_reward self.episode_reward = 0 # Soft update of target network if self.global_step % self.hp.sync_rate == 0: self.target_net.load_state_dict(self.net.state_dict()) self.log_dict( { "total_reward": self.total_reward, "reward": reward, "steps": float(self.global_step), }, prog_bar=True, ) return loss def configure_optimizers(self) -> List[Optimizer]: """Initialize Adam optimizer.""" optimizer = optim.Adam(self.net.parameters(), lr=self.hp.lr) return [optimizer] def __dataloader(self) -> DataLoader: """Initialize the Replay Buffer dataset used for retrieving experiences.""" dataset = RLDataset(self.buffer, sample_size=self.episode_length or 200) dataloader = DataLoader( dataset=dataset, batch_size=self.hp.batch_size, sampler=None, collate_fn=ExperienceBatch.stack, ) return dataloader def train_dataloader(self) -> DataLoader: """Get train loader.""" return self.__dataloader() def get_device(self, batch) -> str: """Retrieve device currently being used by minibatch.""" return batch[0].device.index if self.on_gpu else "cpu" @classmethod def add_model_specific_args(cls, parent_parser: ArgumentParser): # pragma: no-cover parent_parser.add_arguments(cls.HParams, "hp") return parent_parser def get_max_episode_length(env: Union[gym.Env, gym.Wrapper]) -> Optional[int]: """Inspects the env to get the max episode length, if it is wrapped with a `gym.wrappers.TimeLimit` wrapper. If the env isn't wrapped with a TimeLimit, then returns None. """ while isinstance(env, gym.Wrapper): if isinstance(env, gym.wrappers.TimeLimit): return env._max_episode_steps env = env.env if env.spec is not None: return env.spec.max_episode_steps return None from sequoia import Method from sequoia.settings.rl import RLEnvironment, RLSetting from sequoia.settings.rl.objects import Actions, Observations, Rewards class PlDqnMethod(Method, target_setting=RLSetting): def __init__(self, hp: DQNLightning.HParams = None) -> None: super().__init__() self.hp = hp or DQNLightning.HParams() self.model: Optional[DQNLightning] = None def configure(self, setting: RLSetting) -> None: self.model = None self.train_max_steps = setting.train_max_steps def fit(self, train_env: gym.Env, valid_env: gym.Env): from sequoia.common.gym_wrappers import ( TransformAction, TransformObservation, TransformReward, ) # Our simple DQN model expects to get arrays / integer actions, so we adapt the env a bit # using some wrappers. train_env = TransformObservation(train_env, lambda obs: obs.x) train_env = TransformReward(train_env, lambda rew: rew.y) if isinstance(train_env.action_space, TypedDictSpace): actions_type: Type[Actions] = train_env.action_space.dtype # Make it possible to send just ints to the env, and wrap them up into an Actions object. train_env = TransformAction(train_env, lambda act: actions_type(y_pred=act)) if self.model is None: self.model = DQNLightning(env=train_env, hp=self.hp) trainer = pl.Trainer( gpus=1, strategy="dp", val_check_interval=100, max_steps=self.train_max_steps, ) trainer.fit(self.model) def get_actions(self, observations: Observations, action_space: Discrete) -> Actions: assert self.model is not None with torch.no_grad(): obs = torch.as_tensor( observations.x, device=torch.device(self.model.device), dtype=self.model.dtype, ) v = self.model.forward(obs) selected_action = v.argmax(-1).cpu().numpy() return selected_action def main() -> None: parser = ArgumentParser() parser = DQNLightning.add_model_specific_args(parser) parser.add_argument("--seed", type=int, default=None, help="Random seed") args = parser.parse_args() # env = gym.make("CartPole-v1") # hp: DQNLightning.HParams = args.hp # model = DQNLightning(env=env, hp=hp) # pl.seed_everything(args.seed) # trainer = pl.Trainer(gpus=1, strategy="dp", val_check_interval=100) # trainer.fit(model) from sequoia.settings.rl import TraditionalRLSetting, MultiTaskRLSetting setting = MultiTaskRLSetting( dataset="CartPole-v1", nb_tasks=1, train_max_steps=2_000, ) setting.prepare_data() setting.setup() setting.train_dataloader() setting.test_dataloader() method = PlDqnMethod() from sequoia.common.config import Config results = setting.apply(method, config=Config(debug=True)) print(results) return if __name__ == "__main__": main() ================================================ FILE: sequoia/methods/pnn/__init__.py ================================================ from .layers import PNNConvLayer, PNNLinearBlock from .model_rl import PnnA2CAgent from .model_sl import PnnClassifier from .pnn_method import PnnMethod ================================================ FILE: sequoia/methods/pnn/layers.py ================================================ import torch.nn as nn import torch.nn.functional as F """ Based on https://github.com/TomVeniat/ProgressiveNeuralNetworks.pytorch """ class PNNConvLayer(nn.Module): def __init__(self, col, depth, n_in, n_out, kernel_size=3): super(PNNConvLayer, self).__init__() self.col = col self.layer = nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1) self.u = nn.ModuleList() if depth > 0: self.u.extend( [nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1) for _ in range(col)] ) def forward(self, inputs): if not isinstance(inputs, list): inputs = [inputs] cur_column_out = self.layer(inputs[-1]) prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)] return F.relu(cur_column_out + sum(prev_columns_out)) class PNNLinearBlock(nn.Module): def __init__(self, col: int, depth: int, n_in: int, n_out: int): super(PNNLinearBlock, self).__init__() self.layer = nn.Linear(n_in, n_out) self.u = nn.ModuleList() if depth > 0: self.u.extend([nn.Linear(n_in, n_out) for _ in range(col)]) def forward(self, inputs): if not isinstance(inputs, list): inputs = [inputs] cur_column_out = self.layer(inputs[-1]) prev_columns_out = [mod(x) for mod, x in zip(self.u, inputs)] return F.relu(cur_column_out + sum(prev_columns_out)) ================================================ FILE: sequoia/methods/pnn/model_rl.py ================================================ from typing import List import torch import torch.nn as nn import torch.nn.functional as F from torchvision import transforms from .layers import PNNConvLayer, PNNLinearBlock class PnnA2CAgent(nn.Module): """ @article{rusu2016progressive, title={Progressive neural networks}, author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia}, journal={arXiv preprint arXiv:1606.04671}, year={2016} } """ def __init__(self, arch="mlp", hidden_size=256): super(PnnA2CAgent, self).__init__() self.columns_actor = nn.ModuleList([]) self.columns_critic = nn.ModuleList([]) self.columns_conv = nn.ModuleList([]) self.arch = arch self.hidden_size = hidden_size # TODO: This doesn't take the observation space into account at all! # Only works for Pixel Cartpole at the moment. # Original size 3 x 400 x 600 self.transformation = transforms.Compose( [ transforms.ToPILImage(), transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), ] ) def forward(self, observations): assert ( self.columns_actor ), "PNN should at least have one column (missing call to `new_task` ?)" t = observations.task_labels if self.arch == "mlp": x = torch.from_numpy(observations.x).unsqueeze(0).float() inputs_critic = [c[1](c[0](x)) for c in self.columns_critic] inputs_actor = [c[1](c[0](x)) for c in self.columns_actor] outputs_critic = [] outputs_actor = [] for i, column in enumerate(self.columns_critic): outputs_critic.append(column[2](inputs_critic[: i + 1])) outputs_actor.append(self.columns_actor[i][2](inputs_actor[: i + 1])) ind_depth = 3 else: x = self.transfor_img(observations.x).unsqueeze(0).float() inputs = [c[1](c[0](x)) for c in self.columns_conv] outputs = [] for i, column in enumerate(self.columns_conv): outputs.append(column[3](column[2](inputs[: i + 1]))) inputs = outputs outputs = [] for i, column in enumerate(self.columns_conv): outputs.append(column[5](column[4](inputs[: i + 1]))) inputs_critic = [c[6](outputs[i]).view(1, -1) for i, c in enumerate(self.columns_conv)] inputs_actor = inputs_critic[:] outputs_critic = [] outputs_actor = [] for i, column in enumerate(self.columns_critic): outputs_critic.append(column[0](inputs_critic[: i + 1])) outputs_actor.append(self.columns_actor[i][0](inputs_actor[: i + 1])) ind_depth = 1 critic = [] for i, column in enumerate(self.columns_critic): critic.append(column[ind_depth](outputs_critic[i])) actor = [] for i, column in enumerate(self.columns_actor): actor.append(F.softmax(column[ind_depth](outputs_actor[i]), dim=1)) return critic[t], actor[t] def new_task(self, device, num_inputs, num_actions=5): task_id = len(self.columns_actor) if self.arch == "conv": sizes = [num_inputs, 32, 64, self.hidden_size] modules_conv = nn.Sequential() modules_conv.add_module("Conv1", PNNConvLayer(task_id, 0, sizes[0], sizes[1])) modules_conv.add_module("MaxPool1", nn.MaxPool2d(3)) modules_conv.add_module("Conv2", PNNConvLayer(task_id, 1, sizes[1], sizes[2])) modules_conv.add_module("MaxPool2", nn.MaxPool2d(3)) modules_conv.add_module("Conv3", PNNConvLayer(task_id, 2, sizes[2], sizes[3])) modules_conv.add_module("MaxPool3", nn.MaxPool2d(3)) modules_conv.add_module("globavgpool2d", nn.AdaptiveAvgPool2d((1, 1))) self.columns_conv.append(modules_conv) modules_actor = nn.Sequential() modules_critic = nn.Sequential() if self.arch == "mlp": modules_actor.add_module("linAc1", nn.Linear(num_inputs, self.hidden_size)) modules_actor.add_module("relAc", nn.ReLU(inplace=True)) modules_actor.add_module( "linAc2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size) ) modules_actor.add_module("linAc3", nn.Linear(self.hidden_size, num_actions)) if self.arch == "mlp": modules_critic.add_module("linCr1", nn.Linear(num_inputs, self.hidden_size)) modules_critic.add_module("relCr", nn.ReLU(inplace=True)) modules_critic.add_module( "linCr2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size) ) modules_critic.add_module("linCr3", nn.Linear(self.hidden_size, 1)) self.columns_actor.append(modules_actor) self.columns_critic.append(modules_critic) print("Add column of the new task") def unfreeze_columns(self): for i, c in enumerate(self.columns_actor): for params in c.parameters(): params.requires_grad = True for params in self.columns_critic[i].parameters(): params.requires_grad = True for i, c in enumerate(self.columns_conv): for params in c.parameters(): params.requires_grad = True def freeze_columns(self, skip: List[int] = None): if skip is None: skip = [] self.unfreeze_columns() for i, c in enumerate(self.columns_actor): if i not in skip: for params in c.parameters(): params.requires_grad = False for params in self.columns_critic[i].parameters(): params.requires_grad = False for i, c in enumerate(self.columns_conv): if i not in skip: for params in c.parameters(): params.requires_grad = False print("Freeze columns from previous tasks") def parameters(self, task_id): param = [] for p in self.columns_critic[task_id].parameters(): param.append(p) for p in self.columns_actor[task_id].parameters(): param.append(p) if len(self.columns_conv) > 0: for p in self.columns_conv[task_id].parameters(): param.append(p) return param def transfor_img(self, img): return self.transformation(img) # return lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255. ================================================ FILE: sequoia/methods/pnn/model_sl.py ================================================ from typing import List, Optional, Tuple import torch import torch.nn as nn from torch import Tensor from sequoia.settings import Actions, PassiveEnvironment from sequoia.settings.sl.incremental.objects import Observations, Rewards from sequoia.utils.logging_utils import get_logger from .layers import PNNLinearBlock logger = get_logger(__name__) class PnnClassifier(nn.Module): """ @article{rusu2016progressive, title={Progressive neural networks}, author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia}, journal={arXiv preprint arXiv:1606.04671}, year={2016} } """ def __init__(self, n_layers): super().__init__() self.n_layers = n_layers self.columns = nn.ModuleList([]) self.loss = torch.nn.CrossEntropyLoss() self.device = None self.n_tasks = 0 self.n_classes_per_task: List[int] = [] def forward(self, observations: Observations): assert self.columns, "PNN should at least have one column (missing call to `new_task` ?)" x = observations.x x = torch.flatten(x, start_dim=1) task_labels: Optional[Tensor] = observations.task_labels batch_size = x.shape[0] n_known_tasks = len(self.columns) last_known_task_id = n_known_tasks - 1 if task_labels is None: # TODO: Use random output heads per item? logger.warning( f"Encoutering None task labels, assigning a fake random task id for each sample." ) task_labels = torch.randint(n_known_tasks, (batch_size,)) # task_labels = np.array([None for _ in range(len(x))]) unique_task_labels = set(task_labels.tolist()) # TODO: Debug this: column_outputs = [ column[0](x) + n_classes_in_task for n_classes_in_task, column in zip(self.n_classes_per_task, self.columns) ] inputs = column_outputs for layer in range(1, self.n_layers): outputs = [] for i, column in enumerate(self.columns): outputs.append(column[layer](inputs[: i + 1])) inputs = outputs y_logits: Optional[Tensor] = None task_masks = {} # BUG: Can't apply PNN to the ClassIncrementalSetting at the moment. for task_id in unique_task_labels: task_mask = task_labels == task_id task_masks[task_id] = task_mask if task_id is None or task_id >= n_known_tasks: logger.warning( f"Task id {task_id} is encountered, but we haven't trained for it yet!" ) task_id = last_known_task_id if y_logits is None: y_logits = inputs[task_id] else: y_logits[task_mask] = inputs[task_id][task_mask] assert y_logits is not None, "Can't get prediction in model PNN" return y_logits # def new_task(self, device, num_inputs, num_actions = 5): def new_task(self, device, sizes: List[int]): assert len(sizes) == self.n_layers + 1, ( f"Should have the out size for each layer + input size (got {len(sizes)} " f"sizes but {self.n_layers} layers)." ) self.n_tasks += 1 # TODO: Fix this to use the actual number of classes per task. n_outputs = sizes[-1] self.n_classes_per_task.append(n_outputs) task_id = len(self.columns) modules = [] # TODO: Would it also be possible to use convolutional layers here? for i in range(0, self.n_layers): modules.append(PNNLinearBlock(col=task_id, depth=i, n_in=sizes[i], n_out=sizes[i + 1])) new_column = nn.ModuleList(modules).to(device) self.columns.append(new_column) self.device = device print("Add column of the new task") def freeze_columns(self, skip: List[int] = None): if skip == None: skip = [] for i, c in enumerate(self.columns): for params in c.parameters(): params.requires_grad = True for i, c in enumerate(self.columns): if i not in skip: for params in c.parameters(): params.requires_grad = False print("Freeze columns from previous tasks") def shared_step( self, batch: Tuple[Observations, Optional[Rewards]], environment: PassiveEnvironment, ): """Shared step used for both training and validation. Parameters ---------- batch : Tuple[Observations, Optional[Rewards]] Batch containing Observations, and optional Rewards. When the Rewards are None, it means that we'll need to provide the Environment with actions before we can get the Rewards (e.g. image labels) back. This happens for example when being applied in a Setting which cares about sample efficiency or training performance, for example. environment : Environment The environment we're currently interacting with. Used to provide the rewards when they aren't already part of the batch (as mentioned above). Returns ------- Tuple[Tensor, Dict] The Loss tensor, and a dict of metrics to be logged. """ # Since we're training on a Passive environment, we will get both observations # and rewards, unless we're being evaluated based on our training performance, # in which case we will need to send actions to the environments before we can # get the corresponding rewards (image labels). observations: Observations = batch[0].to(self.device) rewards: Optional[Rewards] = batch[1] # Get the predictions: logits = self(observations) y_pred = logits.argmax(-1) # TODO: PNN is coded for the DomainIncrementalSetting, where the action space # is the same for each task. # Get the rewards, if necessary: if rewards is None: rewards = environment.send(Actions(y_pred)) image_labels = rewards.y.to(self.device) # print(logits.size()) loss = self.loss(logits, image_labels) accuracy = (y_pred == image_labels).sum().float() / len(image_labels) metrics_dict = {"accuracy": accuracy} return loss, metrics_dict def parameters(self, task_id): return self.columns[task_id].parameters() ================================================ FILE: sequoia/methods/pnn/pnn_method.py ================================================ from argparse import Namespace from dataclasses import dataclass from typing import Any, Dict, Mapping, Optional, Union import gym import numpy as np import torch import tqdm from gym import spaces from gym.spaces import Box from numpy import inf from simple_parsing import ArgumentParser from wandb.wandb_run import Run from sequoia.common import Config from sequoia.common.hparams import HyperParameters, categorical, log_uniform, uniform from sequoia.common.spaces import Image from sequoia.common.transforms.utils import is_image from sequoia.methods import register_method from sequoia.settings import ( Actions, Method, Observations, PassiveEnvironment, RLSetting, Setting, TaskIncrementalRLSetting, TaskIncrementalSLSetting, ) from sequoia.settings.assumptions import IncrementalAssumption from sequoia.settings.base import Environment from sequoia.utils import get_logger from .model_rl import PnnA2CAgent from .model_sl import PnnClassifier logger = get_logger(__name__) # BUG: Can't apply PNN to the ClassIncrementalSetting at the moment. # BUG: Can't apply PNN to any RL Settings at the moment. # (it was hard-coded to handle pixel cartpole). # TODO: When those bugs get fixed, restore the 'IncrementalAssumption' as the target # setting. # TODO: Debugging PNN on Incremental rather than TaskIncremental @register_method class PnnMethod(Method, target_setting=IncrementalAssumption): """ PNN Method. Applicable to both RL and SL Settings, as long as there are clear task boundaries during training (IncrementalAssumption). """ @dataclass class HParams(HyperParameters): """Hyper-parameters of the Pnn method.""" # Learning rate of the optimizer. Defauts to 0.0001 when in SL. learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4) num_steps: int = 200 # (only applicable in RL settings.) # Discount factor (Only used in RL settings). gamma: float = uniform(0.9, 0.999, default=0.99) # Number of hidden units (only used in RL settings.) hidden_size: int = categorical(64, 128, 256, default=256) # Batch size in SL, and number of parallel environments in RL. # Defaults to None in RL, and 32 when in SL. batch_size: Optional[int] = None # Maximum number of training epochs per task. (only used in SL Settings) max_epochs_per_task: int = uniform(1, 100, default=10) def __init__(self, hparams: HParams = None): # We will create those when `configure` will be called, before training. self.config: Optional[Config] = None self.task_id: Optional[int] = 0 self.hparams: Optional[PnnMethod.HParams] = hparams self.model: Union[PnnA2CAgent, PnnClassifier] self.optimizer: torch.optim.Optimizer def configure(self, setting: Setting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ input_space: Box = setting.observation_space["x"] # For now all Settings have `Discrete` (i.e. classification) action spaces. action_space: spaces.Discrete = setting.action_space self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") self.num_actions = action_space.n self.num_inputs = np.prod(input_space.shape) self.added_tasks = [] if not (setting.task_labels_at_train_time and setting.task_labels_at_test_time): logger.warning( RuntimeWarning( "TODO: PNN doesn't have 'propper' task inference, and task labels " "arent always available! This will use an output head at random." ) ) if isinstance(setting, RLSetting): # If we're applied to an RL setting: # Used these as the default hparams in RL: self.hparams = self.hparams or self.HParams() assert self.hparams self.train_steps_per_task = setting.steps_per_task # We want a batch_size of None, i.e. only one observation at a time. setting.batch_size = None self.num_steps = self.hparams.num_steps # Otherwise, we can train basically as long as we want on each task. self.loss_function = { "gamma": self.hparams.gamma, } if is_image(setting.observation_space.x): # Observing pixel input. self.arch = "conv" else: # Observing state input (e.g. the 4 floats in cartpole rather than images) self.arch = "mlp" self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size) else: # If we're applied to a Supervised Learning setting: # Used these as the default hparams in SL: self.hparams = self.hparams or self.HParams( learning_rate=0.0001, batch_size=32, ) if self.hparams.batch_size is None: self.hparams.batch_size = 32 # Set the batch size on the setting. setting.batch_size = self.hparams.batch_size # For now all Settings on the supervised side of the tree have images as # inputs, so the observation spaces are of type `Image` (same as Box, but with # additional `h`, `w`, `c` and `b` attributes). assert isinstance(input_space, Image) assert ( setting.increment == setting.test_increment ), "Assuming same number of classes per task for training and testing." # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output # head has as many outputs as there are classes in total, which might make # no sense, but currently works. # It would be better to refactor this so that each output head can have only # as many outputs as is required, and then reshape / offset the predictions. n_outputs = setting.increment n_outputs = setting.action_space.n self.layer_size = [self.num_inputs, 256, n_outputs] self.model = PnnClassifier( n_layers=len(self.layer_size) - 1, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting.""" # This method gets called if task boundaries are known in the current # setting. Furthermore, if task labels are available, task_id will be # the index of the new task. If not, task_id will be None. # For example, you could do something like this: # self.model.current_task = task_id if self.training: self.model.freeze_columns([task_id]) if task_id not in self.added_tasks: if isinstance(self.model, PnnA2CAgent): self.model.new_task( device=self.device, num_inputs=self.num_inputs, num_actions=self.num_actions, ) else: self.model.new_task(device=self.device, sizes=self.layer_size) self.added_tasks.append(task_id) self.task_id = task_id def set_optimizer(self): self.optimizer = torch.optim.Adam( self.model.parameters(self.task_id), lr=self.hparams.learning_rate, ) def get_actions(self, observations: Observations, action_space: spaces.Space) -> Actions: """Get a batch of predictions (aka actions) for the given observations.""" observations = observations.to(self.device) with torch.no_grad(): if isinstance(self.model, PnnA2CAgent): predictions = self.model(observations) _, logit = predictions # get the predicted action: action = torch.argmax(logit).item() else: logits = self.model(observations) # Get the predicted classes y_pred = logits.argmax(dim=-1).cpu().numpy() action = y_pred assert action in action_space, (action, action_space) return action def fit(self, train_env: Environment, valid_env: Environment): """Train and validate this method using the "environments" for the current task. NOTE: `train_env` and `valid_env` are both `gym.Env`s as well as `DataLoader`s. This means that if you want to write a "regular" SL training loop, you totally can, and if you want to write you RL-style training loop, you can also do that. """ if isinstance(train_env.unwrapped, PassiveEnvironment): self.fit_sl(train_env, valid_env) else: self.fit_rl(train_env, valid_env) def fit_rl(self, train_env: gym.Env, valid_env: gym.Env): """Training loop for Reinforcement Learning (a.k.a. "active") environment.""" """ base on https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f """ if self.model is None: self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size) assert isinstance(self.model, PnnA2CAgent) self.set_optimizer() assert self.hparams # self.model.float() all_lengths = [] average_lengths = [] all_rewards = [] entropy_term = 0 for episode in range(self.train_steps_per_task): values = [] rewards = [] log_probs = [] state = train_env.reset() for steps in range(self.num_steps): value, policy_dist = self.model(state) value = value.item() dist = policy_dist.detach().numpy() action = np.random.choice(self.num_actions, p=np.squeeze(dist)) log_prob = torch.log(policy_dist.squeeze(0)[action]) entropy = -np.sum(np.mean(dist) * np.log(dist)) new_state, reward, done, _ = train_env.step(action) rewards.append(reward.y) values.append(value) log_probs.append(log_prob) entropy_term += entropy state = new_state if done or steps == self.num_steps - 1: Qval, _ = self.model(state) Qval = Qval.item() all_rewards.append(np.sum(rewards)) all_lengths.append(steps) average_lengths.append(np.mean(all_lengths[-10:])) if episode % 10 == 0: print( f"episode: {episode}, " f"reward: {np.sum(rewards)}, " f"total length: {steps}, " f"average length: {average_lengths[-1]}" ) break Qvals = np.zeros_like(values) for t in reversed(range(len(rewards))): Qval = rewards[t] + self.hparams.gamma * Qval Qvals[t] = Qval # update actor critic values_tensor = torch.as_tensor(values, dtype=torch.float) Qvals = torch.as_tensor(Qvals, dtype=torch.float) log_probs_tensor = torch.stack(log_probs) advantage = Qvals - values_tensor actor_loss = (-log_probs_tensor * advantage).mean() critic_loss = 0.5 * advantage.pow(2).mean() ac_loss = actor_loss + critic_loss + 0.001 * entropy_term self.optimizer.zero_grad() ac_loss.backward() self.optimizer.step() def fit_sl(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment): """Train on a Supervised Learning (a.k.a. "passive") environment.""" observations: TaskIncrementalSLSetting.Observations = train_env.reset() cuda_observations = observations.to(self.device) assert isinstance(self.model, PnnClassifier) assert self.hparams self.set_optimizer() best_val_loss = inf best_epoch = 0 for epoch in range(self.hparams.max_epochs_per_task): self.model.train() print(f"Starting epoch {epoch}") # Training loop: with torch.set_grad_enabled(True), tqdm.tqdm(train_env) as train_pbar: postfix: Dict[str, Any] = {} train_pbar.set_description(f"Training Epoch {epoch}") for i, batch in enumerate(train_pbar): loss, metrics_dict = self.model.shared_step( batch, environment=train_env, ) self.optimizer.zero_grad() loss.backward() self.optimizer.step() postfix.update(metrics_dict) train_pbar.set_postfix(postfix) # Validation loop: self.model.eval() with torch.set_grad_enabled(False), tqdm.tqdm(valid_env) as val_pbar: postfix = {} val_pbar.set_description(f"Validation Epoch {epoch}") epoch_val_loss = 0.0 for i, batch in enumerate(val_pbar): batch_val_loss, metrics_dict = self.model.shared_step( batch, environment=valid_env, ) epoch_val_loss += batch_val_loss postfix.update(metrics_dict, val_loss=epoch_val_loss) val_pbar.set_postfix(postfix) @classmethod def add_argparse_args(cls, parser: ArgumentParser) -> None: parser.add_arguments(cls.HParams, dest="hparams", default=None) @classmethod def from_argparse_args(cls, args: Namespace) -> "PnnMethod": hparams: PnnMethod.HParams = args.hparams method = cls(hparams=hparams) return method def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ return self.hparams.get_orion_space() def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ # Here we overwrite the corresponding attributes with the new suggested values # leaving other fields unchanged. # NOTE: These new hyper-paramers will be used in the next run in the sweep, # since each call to `configure` will create a new Model. self.hparams = self.hparams.replace(**new_hparams) def setup_wandb(self, run: Run) -> None: """Called by the Setting when using Weights & Biases, after `wandb.init`. This method is here to provide Methods with the opportunity to log some of their configuration options or hyper-parameters to wandb. NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by this point. Parameters ---------- run : wandb.Run Current wandb Run. """ run.config["hparams"] = self.hparams.to_dict() def main_rl(): """Applies the PnnMethod in a RL Setting.""" parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) Config.add_argparse_args(parser, dest="config") PnnMethod.add_argparse_args(parser, dest="method") setting = TaskIncrementalRLSetting( dataset="cartpole", nb_tasks=2, train_task_schedule={ 0: {"gravity": 10, "length": 0.3}, 1000: {"gravity": 10, "length": 0.5}, }, ) args = parser.parse_args() config: Config = Config.from_argparse_args(args, dest="config") method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method") method.config = config # 2. Creating the Method # method = ImproveMethod() # 3. Applying the method to the setting: results = setting.apply(method, config=config) print(results.summary()) print(f"objective: {results.objective}") return results def main_sl(): """Applies the PnnMethod in a SL Setting.""" parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False) # Add arguments for the Setting # TODO: PNN is coded for the DomainIncrementalSetting, where the action space # is the same for each task. # parser.add_arguments(DomainIncrementalSetting, dest="setting") parser.add_arguments(TaskIncrementalSLSetting, dest="setting") # TaskIncrementalSLSetting.add_argparse_args(parser, dest="setting") Config.add_argparse_args(parser, dest="config") # Add arguments for the Method: PnnMethod.add_argparse_args(parser, dest="method") args = parser.parse_args() # setting: TaskIncrementalSLSetting = args.setting setting: TaskIncrementalSLSetting = TaskIncrementalSLSetting.from_argparse_args( # setting: DomainIncrementalSetting = DomainIncrementalSetting.from_argparse_args( args, dest="setting", ) config: Config = Config.from_argparse_args(args, dest="config") method: PnnMethod = PnnMethod.from_argparse_args(args, dest="method") method.config = config results = setting.apply(method, config=config) print(results.summary()) return results if __name__ == "__main__": # Run RL Setting main_sl() # Run SL Setting # main_rl() ================================================ FILE: sequoia/methods/random_baseline.py ================================================ """A random baseline Method that gives random predictions for any input. Should be applicable to any Setting. """ from argparse import Namespace from typing import Any, Dict, Mapping, Optional, Union import gym import numpy as np import tqdm from simple_parsing import ArgumentParser from torch import Tensor from sequoia.methods import register_method from sequoia.settings import Setting from sequoia.settings.base import Actions, Environment, Method, Observations from sequoia.settings.sl import SLSetting from sequoia.utils import get_logger logger = get_logger(__name__) @register_method class RandomBaselineMethod(Method, target_setting=Setting): """Baseline method that gives random predictions for any given setting. This method doesn't have a model or any parameters. It just returns a random action for every observation. """ def __init__(self): self.max_train_episodes: Optional[int] = None def configure(self, setting: Setting): """Called before the method is applied on a setting (before training). You can use this to instantiate your model, for instance, since this is where you get access to the observation & action spaces. """ if isinstance(setting, SLSetting): # Being applied in SL, we will only do one 'epoch" (a.k.a. "episode"). self.max_train_episodes = 1 def fit( self, train_env: Environment, valid_env: Environment, ): episodes = 0 with tqdm.tqdm(desc="training") as train_pbar: while not train_env.is_closed(): for i, batch in enumerate(train_env): if isinstance(batch, Observations): observations, rewards = batch, None else: observations, rewards = batch batch_size = observations.x.shape[0] y_pred = train_env.action_space.sample() # If we're at the last batch, it might have a different size, so w # give only the required number of values. if isinstance(y_pred, (np.ndarray, Tensor)): if y_pred.shape[0] != batch_size: y_pred = y_pred[:batch_size] if rewards is None: rewards = train_env.send(y_pred) train_pbar.set_postfix({"Episode": episodes, "Step": i}) train_pbar.update() # train as you usually would. if train_env.is_closed(): break episodes += 1 if self.max_train_episodes and episodes >= self.max_train_episodes: train_env.close() break def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions: return action_space.sample() def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ logger.warning( UserWarning( "Hey, you seem to be trying to perform an HPO sweep using the random " "baseline method?" ) ) # Assuming that this is just used for debugging, so giving back a simple space. return {"foo": "choices([0, 1, 2])"} def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ foo = new_hparams["foo"] print(f"Using new suggested value {foo}") @classmethod def add_argparse_args(cls, parser: ArgumentParser): pass @classmethod def from_argparse_args(cls, args: Namespace): return cls() if __name__ == "__main__": RandomBaselineMethod.main() ================================================ FILE: sequoia/methods/random_baseline_test.py ================================================ # TODO: Create a sort of reusable fixture for the Method # TODO: Figure out how to ACTUALLY set the checkpoint dir in pytorch-lightning! from typing import List from sequoia.settings import all_settings from .random_baseline import RandomBaselineMethod # Use 'Method' as an alias for the actual Method cusblass under test. (since at # the moment quite a few tests share some common code. # List of datasets that are currently supported. supported_datasets: List[str] = [ "mnist", "fashionmnist", "cifar10", "cifar100", "kmnist", "cartpole", ] def test_is_applicable_to_all_settings(): settings = RandomBaselineMethod.get_applicable_settings() assert set(settings) == set(all_settings) ================================================ FILE: sequoia/methods/stable_baselines3_methods/__init__.py ================================================ from .a2c import A2CMethod, A2CModel from .base import SB3BaseHParams, StableBaselines3Method from .ddpg import DDPGMethod, DDPGModel from .dqn import DQNMethod, DQNModel from .off_policy_method import OffPolicyMethod, OffPolicyModel from .on_policy_method import OnPolicyMethod, OnPolicyModel from .policy_wrapper import PolicyWrapper from .ppo import PPOMethod, PPOModel from .sac import SACMethod, SACModel from .td3 import TD3Method, TD3Model ================================================ FILE: sequoia/methods/stable_baselines3_methods/a2c.py ================================================ """ Method that uses the A2C model from stable-baselines3 and targets the RL settings in the tree. """ import math from dataclasses import dataclass from typing import Callable, ClassVar, Dict, Mapping, Optional, Type, Union import gym import torch from gym import spaces from simple_parsing import mutable_field from stable_baselines3.a2c import A2C from sequoia.common.hparams import log_uniform, uniform from sequoia.methods import register_method from sequoia.settings.rl import ContinualRLSetting from sequoia.utils import get_logger from .on_policy_method import OnPolicyMethod, OnPolicyModel logger = get_logger(__name__) class A2CModel(A2C, OnPolicyModel): """Advantage Actor Critic (A2C) model imported from stable-baselines3. Paper: https://arxiv.org/abs/1602.01783 Code: The SB3 implementation borrows code from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and and Stable Baselines (https://github.com/hill-a/stable-baselines) Introduction to A2C: https://hackernoon.com/intuitive-rl-intro-to-advantage-actor-critic-a2c-4ff545978752 """ @dataclass class HParams(OnPolicyModel.HParams): """Hyper-parameters of the A2C Model. TODO: Set actual 'good' priors for these hyper-parameters, as these were set somewhat arbitrarily. (They do however use the same defaults as in SB3). """ # learning rate for the optimizer, it can be a function of the current # progress remaining (from 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=7e-4) # The number of steps to run for each environment per update (i.e. batch size # is n_steps * n_env where n_env is number of environment copies running in # parallel) # NOTE: Default value here is much lower than in PPO, which might indicate # that this A2C is more "on-policy"? (i.e. that it requires data to be super # "fresh")? n_steps: int = uniform(3, 64, default=5, discrete=True) # Discount factor gamma: float = 0.99 # gamma: float = uniform(0.9, 0.9999, default=0.99) # Factor for trade-off of bias vs variance for Generalized Advantage Estimator. # Equivalent to classic advantage when set to 1. gae_lambda: float = 1.0 # gae_lambda: float = uniform(0.5, 1.0, default=1.0) # Entropy coefficient for the loss calculation ent_coef: float = 0.0 # ent_coef: float = uniform(0.0, 1.0, default=0.0) # Value function coefficient for the loss calculation vf_coef: float = 0.5 # vf_coef: float = uniform(0.01, 1.0, default=0.5) # The maximum value for the gradient clipping max_grad_norm: float = 0.5 # max_grad_norm: float = uniform(0.1, 10, default=0.5) # RMSProp epsilon. It stabilizes square root computation in denominator of # RMSProp update. rms_prop_eps: float = 1e-5 # rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5) # Whether to use RMSprop (default) or Adam as optimizer use_rms_prop: bool = True # use_rms_prop: bool = categorical(True, False, default=True) # Whether to use generalized State Dependent Exploration (gSDE) instead of # action noise exploration (default: False) use_sde: bool = False # use_sde: bool = categorical(True, False, default=False) # Sample a new noise matrix every n steps when using gSDE. # Default: -1 (only sample at the beginning of the rollout) sde_sample_freq: int = -1 # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1) # Whether to normalize or not the advantage normalize_advantage: bool = False # normalize_advantage: bool = categorical(True, False, default=False) # The log location for tensorboard (if None, no logging) tensorboard_log: Optional[str] = None # # Whether to create a second environment that will be used for evaluating the # # agent periodically. (Only available when passing string for the environment) # create_eval_env: bool = False # # Additional arguments to be passed to the policy on creation # policy_kwargs: Optional[Dict[str, Any]] = None # The verbosity level: 0 no output, 1 info, 2 debug verbose: int = 0 # Seed for the pseudo random generators seed: Optional[int] = None # Device (cpu, cuda, ...) on which the code should be run. # Setting it to auto, the code will be run on the GPU if possible. device: Union[torch.device, str] = "auto" # :param _init_setup_model: Whether or not to build the network at the # creation of the instance # _init_setup_model: bool = True @register_method @dataclass class A2CMethod(OnPolicyMethod): """Method that uses the A2C model from stable-baselines3.""" # changing the 'name' in this case here, because the default name would be # 'a_2_c'. name: ClassVar[str] = "a2c" Model: ClassVar[Type[A2CModel]] = A2CModel # Hyper-parameters of the A2C model. hparams: A2CModel.HParams = mutable_field(A2CModel.HParams) def configure(self, setting: ContinualRLSetting): super().configure(setting=setting) if setting.steps_per_phase: if self.hparams.n_steps > setting.steps_per_phase: self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase) logger.info( f"Capping the n_steps to 10% of step budget length: " f"{self.hparams.n_steps}" ) # NOTE: We limit the number of trainign steps per task, such that we never # attempt to fill the buffer using more samples than the environment allows. self.train_steps_per_task = min( self.train_steps_per_task, setting.steps_per_phase - self.hparams.n_steps - 1, ) logger.info(f"Limitting training steps per task to {self.train_steps_per_task}") def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]: search_space = super().get_search_space(setting) if isinstance(setting.action_space, spaces.Discrete): # From stable_baselines3/common/base_class.py", line 170: # > Generalized State-Dependent Exploration (gSDE) can only be used with # continuous actions # Therefore we remove related entries in the search space, so they keep # their default values. search_space.pop("use_sde", None) search_space.pop("sde_sample_freq", None) return search_space if __name__ == "__main__": results = A2CMethod.main() print(results) ================================================ FILE: sequoia/methods/stable_baselines3_methods/a2c_test.py ================================================ from typing import ClassVar, Type from .a2c import A2CMethod, A2CModel from .base import BaseAlgorithm, StableBaselines3Method from .base_test import DiscreteActionSpaceMethodTests class TestA2C(DiscreteActionSpaceMethodTests): Method: ClassVar[Type[StableBaselines3Method]] = A2CMethod Model: ClassVar[Type[BaseAlgorithm]] = A2CModel ================================================ FILE: sequoia/methods/stable_baselines3_methods/base.py ================================================ """ Example of creating an A2C agent using the simplebaselines3 package. See https://stable-baselines3.readthedocs.io/en/master/guide/install.html """ from abc import ABC from dataclasses import dataclass from typing import Any, Callable, ClassVar, Dict, List, Mapping, Optional, Type, Union import gym import torch from gym import spaces from simple_parsing import choice, mutable_field from simple_parsing.helpers.hparams import HyperParameters, categorical, log_uniform from stable_baselines3.common.base_class import BaseAlgorithm, BasePolicy, MaybeCallback # from stable_baselines3.common.vec_env.obs_dict_wrapper import ObsDictWrapper from wandb.wandb_run import Run from sequoia.common.transforms.utils import is_image from sequoia.settings import Method, Setting from sequoia.settings.rl.continual import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from sequoia.utils.serialization import register_decoding_fn logger = get_logger(__name__) # "Patch" the _wrap_env function of the BaseAlgorithm class of # stable_baselines, to make it recognize the VectorEnv from gym.vector as a # vectorized environment. # Stable-Baselines3 has a lot of duplicated code from openai gym # def _wrap_env(env: GymEnv, verbose: int = 0, monitor_wrapper: bool = True) -> VecEnv: # """ " # Wrap environment with the appropriate wrappers if needed. # For instance, to have a vectorized environment # or to re-order the image channels. # :param env: # :param verbose: # :param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible. # :return: The wrapped environment. # """ # # if not isinstance(env, VecEnv): # if not ( # isinstance(env, (VecEnv, VectorEnv)) # or isinstance(env.unwrapped, (VecEnv, VectorEnv)) # ): # # if not is_wrapped(env, Monitor) and monitor_wrapper: # if monitor_wrapper and not ( # is_wrapped(env, Monitor) # or is_wrapped(env, gym.wrappers.Monitor) # or has_wrapper(env, gym.wrappers.Monitor) # ): # if verbose >= 1: # print("Wrapping the env with a `Monitor` wrapper") # env = Monitor(env) # if verbose >= 1: # print("Wrapping the env in a DummyVecEnv.") # env = DummyVecEnv([lambda: env]) # if is_image_space(env.observation_space) and not is_wrapped(env, VecTransposeImage): # if verbose >= 1: # print("Wrapping the env in a VecTransposeImage.") # env = VecTransposeImage(env) # # check if wrapper for dict support is needed when using HER # if isinstance(env.observation_space, gym.spaces.dict.Dict): # env = ObsDictWrapper(env) # return env # BaseAlgorithm._wrap_env = staticmethod(_wrap_env) class RemoveInfoWrapper(gym.Wrapper): """Wrapper used to remove the 'info' dict, since there seems to be a bug in sb3 whenever there is something in the 'info' dict. """ def step(self, action): obs, rewards, done, info = self.env.step(action) info = {} return obs, rewards, done, info @dataclass class SB3BaseHParams(HyperParameters): """Hyper-parameters of a model from the `stable_baselines3` package. The command-line arguments for these are created with simple-parsing. """ # The policy model to use (MlpPolicy, CnnPolicy, ...) policy: Optional[Union[str, Type[BasePolicy]]] = choice("MlpPolicy", "CnnPolicy", default=None) # # The base policy used by this method # policy_base: Type[BasePolicy] # learning rate for the optimizer, it can be a function of the current # progress remaining (from 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-4) # Additional arguments to be passed to the policy on creation policy_kwargs: Optional[Dict[str, Any]] = None # the log location for tensorboard (if None, no logging) tensorboard_log: Optional[str] = None # The verbosity level: 0 none, 1 training information, 2 debug verbose: int = 1 # Device on which the code should run. By default, it will try to use a Cuda # compatible device and fallback to cpu if it is not possible. device: Union[torch.device, str] = "auto" # # Whether the algorithm supports training with multiple environments (as in A2C) # support_multi_env: bool = False # Whether to create a second environment that will be used for evaluating # the agent periodically. (Only available when passing string for the # environment) create_eval_env: bool = False # # When creating an environment, whether to wrap it or not in a Monitor wrapper. # monitor_wrapper: bool = True # Seed for the pseudo random generators seed: Optional[int] = None # # Whether to use generalized State Dependent Exploration (gSDE) instead of # action noise exploration (default: False) # use_sde: bool = False # # Sample a new noise matrix every n steps when using gSDE Default: -1 # (only sample at the beginning of the rollout) # sde_sample_freq: int = -1 # Wether to clear the experience buffer at the beginning of a new task. # NOTE: We use to_dict here so that it doesn't get passed do the Policy class. clear_buffers_between_tasks: bool = categorical(True, False, default=False, to_dict=False) @dataclass class StableBaselines3Method(Method, ABC, target_setting=ContinualRLSetting): """Base class for the methods that use models from the stable_baselines3 repo. """ family: ClassVar[str] = "sb3" # Class variable that represents what kind of Model will be used. # (This is just here so we can easily create one Method class per model type # by just changing this class attribute.) Model: ClassVar[Type[BaseAlgorithm]] # HyperParameters of the Method. hparams: SB3BaseHParams = mutable_field(SB3BaseHParams) # The number of training steps to run per task. # NOTE: This shouldn't be set to more than the task length when applying this method # on a ContinualRLSetting, because we don't currently have a way of "resetting" # the nonstationarity in the environment, and there is only one task, # therefore if we trained for say 10 million steps, while the # non-stationarity only lasts for 10_000 steps, we'd have seen an almost # stationary distribution, since the environment would have stopped changing after # 10_000 steps. # train_steps_per_task: int = 10_000 # callback(s) called at every step with state of the algorithm. callback: MaybeCallback = None # The number of timesteps before logging. log_interval: int = 100 # the name of the run for TensorBoard logging tb_log_name: str = "run" # Evaluate the agent every ``eval_freq`` timesteps (this may vary a little) # TODO: Log the evaluations to wandb. eval_freq: int = 5_000 # Number of episode to evaluate the agent n_eval_episodes = 5 # Path to a folder where the evaluations will be saved eval_log_path: Optional[str] = None def __post_init__(self): self.model: Optional[BaseAlgorithm] = None # Extra wrappers to add to the train_env and valid_env before passing # them to the `learn` method from stable-baselines3. import operator from functools import partial from sequoia.common.gym_wrappers import TransformObservation, TransformReward self.extra_train_wrappers: List[Callable[[gym.Env], gym.Env]] = [ partial(TransformObservation, f=operator.itemgetter("x")), # partial(TransformAction, f=operator.itemgetter("y_pred"), partial(TransformReward, f=operator.itemgetter("y")), RemoveInfoWrapper, ] self.extra_valid_wrappers: List[Callable[[gym.Env], gym.Env]] = [ partial(TransformObservation, f=operator.itemgetter("x")), partial(TransformReward, f=operator.itemgetter("y")), RemoveInfoWrapper, ] # Number of timesteps to train on for each task. self.total_timesteps_per_task: int = 0 self.train_env: gym.Env = None self.valid_env: gym.Env = None def configure(self, setting: ContinualRLSetting): # Delete the model, if present. self.model = None # For now, we don't batch the space because stablebaselines3 will add an # additional batch dimension if we do. # TODO: Still need to debug the batching stuff with stablebaselines, # some methods support it, some don't, and it doesn't recognize # VectorEnvs from gym. setting.batch_size = None # BUG: Need to fix an issue when using the CnnPolicy and Atary envs, the # input shape isn't what they expect (only 2 channels instead of three # apparently.) # from sequoia.common.transforms import Transforms # NOTE: Important to not use any transforms, since the SB3 methods want to get # the 'raw' np.uint8 image as an input. transforms = [ # Transforms.to_tensor, # Transforms.three_channels, # Transforms.channels_first_if_needed, ] setting.transforms = transforms setting.train_transforms = transforms setting.val_transforms = transforms setting.test_transforms = transforms if self.hparams.policy is None: if is_image(setting.observation_space.x): self.hparams.policy = "CnnPolicy" else: self.hparams.policy = "MlpPolicy" logger.debug(f"Will use {self.hparams.policy} as the policy.") # TODO: Double check that some settings might not impose a limit on # number of training steps per environment (e.g. task-incremental RL?) if setting.steps_per_phase: # if self.train_steps_per_task > setting.steps_per_phase: # warnings.warn( # RuntimeWarning( # f"Can't train for the requested {self.train_steps_per_task} " # f"steps, since we're (currently) only allowed a maximum of " # f"{setting.steps_per_phase} steps.)" # ) # ) # Use as many training steps as possible. self.train_steps_per_task = setting.steps_per_phase - 1 # Otherwise, we can train basically as long as we want on each task. def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm: """Create a Model given the training and validation environments.""" model_kwargs = self.hparams.to_dict() assert "clear_buffers_between_tasks" not in model_kwargs return self.Model(env=train_env, **model_kwargs) def fit(self, train_env: gym.Env, valid_env: gym.Env): # Remove the extra information that the Setting gives us. for wrapper in self.extra_train_wrappers: train_env = wrapper(train_env) for wrapper in self.extra_valid_wrappers: valid_env = wrapper(valid_env) if self.model is None: self.model = self.create_model(train_env, valid_env) else: # TODO: "Adapt"/re-train the model on the new environment. # BUG: In the MT10 benchmark, the last entry in the observation space is # very slightly different, which prevents us from doing this: """ >>> env.observation_space.low array([-0.525 , 0.348 , -0.0525, -1. , -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -0.525 , 0.348 , -0.0525, -1., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -0.1 , 0.8 , 0.01 ], dtype=float32) >>> observation_space.low array([-0.525 , 0.348 , -0.0525, -1. , -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -0.525 , 0.348 , -0.0525, -1., -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -0.1 , 0.8 , 0.05 ], dtype=float32) """ if self.train_env is not None: # BUG: MT10 has *slightly* different values in 'low' between tasks! if ( isinstance(train_env.observation_space, spaces.Box) and train_env.observation_space.shape[-1] == 39 ): train_env.observation_space = self.train_env.observation_space self.model.set_env(train_env) self.train_env = train_env self.valid_env = valid_env # Decide how many steps to train on. total_timesteps = self.train_steps_per_task # TODO: Get the max number of steps directly from the env, rather than from the # setting's fields. logger.info(f"Starting training, for a maximum of {total_timesteps} steps.") # todo: Customize the parametrers of the model and/or of this "learn" # method if needed. self.model = self.model.learn( # The total number of samples (env steps) to train on total_timesteps=total_timesteps, eval_env=valid_env, callback=self.callback, log_interval=self.log_interval, tb_log_name=self.tb_log_name, eval_freq=self.eval_freq, n_eval_episodes=self.n_eval_episodes, eval_log_path=self.eval_log_path, # whether or not to reset the current timestep number (used in logging) reset_num_timesteps=True, ) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: obs = observations.x predictions = self.model.predict(obs) action, _ = predictions assert action in action_space, (observations, action, action_space) return action def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ return { "algo_hparams": self.hparams.get_orion_space(), } def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ # Here we overwrite the corresponding attributes with the new suggested values # leaving other fields unchanged. # NOTE: These new hyper-paramers will be used in the next run in the sweep, # since each call to `configure` will create a new Model. self.hparams = self.hparams.replace(**new_hparams["algo_hparams"]) def setup_wandb(self, run: Run) -> None: """Called by the Setting when using Weights & Biases, after `wandb.init`. This method is here to provide Methods with the opportunity to log some of their configuration options or hyper-parameters to wandb. NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by this point. Parameters ---------- run : wandb.Run Current wandb Run. """ run.config["hparams"] = self.hparams.to_dict() def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ if self.hparams.clear_buffers_between_tasks: self.clear_buffers() def clear_buffers(self): """Clears out the experience buffer of the Policy.""" # I think that's the right way to do it.. not sure. # assert False, self.model.replay_buffer.pos if self.model: # TODO: These are really interesting methods! # self.model.save_replay_buffer # self.model.load_replay_buffer self.model.replay_buffer.reset() # We do this just to prevent errors when trying to decode the hparams class above, and # also to silence the related warnings from simple-parsing's decoding.py module. register_decoding_fn(Type[BasePolicy], lambda v: v) register_decoding_fn(Callable, lambda v: v) ================================================ FILE: sequoia/methods/stable_baselines3_methods/base_test.py ================================================ from inspect import Parameter, Signature, getsourcefile, signature from typing import ClassVar, Dict, Type import pytest from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm from sequoia.common.config import Config from sequoia.conftest import monsterkong_required from sequoia.methods.method_test import MethodTests from sequoia.settings.base import Results from sequoia.settings.rl import DiscreteTaskAgnosticRLSetting, IncrementalRLSetting, RLSetting from .base import BaseAlgorithm, StableBaselines3Method # @pytest.mark.parametrize( # "MethodType, AlgoType", # [ # (OnPolicyMethod, OnPolicyAlgorithm), # (OffPolicyMethod, OffPolicyAlgorithm), # (A2CMethod, A2C), # (DDPGMethod, DDPG), # (PPOMethod, PPO), # (DQNMethod, DQN), # (TD3Method, TD3), # (SACMethod, SAC), # ], # ) class StableBaselines3MethodTests(MethodTests): Method: ClassVar[Type[StableBaselines3Method]] = StableBaselines3Method Model: ClassVar[Type[BaseAlgorithm]] SB3_Algo: ClassVar[Type[BaseAlgorithm]] debug_kwargs: ClassVar[Dict] = {} @pytest.mark.parametrize("clear_buffers", [False, True]) def test_clear_buffers_between_tasks(self, clear_buffers: bool, config: Config): setting_kwargs = dict( nb_tasks=2, train_steps_per_task=1_000, test_steps_per_task=1_000, config=config, ) setting_kwargs.update(self.setting_kwargs) setting = DiscreteTaskAgnosticRLSetting(**setting_kwargs) setting.setup() assert setting.train_max_steps == 2_000 assert setting.test_max_steps == 2_000 method = self.Method(hparams=self.Model.HParams(clear_buffers_between_tasks=clear_buffers)) method.configure(setting) method.fit( train_env=setting.train_dataloader(), valid_env=setting.val_dataloader(), ) assert method.hparams.clear_buffers_between_tasks == clear_buffers # TODO: Not clear how to check the length of the replay buffer! length_before_task_switch = get_current_length_of_replay_buffer(method.model) method.on_task_switch(task_id=1) if clear_buffers: assert get_current_length_of_replay_buffer(method.model) == 0 else: assert get_current_length_of_replay_buffer(method.model) == length_before_task_switch def test_hparams_have_same_defaults_as_in_sb3( self, ): hparams = self.Model.HParams() AlgoType = [ cls for cls in self.Model.mro() if cls.__module__.startswith("stable_baselines3") ][0] sig: Signature = signature(AlgoType.__init__) for attr_name, value_in_hparams in hparams.to_dict().items(): params_names = list(sig.parameters.keys()) assert attr_name in params_names, f"Hparams has extra field {attr_name}" algo_constructor_parameter = sig.parameters[attr_name] sb3_default = algo_constructor_parameter.default if sb3_default is Parameter.empty: continue if attr_name in "verbose": continue # ignore the default value of the 'verbose' param which we change. if ( attr_name == "train_freq" and isinstance(sb3_default, tuple) and len(sb3_default) == 2 ): # Convert the default of (1, "steps") to 1, since that's the format we use. if sb3_default[1] == "step": sb3_default = sb3_default[0] if isinstance(value_in_hparams, list): value_in_hparams = tuple(value_in_hparams) assert value_in_hparams == sb3_default, ( f"{self.Method.__name__} in Sequoia has different default value for " f"hyper-parameter '{attr_name}' than in SB3: \n" f"\t{value_in_hparams} != {sb3_default}\n" f"Path to sequoia implementation: {getsourcefile(self.Method)}\n" f"Path to SB3 implementation: {getsourcefile(AlgoType)}\n" ) @classmethod @pytest.fixture def method(cls, config: Config) -> StableBaselines3Method: """Fixture that returns the Method instance to use when testing/debugging.""" return cls.Method(**cls.debug_kwargs) def validate_results( self, setting: RLSetting, method: StableBaselines3Method, results: RLSetting.Results, ) -> None: assert results assert results.objective # TODO: Set some 'reasonable' bounds on the performance here, depending on the # setting/dataset. def test_debug(self, method: StableBaselines3Method, setting: RLSetting, config: Config): results: Results = setting.apply(method, config=config) assert results.objective is not None print(results.summary()) self.validate_results(setting=setting, method=method, results=results) class DiscreteActionSpaceMethodTests(StableBaselines3MethodTests): debug_kwargs: ClassVar[Dict] = {} expected_debug_mean_episode_reward: ClassVar[float] = 135 setting_kwargs: ClassVar[str] = {"dataset": "CartPole-v0"} @pytest.mark.timeout(120) @monsterkong_required def test_monsterkong(self): method = self.Method(**self.debug_kwargs) setting = IncrementalRLSetting( dataset="monsterkong", nb_tasks=2, train_steps_per_task=1_000, test_steps_per_task=1_000, ) results: IncrementalRLSetting.Results = setting.apply(method, config=Config(debug=True)) print(results.summary()) from functools import singledispatch from stable_baselines3.common.buffers import RolloutBuffer @singledispatch def get_current_length_of_replay_buffer(algo: BaseAlgorithm) -> int: """Returns the current length of the replay buffer of the given Algorithm.""" raise NotImplementedError(algo) @get_current_length_of_replay_buffer.register def _(algo: OffPolicyAlgorithm): return algo.replay_buffer.pos @get_current_length_of_replay_buffer.register def _(algo: OnPolicyAlgorithm): rollout_buffer: RolloutBuffer return algo.rollout_buffer.pos class ContinuousActionSpaceMethodTests(StableBaselines3MethodTests): setting_kwargs: ClassVar[str] = {"dataset": "MountainCarContinuous-v0"} ================================================ FILE: sequoia/methods/stable_baselines3_methods/ddpg.py ================================================ """ Method that uses the DDPG model from stable-baselines3 and targets the RL settings in the tree. """ from dataclasses import dataclass from typing import Callable, ClassVar, Optional, Type, Union import gym from gym import spaces from simple_parsing import mutable_field from stable_baselines3.common.off_policy_algorithm import TrainFreq from stable_baselines3.ddpg import DDPG from sequoia.common.hparams import log_uniform from sequoia.methods import register_method from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .off_policy_method import OffPolicyMethod, OffPolicyModel logger = get_logger(__name__) class DDPGModel(DDPG, OffPolicyModel): """Customized version of the DDPG model from stable-baselines-3.""" @dataclass class HParams(OffPolicyModel.HParams): """Hyper-parameters of the DDPG Model.""" # TODO: Add hparams specific to DDPG here. # The learning rate, it can be a function of the current progress (from # 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3) # The verbosity level: 0 none, 1 training information, 2 debug verbose: int = 0 train_freq: TrainFreq = TrainFreq(frequency=1, unit="episode") # Minibatch size for each gradient update batch_size: int = 100 # How many gradient steps to do after each rollout (see ``train_freq`` # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient # steps as steps done in the environment during the rollout. gradient_steps: int = -1 # gradient_steps: int = categorical(1, -1, default=-1) @register_method @dataclass class DDPGMethod(OffPolicyMethod): """Method that uses the DDPG model from stable-baselines3.""" Model: ClassVar[Type[DDPGModel]] = DDPGModel # Hyper-parameters of the DDPG model. hparams: DDPGModel.HParams = mutable_field(DDPGModel.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 2_048.0 def configure(self, setting: ContinualRLSetting): super().configure(setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DDPGModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) if __name__ == "__main__": results = DDPGMethod.main() print(results) ================================================ FILE: sequoia/methods/stable_baselines3_methods/ddpg_test.py ================================================ from typing import ClassVar, Type import pytest from .base import BaseAlgorithm, StableBaselines3Method from .base_test import ContinuousActionSpaceMethodTests from .ddpg import DDPGMethod, DDPGModel @pytest.mark.timeout(60) class TestDDPG(ContinuousActionSpaceMethodTests): Method: ClassVar[Type[StableBaselines3Method]] = DDPGMethod Model: ClassVar[Type[BaseAlgorithm]] = DDPGModel ================================================ FILE: sequoia/methods/stable_baselines3_methods/dqn.py ================================================ """ Method that uses the DQN model from stable-baselines3 and targets the RL settings in the tree. """ from dataclasses import dataclass from typing import Callable, ClassVar, Optional, Type, Union import gym from gym import spaces from simple_parsing import mutable_field from simple_parsing.helpers.hparams import log_uniform, uniform from stable_baselines3.dqn import DQN from sequoia.common.hparams import categorical from sequoia.common.transforms import ChannelsFirst from sequoia.methods import register_method from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .off_policy_method import OffPolicyMethod, OffPolicyModel logger = get_logger(__name__) class DQNModel(DQN, OffPolicyModel): """Customized version of the DQN model from stable-baselines-3.""" @dataclass class HParams(OffPolicyModel.HParams): """Hyper-parameters of the DQN model from `stable_baselines3`. The command-line arguments for these are created with simple-parsing. """ # ------------------ # overwritten hparams # The learning rate, it can be a function of the current progress (from # 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4) # size of the replay buffer buffer_size: int = uniform(100_000, 10_000_000, default=1_000_000) # -------------------- # How many steps of the model to collect transitions for before learning # starts. learning_starts: int = 50_000 # Minibatch size for each gradient update batch_size: int = 32 # Update the model every ``train_freq`` steps. Set to `-1` to disable. train_freq: int = 4 # train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=4) # The soft update coefficient ("Polyak update", between 0 and 1) default # 1 for hard update tau: float = 1.0 # tau: float = uniform(0., 1., default=1.0) # Update the target network every ``target_update_interval`` environment # steps. target_update_interval: int = categorical(1, 10, 100, 1_000, 10_000, default=10_000) # Fraction of entire training period over which the exploration rate is # reduced. exploration_fraction: float = 0.1 # exploration_fraction: float = uniform(0.05, 0.3, default=0.1) # Initial value of random action probability. exploration_initial_eps: float = 1.0 # exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0) # final value of random action probability. exploration_final_eps: float = 0.05 # exploration_final_eps: float = uniform(0, 0.1, default=0.05) # The maximum value for the gradient clipping. max_grad_norm: float = 10 # max_grad_norm: float = uniform(1, 100, default=10) def train(self, gradient_steps: int, batch_size: int = 100) -> None: super().train(gradient_steps, batch_size=batch_size) @register_method @dataclass class DQNMethod(OffPolicyMethod): """Method that uses a DQN model from the stable-baselines3 package.""" Model: ClassVar[Type[DQNModel]] = DQNModel # Hyper-parameters of the DQN model. hparams: DQNModel.HParams = mutable_field(DQNModel.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 1_024 * 10.0 def configure(self, setting: ContinualRLSetting): super().configure(setting) # NOTE: Need to change some attributes depending on the maximal number of steps # in the environment allowed in the given Setting. if setting.steps_per_phase: ten_percent_of_step_budget = setting.steps_per_phase // 10 if self.hparams.target_update_interval > ten_percent_of_step_budget: # Same for the 'update target network' interval. self.hparams.target_update_interval = ten_percent_of_step_budget // 2 logger.info( f"Reducing the target network update interval to " f"{self.hparams.target_update_interval}, because of the limit on " f"training steps imposed by the Setting." ) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DQNModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: obs = observations.x # Temp fix for monsterkong and DQN: if obs.shape == (64, 64, 3): obs = ChannelsFirst.apply(obs) predictions = self.model.predict(obs) action, _ = predictions assert action in action_space, (observations, action, action_space) return action def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) if __name__ == "__main__": results = DQNMethod.main() print(results) ================================================ FILE: sequoia/methods/stable_baselines3_methods/dqn_test.py ================================================ from typing import ClassVar, Dict, Type import numpy as np import pytest from gym import spaces from sequoia.common.config import Config from sequoia.common.spaces import Image from sequoia.settings.rl import IncrementalRLSetting from .base import BaseAlgorithm, StableBaselines3Method from .base_test import DiscreteActionSpaceMethodTests from .dqn import DQNMethod, DQNModel from .off_policy_method_test import OffPolicyMethodTests class TestDQN(DiscreteActionSpaceMethodTests, OffPolicyMethodTests): Method: ClassVar[Type[StableBaselines3Method]] = DQNMethod Model: ClassVar[Type[BaseAlgorithm]] = DQNModel debug_kwargs: ClassVar[Dict] = {} # TODO: Maybe this is because of the buffer isn't filled up enough with the short # number of allowed steps? @pytest.mark.xfail(reason="DQN really sucks on cartpole?") def test_classic_control_state(self, config: Config): super().test_classic_control_state(config=config) @pytest.mark.xfail(reason="DQN really sucks on cartpole?") def test_incremental_classic_control_state(self, config: Config): super().test_incremental_classic_control_state(config=config) def test_dqn_monsterkong_adds_channel_first_transform(self): method = self.Method(**self.debug_kwargs) setting = IncrementalRLSetting( dataset="monsterkong", nb_tasks=2, train_steps_per_task=1_000, test_steps_per_task=1_000, ) assert setting.train_max_steps == 2_000 assert setting.test_max_steps == 2_000 assert setting.nb_tasks == 2 assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8) assert setting.observation_space.task_labels.n == 2 # assert setting.observation_space == TypedDictSpace( # spaces={ # "x": Image(0, 255, shape=(64, 64, 3), dtype=np.uint8), # "task_labels": Sparse(spaces.Discrete(2), sparsity=0.5), # "done": Sparse(spaces.Box(False, True, (), dtype=np.bool), sparsity=1), # }, # dtype=setting.Observations, # ) assert setting.observation_space.dtype is setting.Observations assert setting.action_space == spaces.Discrete(6) # monsterkong has 6 actions. # (Before the method gets to change the Setting): # By default the setting gives the same shape of obs as the underlying env. for env_method in [ setting.train_dataloader, setting.val_dataloader, setting.test_dataloader, ]: print(f"Testing method {env_method.__name__}") with env_method() as env: reset_obs = env.reset() # TODO: Fix this so the 'x' space actually gets tensor support. # assert reset_obs in env.observation_space assert reset_obs.numpy() in env.observation_space assert reset_obs.x.shape == (64, 64, 3) # Let the Method configure itself on the Setting: method.configure(setting) # (After the method gets to change the Setting): for env_method in [ setting.train_dataloader, setting.val_dataloader, setting.test_dataloader, ]: with env_method() as env: reset_obs = env.reset() # Fix this numpy bug. assert reset_obs.numpy() in env.observation_space assert reset_obs.x.shape == (64, 64, 3) ================================================ FILE: sequoia/methods/stable_baselines3_methods/off_policy_method.py ================================================ """ Base class used to not duplicate the tweaks made all the off-policy algos from SB3. """ import math import warnings from abc import ABC from dataclasses import dataclass from typing import Any, Callable, ClassVar, Optional, Type, Union import gym from gym import spaces from gym.spaces.utils import flatten_space from simple_parsing import mutable_field from simple_parsing.helpers.serialization import register_decoding_fn from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm, TrainFreq from sequoia.common.hparams import log_uniform, uniform from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .base import SB3BaseHParams, StableBaselines3Method logger = get_logger(__name__) def decode_trainfreq(v: Any): if isinstance(v, list) and len(v) == 2: return TrainFreq(v[0], v[1]) return v register_decoding_fn(TrainFreq, decode_trainfreq) class OffPolicyModel(OffPolicyAlgorithm, ABC): """Tweaked version of the OffPolicyAlgorithm from SB3.""" @dataclass class HParams(SB3BaseHParams): """Hyper-parameters common to all off-policy algos from SB3.""" # The learning rate, it can be a function of the current progress (from # 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4) # size of the replay buffer buffer_size: int = uniform(100, 10_000_000, default=1_000_000) # How many steps of the model to collect transitions for before learning # starts. learning_starts: int = 100 # Minibatch size for each gradient update batch_size: int = 256 # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32) # The soft update coefficient ("Polyak update", between 0 and 1) default # 1 for hard update tau: float = 0.005 # tau: float = uniform(0., 1., default=1.0) # The discount factor gamma: float = 0.99 # gamma: float = uniform(0.9, 0.9999, default=0.99) # Update the model every ``train_freq`` steps. Set to `-1` to disable. train_freq: int = 1 # train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10) # How many gradient steps to do after each rollout (see ``train_freq`` # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient # steps as steps done in the environment during the rollout. gradient_steps: int = 1 # gradient_steps: int = categorical(1, -1, default=1) # Enable a memory efficient variant of the replay buffer at a cost of # more complexity. # See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195 optimize_memory_usage: bool = False # Whether to create a second environment that will be used for # evaluating the agent periodically. (Only available when passing string # for the environment) create_eval_env: bool = False # The verbosity level: 0 no output, 1 info, 2 debug verbose: int = 1 @dataclass class OffPolicyMethod(StableBaselines3Method, ABC): """ABC for a Method that uses an off-policy Algorithm from SB3.""" # Type of model to use. This has to be overwritten in a subclass. Model: ClassVar[Type[OffPolicyModel]] = OffPolicyModel # Hyper-parameters of the DDPG model. hparams: OffPolicyModel.HParams = mutable_field(OffPolicyModel.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 2_048.0 def __post_init__(self): super().__post_init__() self.model: OffPolicyAlgorithm def configure(self, setting: ContinualRLSetting): super().configure(setting) # The default value for the buffer size in the DQN model is WAY too # large, so we re-size it depending on the size of the observations. # NOTE: (issue #156) Only consider the images, not the task labels for these # buffer size calculations (since the task labels might be None and have the # np.object dtype). x_space = setting.observation_space.x flattened_observation_space = flatten_space(x_space) observation_size_bytes = flattened_observation_space.sample().nbytes # IF there are more than a few dimensions per observation, then we # should probably reduce the size of the replay buffer according to # the size of the observations. max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024 max_buffer_length = max_buffer_size_bytes // observation_size_bytes if max_buffer_length == 0: raise RuntimeError( f"Couldn't even fit a single observation in the buffer, " f"given the specified max_buffer_size_megabytes " f"({self.max_buffer_size_megabytes}) and the size of a " f"single observation ({observation_size_bytes} bytes)!" ) if self.hparams.buffer_size > max_buffer_length: calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size calculated_size_gb = calculated_size_bytes / 1024**3 warnings.warn( RuntimeWarning( f"The selected buffer size ({self.hparams.buffer_size} is " f"too large! (It would take roughly around " f"{calculated_size_gb:.3f}Gb to hold many observations alone! " f"The buffer size will be capped at {max_buffer_length} " f"entries." ) ) self.hparams.buffer_size = int(max_buffer_length) # NOTE: Need to change some attributes depending on the maximal number of steps # in the environment allowed in the given Setting. if setting.train_max_steps: logger.info( f"Total training steps are limited to {setting.train_steps_per_task} " f"steps per task, {setting.train_max_steps} steps in total." ) ten_percent_of_step_budget = setting.steps_per_phase // 10 if self.hparams.buffer_size > ten_percent_of_step_budget: warnings.warn( RuntimeWarning("Reducing max buffer size to ten percent of the step budget.") ) self.hparams.buffer_size = ten_percent_of_step_budget if self.hparams.learning_starts > ten_percent_of_step_budget: logger.info( f"The model was originally going to use the first " f"{self.hparams.learning_starts} steps for pure random " f"exploration, but the setting has a max number of steps set to " f"{setting.train_max_steps}, therefore we will limit the number of " f"exploration steps to 10% of that 'step budget' = " f"{ten_percent_of_step_budget} steps." ) self.hparams.learning_starts = ten_percent_of_step_budget if self.hparams.train_freq != -1 and isinstance(self.hparams.train_freq, int): # Update the model at least 2 times during each task, and at most # once per step. self.hparams.train_freq = min( self.hparams.train_freq, int(0.5 * ten_percent_of_step_budget), ) self.hparams.train_freq = max(self.hparams.train_freq, 1) logger.info(f"Training frequency: {self.hparams.train_freq}") logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.") if setting.steps_per_phase: if not isinstance(self.hparams.train_freq, int): if self.hparams.train_freq[1] == "step": self.hparams.train_freq = self.hparams.train_freq[0] else: assert self.hparams.train_freq[1] == "episode" # Use some value based of the maximum episode length if available, # else use a "reasonable" default value. # TODO: Double-check that this makes sense. if setting.max_episode_steps: self.hparams.train_freq = setting.max_episode_steps else: self.hparams.train_freq = 10 warnings.warn( RuntimeWarning( f"Need the training frequency units to be steps for now! " f"(Train freq has been changed to every " f"{self.hparams.train_freq} steps)." ) ) # NOTE: We limit the number of training steps per task, such that we never # attempt to fill the buffer using more samples than the environment allows. if self.hparams.train_freq > setting.steps_per_phase: self.hparams.n_steps = math.ceil(0.1 * setting.steps_per_phase) logger.info( f"Capping the n_steps to 10% of step budget length: " f"{self.hparams.n_steps}" ) self.train_steps_per_task = min( self.train_steps_per_task, setting.steps_per_phase - self.hparams.train_freq - 1, ) logger.info(f"Limitting training steps per task to {self.train_steps_per_task}") def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OffPolicyModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) def clear_buffers(self): """Clears out the experience buffer of the Policy.""" # I think that's the right way to do it.. not sure. if self.model: # TODO: These are really interesting methods! # self.model.save_replay_buffer # self.model.load_replay_buffer self.model.replay_buffer.reset() ================================================ FILE: sequoia/methods/stable_baselines3_methods/off_policy_method_test.py ================================================ from typing import ClassVar, Dict, Type from .off_policy_method import OffPolicyAlgorithm, OffPolicyMethod class OffPolicyMethodTests: Method: ClassVar[Type[OffPolicyMethod]] Model: ClassVar[Type[OffPolicyAlgorithm]] debug_dataset: ClassVar[str] debug_kwargs: ClassVar[Dict] = {} ================================================ FILE: sequoia/methods/stable_baselines3_methods/on_policy_method.py ================================================ """ Base class used to not duplicate the tweaks made all the on-policy algos from SB3. """ import math import warnings from abc import ABC from dataclasses import dataclass from typing import Callable, ClassVar, Dict, Mapping, Optional, Type, Union import gym import torch from gym import spaces from simple_parsing import mutable_field from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm from sequoia.common.hparams import log_uniform, uniform from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .base import SB3BaseHParams, StableBaselines3Method logger = get_logger(__name__) class OnPolicyModel(OnPolicyAlgorithm, ABC): """Tweaked version of the OnPolicyAlgorithm from SB3.""" @dataclass class HParams(SB3BaseHParams): """Hyper-parameters common to all on-policy algos from SB3.""" # learning rate for the optimizer, it can be a function of the current # progress remaining (from 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-3) # The number of steps to run for each environment per update (i.e. batch size # is n_steps * n_env where n_env is number of environment copies running in # parallel) # NOTE: Default value here is much lower than in PPO, which might indicate # that this A2C is more "on-policy"? (i.e. that it requires data to be super # "fresh")? n_steps: int = uniform(3, 64, default=5, discrete=True) # Discount factor gamma: float = 0.99 # gamma: float = uniform(0.9, 0.9999, default=0.99) # Factor for trade-off of bias vs variance for Generalized Advantage Estimator. # Equivalent to classic advantage when set to 1. gae_lambda: float = 1.0 # gae_lambda: float = uniform(0.5, 1.0, default=1.0) # Entropy coefficient for the loss calculation ent_coef: float = 0.0 # ent_coef: float = uniform(0.0, 1.0, default=0.0) # Value function coefficient for the loss calculation vf_coef: float = 0.5 # vf_coef: float = uniform(0.01, 1.0, default=0.5) # The maximum value for the gradient clipping max_grad_norm: float = 0.5 # max_grad_norm: float = uniform(0.1, 10, default=0.5) # Whether to use generalized State Dependent Exploration (gSDE) instead of # action noise exploration (default: False) use_sde: bool = False # use_sde: bool = categorical(True, False, default=False) # Sample a new noise matrix every n steps when using gSDE. # Default: -1 (only sample at the beginning of the rollout) sde_sample_freq: int = -1 # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1) # The log location for tensorboard (if None, no logging) tensorboard_log: Optional[str] = None # # Whether to create a second environment that will be used for evaluating the # # agent periodically. (Only available when passing string for the environment) # create_eval_env: bool = False # # Additional arguments to be passed to the policy on creation # policy_kwargs: Optional[Dict[str, Any]] = None # The verbosity level: 0 no output, 1 info, 2 debug verbose: int = 1 # Seed for the pseudo random generators seed: Optional[int] = None # Device (cpu, cuda, ...) on which the code should be run. # Setting it to auto, the code will be run on the GPU if possible. device: Union[torch.device, str] = "auto" # :param _init_setup_model: Whether or not to build the network at the # creation of the instance # _init_setup_model: bool = True @dataclass class OnPolicyMethod(StableBaselines3Method, ABC): """Method that uses the A2C model from stable-baselines3.""" Model: ClassVar[Type[OnPolicyModel]] = OnPolicyModel # Hyper-parameters of the model/algorithm. hparams: OnPolicyModel.HParams = mutable_field(OnPolicyModel.HParams) def configure(self, setting: ContinualRLSetting): super().configure(setting=setting) if setting.steps_per_phase: min_model_updates = 20 if self.hparams.n_steps > setting.steps_per_phase // min_model_updates: # Set the number of steps per update so that there are *at least* # `min_model_updates` model updates during a single `fit` call. new_n_steps = math.ceil(setting.steps_per_phase / min_model_updates) warnings.warn( RuntimeWarning( f"Capping the number of steps per update to {new_n_steps}, in " f"order to update the model at least {min_model_updates} " f"times per phase (call to `fit`)." ) ) assert new_n_steps > 1 self.hparams.n_steps = new_n_steps # NOTE: We limit the number of trainign steps per task, such that we never # attempt to fill the buffer using more samples than the environment allows. self.train_steps_per_task = min( self.train_steps_per_task, setting.steps_per_phase - self.hparams.n_steps - 1, ) logger.info(f"Limitting training steps per task to {self.train_steps_per_task}") def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OnPolicyModel: logger.info("Creating model with hparams: \n" + self.hparams.dumps_json(indent="\t")) return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) def clear_buffers(self): """Clears out the experience buffer of the Policy.""" # I think that's the right way to do it.. not sure. if self.model: # TODO: These are really interesting methods! # self.model.save_replay_buffer # self.model.load_replay_buffer self.model.rollout_buffer.reset() def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]: search_space = super().get_search_space(setting) if isinstance(setting.action_space, spaces.Discrete): # From stable_baselines3/common/base_class.py", line 170: # > Generalized State-Dependent Exploration (gSDE) can only be used with # continuous actions # Therefore we remove related entries in the search space, so they keep # their default values. search_space.pop("use_sde", None) search_space.pop("sde_sample_freq", None) return search_space ================================================ FILE: sequoia/methods/stable_baselines3_methods/policy_wrapper.py ================================================ from abc import ABC, abstractmethod from functools import wraps from typing import ClassVar, Dict, Generic, Optional, Type, TypeVar, Union from stable_baselines3.a2c import A2C from stable_baselines3.a2c.policies import ActorCriticPolicy from stable_baselines3.common.base_class import BaseAlgorithm from stable_baselines3.common.policies import BasePolicy from torch import Tensor from sequoia.utils import get_logger logger = get_logger(__name__) T = TypeVar("T") Policy = TypeVar("Policy", bound=BasePolicy) SB3Algo = TypeVar("SB3Algo", bound=BaseAlgorithm) Wrapper = TypeVar("Wrapper", bound="PolicyWrapper") class PolicyWrapper(BasePolicy, ABC, Generic[Policy]): """Base class for 'wrappers' to be applied to policies from SB3. This adds "hooks" into the `step()` and `zero_grad()` method of the Policy's optimizer. NOTE: Hasn't been worked on in a while, would not recommend using this unless you're very familiar with SB3 source code and there is no other way of doing what you want. """ # Dictionary that stores the types of policies that have been 'wrapped' with # this mixin. _wrapped_classes: ClassVar[Dict[Type[T], Type[Union[T, "PolicyWrapper"]]]] = {} def __init__(self, *args, _already_initialized: bool = False, **kwargs): # When calling `EWCMixin.__init__(existing_policy)`, we don't want # to actually call the policy's __init__. if not _already_initialized: super().__init__(*args, **kwargs) @abstractmethod def get_loss(self: Policy) -> Union[float, Tensor]: """This will get called before the call to `policy.optimizer.step()` from within the `train` method of the algos from stable-baselines3. You can use this to return some kind of loss tensor to use. """ def before_optimizer_step(self: Policy): """Called before executing `self.policy.optimizer.step()` in the training loop of the SB3 algos. """ def after_zero_grad(self: Policy): """Called after `self.policy.optimizer.zero_grad()` in the training loop of the SB3 algos. """ # Backpropagate the loss here, by default, so that any grad clipping # also affects the grads of the loss, for instance. wrapper_loss = self.get_loss() logger.debug(f"{type(self).__name__} loss: {wrapper_loss}") if isinstance(wrapper_loss, Tensor) and wrapper_loss.requires_grad: wrapper_loss.backward(retain_graph=True) @classmethod def wrap_policy( cls: Type[Wrapper], policy: Policy, **mixin_init_kwargs ) -> Union[Policy, Wrapper]: """IDEA: "Wrap" a Policy, so that every time its optimizer's `step()` method gets called, it actually first backpropagates an EWC loss. Parameters ---------- policy : Policy [description] Returns ------- Union[Policy, EWCMixin] [description] """ assert isinstance(policy, BasePolicy) if not isinstance(policy, cls): # Dynamically change the class of this single instance to be a subclass # of its current class, with the addition of the EWCMixin base class. policy.__class__ = cls.wrap_policy_class(type(policy)) # 'initialize' the existing object for this mixin type. cls.__init__(policy, _already_initialized=True, **mixin_init_kwargs) assert isinstance(policy, cls) optimizer = policy.optimizer or policy.optimizer_class if optimizer is None: raise NotImplementedError("Need to have an optimizer instance atm") # 'Replace' the `policy.optimizer.step` with a function that might first # backpropagates the loss. _step = optimizer.step # NOTE: Setting the policy's `optimizer` attribute to a new value will # will actually break this. @wraps(optimizer.step) def new_optimizer_step(*args, **kwargs): policy.before_optimizer_step() return _step(*args, **kwargs) optimizer.step = new_optimizer_step _zero_grad = optimizer.zero_grad @wraps(optimizer.zero_grad) def new_zero_grad(*args, **kwargs): _zero_grad(*args, **kwargs) policy.after_zero_grad() optimizer.zero_grad = new_zero_grad return policy @classmethod def wrap_policy_class( cls: Type[Wrapper], policy_type: Type[Policy] ) -> Type[Union[Policy, Wrapper]]: """Add the wrapper as a base class to a policy type from SB3.""" assert issubclass(policy_type, BasePolicy) if issubclass(policy_type, cls): # It already has the mixin, so return the class unchanged. return policy_type # Save the results so we don't create two wrappers for the same class. if policy_type in cls._wrapped_classes: return cls._wrapped_classes[policy_type] class WrappedPolicy(policy_type, cls): # type: ignore pass WrappedPolicy.__name__ = policy_type.__name__ + "With" + cls.__name__ cls._wrapped_classes[policy_type] = WrappedPolicy return WrappedPolicy @classmethod def wrap_algorithm(cls: Type[Wrapper], algo: SB3Algo, **wrapper_kwargs) -> SB3Algo: """Wrap an existing algorithm's policy using this wrapper.""" assert isinstance(algo, BaseAlgorithm) if not isinstance(algo.policy, cls): # Dynamically change the class of this single instance to be a subclass # of its current class, with the addition of the EWCMixin base class. if algo.policy is None: # We want to wrap the _setup_model so the policy gets wrapped. # raise NotImplementedError("TODO") _original_setup_model = algo._setup_model @wraps(algo._setup_model) def _wrapped_setup_model(*args, **kwargs) -> None: _original_setup_model(*args, **kwargs) assert isinstance(algo.policy, BasePolicy) algo.policy = cls.wrap_policy(algo.policy, **wrapper_kwargs) algo._setup_model = _wrapped_setup_model else: algo.policy = cls.wrap_policy(algo.policy, **wrapper_kwargs) return algo @classmethod def wrap_algorithm_class( cls: Type[Wrapper], algo_type: Type[SB3Algo] ) -> Type[Union[SB3Algo, Wrapper]]: """Same idea, but wraps a class of algorithm, so that its policies are wrapped with this mixin. """ if algo_type in cls._wrapped_classes: return cls._wrapped_classes[algo_type] class WrappedAlgo(algo_type): # type: ignore def __init__(self, *args, **kwargs): # IDEA Extract the arguments that could be used for the wrapper? super().__init__(*args, **kwargs) self.policy: Union[BasePolicy, Wrapper] def _setup_model(self): super()._setup_model() # TODO: Figure out a way of passing the kwargs to the policy? # maybe using the 'policy_kwargs' argument to the constructor? self.policy = cls.wrap_policy(self.policy) # No need to change the train loop anymore! # def train(self) -> None: # return super().train() # IDEA: Redirect any failing attribute lookups to the policy? def __getattr__(self, attr: str): try: return super().__getattribute__(attr) except AttributeError as e: if hasattr(self.policy, attr): return getattr(self.policy, attr) raise e # The above would remove the need for any of these: # def on_task_switch(self, task_id: Optional[int]): # self.policy.on_task_switch(task_id) # def ewc_loss(self) -> Union[float, Tensor]: # return self.policy.ewc_loss() WrappedAlgo.__name__ = algo_type.__name__ + "With" + cls.__name__ cls._wrapped_classes[algo_type] = WrappedAlgo return WrappedAlgo from stable_baselines3 import A2C # Either 'manually', like this: class A2CWithEWC(A2C): def __init__(self, *args, ewc_coefficient: float = 1.0, ewc_p_norm: int = 2, **kwargs): self.ewc_coefficient = ewc_coefficient self.ewc_p_norm = ewc_p_norm super().__init__(*args, **kwargs) self.policy: Union[ActorCriticPolicy, EWC] def _setup_model(self): super()._setup_model() # Just to show that the policy was just wrapped. self.policy = EWC._wrap_policy( self.policy, ewc_coefficient=self.ewc_coefficient, ewc_p_norm=self.ewc_p_norm, ) def on_task_switch(self, task_id: Optional[int]) -> None: self.policy.on_task_switch(task_id) ## OR automatically, like this! # A2CWithEWC = EWC._wrap_algorithm_class(A2C) # DQNWithEWC = EWC._wrap_algorithm_class(DQN) # PPOWithEWC = EWC._wrap_algorithm_class(PPO) # DDPGWithEWC = EWC._wrap_algorithm_class(DDPG) # SACWithEWC = EWC._wrap_algorithm_class(SAC) ================================================ FILE: sequoia/methods/stable_baselines3_methods/ppo.py ================================================ """ Method that uses the PPO model from stable-baselines3 and targets the RL settings in the tree. """ from dataclasses import dataclass from typing import ClassVar, Dict, Mapping, Optional, Type, Union import gym import torch from gym import spaces from simple_parsing import mutable_field from stable_baselines3.ppo import PPO from sequoia.common.hparams import log_uniform from sequoia.methods import register_method from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .on_policy_method import OnPolicyMethod, OnPolicyModel logger = get_logger(__name__) class PPOModel(PPO, OnPolicyModel): """Proximal Policy Optimization algorithm (PPO) (clip version) - from SB3. Paper: https://arxiv.org/abs/1707.06347 Code: The SB3 implementation borrows code from OpenAI Spinning Up (https://github.com/openai/spinningup/) https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and and Stable Baselines (PPO2 from https://github.com/hill-a/stable-baselines) Introduction to PPO: https://spinningup.openai.com/en/latest/algorithms/ppo.html """ @dataclass class HParams(OnPolicyModel.HParams): """Hyper-parameters of the PPO Model.""" # # The policy model to use (MlpPolicy, CnnPolicy, ...) # policy: Union[str, Type[ActorCriticPolicy]] # # The environment to learn from (if registered in Gym, can be str) # env: Union[GymEnv, str] # The learning rate, it can be a function of the current progress remaining # (from 1 to 0) learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4) # The number of steps to run for each environment per update (i.e. batch size # is n_steps * n_env where n_env is number of environment copies running in # parallel) n_steps: int = log_uniform(32, 8192, default=2048, discrete=True) # Minibatch size batch_size: int = 64 # batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64) # Number of epoch when optimizing the surrogate loss n_epochs: int = 10 # Discount factor gamma: float = 0.99 # gamma: float = uniform(0.9, 0.9999, default=0.99) # Factor for trade-off of bias vs variance for Generalized Advantage Estimator gae_lambda: float = 0.95 # gae_lambda: float = uniform(0.8, 1.0, default=0.95) # Clipping parameter, it can be a function of the current progress remaining # (from 1 to 0). clip_range: float = 0.2 # clip_range: float = uniform(0.05, 0.4, default=0.2) # Clipping parameter for the value function, it can be a function of the current # progress remaining (from 1 to 0). This is a parameter specific to the OpenAI # implementation. If None is passed (default), no clipping will be done on the # value function. IMPORTANT: this clipping depends on the reward scaling. clip_range_vf: Optional[float] = None # Entropy coefficient for the loss calculation ent_coef: float = 0.0 # ent_coef: float = uniform(0., 1., default=0.0) # Value function coefficient for the loss calculation vf_coef: float = 0.5 # vf_coef: float = uniform(0.01, 1.0, default=0.5) # The maximum value for the gradient clipping max_grad_norm: float = 0.5 # max_grad_norm: float = uniform(0.1, 10, default=0.5) # Whether to use generalized State Dependent Exploration (gSDE) instead of # action noise exploration (default: False) use_sde: bool = False # use_sde: bool = categorical(True, False, default=False) # Sample a new noise matrix every n steps when using gSDE Default: -1 (only # sample at the beginning of the rollout) sde_sample_freq: int = -1 # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1) # Limit the KL divergence between updates, because the clipping is not enough to # prevent large update see issue #213 # (cf https://github.com/hill-a/stable-baselines/issues/213) # By default, there is no limit on the kl div. target_kl: Optional[float] = None # the log location for tensorboard (if None, no logging) tensorboard_log: Optional[str] = None # # Whether to create a second environment that will be used for evaluating the # # agent periodically. (Only available when passing string for the environment) # create_eval_env: bool = False # # Additional arguments to be passed to the policy on creation # policy_kwargs: Optional[Dict[str, Any]] = None # The verbosity level: 0 no output, 1 info, 2 debug verbose: int = 1 # Seed for the pseudo random generators seed: Optional[int] = None # Device (cpu, cuda, ...) on which the code should be run. Setting it to auto, # the code will be run on the GPU if possible. device: Union[torch.device, str] = "auto" # Whether or not to build the network at the creation of the instance # _init_setup_model: bool = True @register_method @dataclass class PPOMethod(OnPolicyMethod): """Method that uses the PPO model from stable-baselines3.""" Model: ClassVar[Type[PPOModel]] = PPOModel # Hyper-parameters of the PPO Model. hparams: PPOModel.HParams = mutable_field(PPOModel.HParams) def configure(self, setting: ContinualRLSetting): super().configure(setting=setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel: logger.info("Creating model with hparams: \n" + self.hparams.dumps_json(indent="\t")) return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]: return super().get_search_space(setting) if __name__ == "__main__": results = PPOMethod.main() print(results) ================================================ FILE: sequoia/methods/stable_baselines3_methods/ppo_test.py ================================================ from typing import ClassVar, Type from .base import BaseAlgorithm, StableBaselines3Method from .base_test import DiscreteActionSpaceMethodTests from .ppo import PPOMethod, PPOModel class TestPPO(DiscreteActionSpaceMethodTests): Method: ClassVar[Type[StableBaselines3Method]] = PPOMethod Model: ClassVar[Type[BaseAlgorithm]] = PPOModel ================================================ FILE: sequoia/methods/stable_baselines3_methods/sac.py ================================================ """ Method that uses the SAC model from stable-baselines3 and targets the RL settings in the tree. """ from dataclasses import dataclass from typing import Callable, ClassVar, Optional, Type, Union import gym from gym import spaces from simple_parsing import mutable_field from stable_baselines3.sac.sac import SAC from sequoia.common.hparams import log_uniform from sequoia.methods import register_method from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .off_policy_method import OffPolicyMethod, OffPolicyModel logger = get_logger(__name__) class SACModel(SAC, OffPolicyModel): """Customized version of the SAC model from stable-baselines-3.""" @dataclass class HParams(OffPolicyModel.HParams): """Hyper-parameters of the SAC Model.""" # The learning rate, it can be a function of the current progress (from # 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=3e-4) buffer_size: int = 1_000_000 learning_starts: int = 100 batch_size: int = 256 tau: float = 0.005 gamma: float = 0.99 train_freq = 1 gradient_steps: int = 1 # action_noise: Optional[ActionNoise] = None optimize_memory_usage: bool = False ent_coef: Union[str, float] = "auto" target_update_interval: int = 1 target_entropy: Union[str, float] = "auto" use_sde: bool = False sde_sample_freq: int = -1 @register_method @dataclass class SACMethod(OffPolicyMethod): """Method that uses the SAC model from stable-baselines3.""" Model: ClassVar[Type[SACModel]] = SACModel # Hyper-parameters of the SAC model. hparams: SACModel.HParams = mutable_field(SACModel.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 2_048.0 def configure(self, setting: ContinualRLSetting): super().configure(setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> SACModel: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) if __name__ == "__main__": results = SACMethod.main() print(results) ================================================ FILE: sequoia/methods/stable_baselines3_methods/sac_test.py ================================================ from typing import ClassVar, Type import pytest from sequoia.common.config import Config from sequoia.conftest import slow from sequoia.settings import Setting from sequoia.settings.rl import ContinualRLSetting, IncrementalRLSetting, TaskIncrementalRLSetting from .base import BaseAlgorithm, StableBaselines3Method from .base_test import ContinuousActionSpaceMethodTests from .sac import SACMethod, SACModel @slow @pytest.mark.timeout(120) class TestSAC(ContinuousActionSpaceMethodTests): Method: ClassVar[Type[StableBaselines3Method]] = SACMethod Model: ClassVar[Type[BaseAlgorithm]] = SACModel # TODO: Look into why SAC is so slow, there's probably a parameter which isn't being set # properly. @slow @pytest.mark.timeout(120) @pytest.mark.parametrize( "Setting", [ContinualRLSetting, IncrementalRLSetting, TaskIncrementalRLSetting] ) @pytest.mark.parametrize("observe_state", [True, False]) def test_continuous_mountaincar(self, Setting: Type[Setting], observe_state: bool): method = self.Method() setting = Setting( dataset="MountainCarContinuous-v0", nb_tasks=2, train_steps_per_task=1_000, test_steps_per_task=1_000, ) results: ContinualRLSetting.Results = setting.apply(method, config=Config(debug=True)) print(results.summary()) ================================================ FILE: sequoia/methods/stable_baselines3_methods/td3.py ================================================ """ TODO: Implement and test DDPG. """ from dataclasses import dataclass from typing import Callable, ClassVar, Optional, Type, Union import gym from gym import spaces from simple_parsing import mutable_field from stable_baselines3.common.off_policy_algorithm import TrainFreq from stable_baselines3.td3 import TD3 from sequoia.common.hparams import log_uniform from sequoia.methods import register_method from sequoia.settings.rl import ContinualRLSetting from sequoia.utils.logging_utils import get_logger from .off_policy_method import OffPolicyMethod, OffPolicyModel logger = get_logger(__name__) class TD3Model(TD3, OffPolicyModel): @dataclass class HParams(OffPolicyModel.HParams): """Hyper-parameters of the TD3 model.""" # TODO: Add HParams specific to TD3 here, if any, and also check that the # default values are correct. # The learning rate, it can be a function of the current progress (from # 1 to 0) learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3) # Minibatch size for each gradient update batch_size: int = 100 # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32) train_freq: TrainFreq = (1, "episode") # How many gradient steps to do after each rollout (see ``train_freq`` # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient # steps as steps done in the environment during the rollout. gradient_steps: int = -1 # gradient_steps: int = categorical(1, -1, default=1) @register_method @dataclass class TD3Method(OffPolicyMethod): """Method that uses the TD3 model from stable-baselines3.""" Model: ClassVar[Type[TD3Model]] = TD3Model hparams: TD3Model.HParams = mutable_field(TD3Model.HParams) # Approximate limit on the size of the replay buffer, in megabytes. max_buffer_size_megabytes: float = 2_048.0 def configure(self, setting: ContinualRLSetting): super().configure(setting) def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> TD3Model: return self.Model(env=train_env, **self.hparams.to_dict()) def fit(self, train_env: gym.Env, valid_env: gym.Env): super().fit(train_env=train_env, valid_env=valid_env) def get_actions( self, observations: ContinualRLSetting.Observations, action_space: spaces.Space ) -> ContinualRLSetting.Actions: return super().get_actions( observations=observations, action_space=action_space, ) def on_task_switch(self, task_id: Optional[int]) -> None: """Called when switching tasks in a CL setting. If task labels are available, `task_id` will correspond to the index of the new task. Otherwise, if task labels aren't available, `task_id` will be `None`. todo: use this to customize how your method handles task transitions. """ super().on_task_switch(task_id=task_id) if __name__ == "__main__": results = TD3Method.main() print(results) ================================================ FILE: sequoia/methods/stable_baselines3_methods/td3_test.py ================================================ from typing import ClassVar, Type from .base import BaseAlgorithm, StableBaselines3Method from .base_test import ContinuousActionSpaceMethodTests from .td3 import TD3Method, TD3Model class TestTD3(ContinuousActionSpaceMethodTests): Method: ClassVar[Type[StableBaselines3Method]] = TD3Method Model: ClassVar[Type[BaseAlgorithm]] = TD3Model ================================================ FILE: sequoia/methods/trainer.py ================================================ """ 'Patch' for the Trainer of Pytorch Lightning so it can use gym environment as dataloaders (via the GymDataLoader class of Sequoia). """ import os from dataclasses import dataclass from functools import singledispatch from pathlib import Path from typing import Any, Callable, Iterable, List, Optional, Union import gym import pytorch_lightning.trainer.connectors.data_connector import pytorch_lightning.utilities.apply_func import torch from pytorch_lightning import Callback from pytorch_lightning import Trainer as _Trainer from pytorch_lightning.loggers import LightningLoggerBase from pytorch_lightning.trainer.connectors.data_connector import DataConnector from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.utilities.apply_func import apply_to_collection from simple_parsing import choice from torch.utils.data import DataLoader from sequoia.common import Batch from sequoia.common.config import Config from sequoia.common.gym_wrappers.utils import IterableWrapper, has_wrapper from sequoia.common.hparams import HyperParameters, uniform from sequoia.settings.rl.continual.environment import GymDataLoader from sequoia.settings.sl import PassiveEnvironment from sequoia.utils.logging_utils import get_logger from sequoia.utils.parseable import Parseable logger = get_logger(__name__) @dataclass class TrainerConfig(HyperParameters, Parseable): """Configuration dataclass for a pytorch-lightning Trainer. See the docs for the Trainer from pytorch lightning for more info on the options. TODO: Pytorch Lightning already has a mechanism for adding argparse arguments for the Trainer.. It would be nice to find a way to use the 'native' way of adding arguments in PL in addition to using simple-parsing. """ gpus: int = torch.cuda.device_count() overfit_batches: float = 0.0 fast_dev_run: bool = False # Maximum number of epochs to train for. max_epochs: int = uniform(1, 100, default=10) # Number of nodes to use. num_nodes: int = 1 accelerator: Optional[str] = None log_gpu_memory: bool = False val_check_interval: Union[int, float] = 1.0 auto_scale_batch_size: Optional[str] = None auto_lr_find: bool = False # Floating point precision to use in the model. (See pl.Trainer) precision: int = choice(16, 32, default=32) default_root_dir: Path = Path(os.environ.get("RESULTS_DIR", os.getcwd() + "/results")) # How much of training dataset to check (floats = percent, int = num_batches) limit_train_batches: Union[int, float] = 1.0 # How much of validation dataset to check (floats = percent, int = num_batches) limit_val_batches: Union[int, float] = 1.0 # How much of test dataset to check (floats = percent, int = num_batches) limit_test_batches: Union[int, float] = 1.0 # If ``True``, enable checkpointing. # It will configure a default ModelCheckpoint callback if there is no user-defined # ModelCheckpoint in the `callbacks`. checkpoint_callback: bool = True def make_trainer( self, config: Config, callbacks: Optional[List[Callback]] = None, loggers: Iterable[LightningLoggerBase] = None, ) -> "Trainer": """Create a Trainer object from the command-line args. Adds the given loggers and callbacks as well. """ # FIXME: Trying to subclass the DataConnector to fix issues while iterating # over gym envs, that arise because of the _with_is_last() function from # lightning. import pytorch_lightning.trainer.trainer from pytorch_lightning.trainer.connectors.data_connector import DataConnector setattr(pytorch_lightning.trainer.trainer, "DataConnector", DataConnector) trainer = Trainer( logger=loggers, callbacks=callbacks, gpus=self.gpus, num_nodes=self.num_nodes, max_epochs=self.max_epochs, accelerator=self.accelerator, log_gpu_memory=self.log_gpu_memory, overfit_batches=self.overfit_batches, fast_dev_run=self.fast_dev_run, auto_scale_batch_size=self.auto_scale_batch_size, auto_lr_find=self.auto_lr_find, # TODO: Either move the log-dir-related stuff from Config to this # class, or figure out a way to pass the value from Config to this # function default_root_dir=self.default_root_dir, limit_train_batches=self.limit_train_batches, limit_val_batches=self.limit_val_batches, limit_test_batches=self.limit_train_batches, checkpoint_callback=self.checkpoint_callback, profiler=None, # TODO: Seem to have an impact on the problem below. ) return trainer class Trainer(_Trainer): def __init__(self, **kwargs): super().__init__(**kwargs) def fit(self, model, train_dataloader=None, val_dataloaders=None, datamodule=None): # TODO: Figure out what method to overwrite to fix the problem of accessing two # batches in a row in the environment. (with_is_last annoyance.) if isinstance(train_dataloader, gym.Env): if has_wrapper(train_dataloader, GymDataLoader): train_env = train_dataloader # raise NotImplementedError("TODO: Fix this.") return super().fit( model, train_dataloader=train_dataloader, val_dataloaders=val_dataloaders, datamodule=datamodule, ) # TODO: Debugging/fixing this buggy method from Pytorch-Lightning. # def _apply_to_collection( # data: Any, # dtype: Union[type, tuple], # function: Callable, # *args, # wrong_dtype: Optional[Union[type, tuple]] = None, # **kwargs # ) -> Any: apply_to_collection = singledispatch(apply_to_collection) setattr(pytorch_lightning.utilities.apply_func, "apply_to_collection", apply_to_collection) # import pytorch_lightning.overrides.data_parallel # setattr(pytorch_lightning.overrides.data_parallel, "apply_to_collection", apply_to_collection) @apply_to_collection.register(Batch) def _apply_to_batch( data: Batch, dtype: Union[type, tuple], function: Callable, *args, wrong_dtype: Optional[Union[type, tuple]] = None, **kwargs, ) -> Any: # assert False, f"YAY! {type(data)}" # logger.debug(f"{type(data)}, {dtype}, {function}, {args}, {wrong_dtype}, {kwargs}") return type(data)( **{ k: apply_to_collection(v, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs) for k, v in data.items() } ) class ProfiledEnvironment(IterableWrapper, DataLoader): def __iter__(self): for i, obs in enumerate(super().__iter__()): # logger.debug(f"Step {i}, obs.done={obs.done}") done = obs.done if not isinstance(done, bool) or not done.shape: # TODO: When we have batch size of 1, or more generally in RL, do we # want one call to `trainer.fit` to last a given number of episodes ? # TODO: Look into the `max_steps` argument to Trainer. done = all(done) # done = done or self.is_closed() done = self.is_closed() yield i, (obs, done) class PatchedDataConnector(DataConnector): def get_profiled_train_dataloader(self, train_dataloader: DataLoader): if isinstance(train_dataloader, CombinedLoader) and isinstance( train_dataloader.loaders, gym.Env ): env = train_dataloader.loaders # TODO: Replacing this 'CombinedLoader' on the Trainer with the env, since I # don't think we need it (not using multiple train dataloaders with PL atm.) self.trainer.train_dataloader = env if not isinstance(env.unwrapped, PassiveEnvironment): # Only really need to do this 'profile' thing for 'active' environments. return ProfiledEnvironment(env) else: # This gets called before each epoch, so we get here on the start of the # second training epoch. # TODO: Check that this isn't causing issues between tasks assert train_dataloader is self.trainer.train_dataloader profiled_dl = self.trainer.profiler.profile_iterable( enumerate(prefetch_iterator(train_dataloader)), "get_train_batch" ) return profiled_dl setattr( pytorch_lightning.trainer.connectors.data_connector, "DataConnector", PatchedDataConnector, ) pytorch_lightning.trainer.connectors.data_connector.DataConnector = PatchedDataConnector ================================================ FILE: sequoia/methods.puml ================================================ @startuml methods ' !include gym.plantuml ' remove gym.spaces ' TODO: There must be a simpler way to only keep a single node, right? ' !include settings.puml ' remove settings.active ' remove settings.assumptions ' remove settings.passive ' remove SettingABC ' !include settings/base.puml package methods { package base_method { class BaseMethod implements Method { + hparams: BaseModel.HParams + config: Config + trainer_options: TrainerConfig + trainer: Trainer } } package aux_tasks { package auxiliary_task { abstract class AuxiliaryTask { + options: AuxiliaryTask.Options + get_loss(ForwardPass, Actions, Rewards): Loss } abstract class AuxiliaryTask.Options { + coefficient: float } AuxiliaryTask *-- AuxiliaryTask.Options } } !include ./methods/models.puml } @enduml ================================================ FILE: sequoia/sequoia.puml ================================================ @startuml sequoia package sequoia { !include common.puml !include settings.puml !include methods.puml } @enduml ================================================ FILE: sequoia/settings/README.md ================================================ # Sequoia - Settings ### (WIP) Adding a new Setting: Prerequisites: - Take a quick look at the `dataclasses` example - Take a quick look at [simple_parsing](https://github.com/lebrice/SimpleParsing) (A python package I've created) which we use to generate the command-line arguments for the Settings. ## Available Settings: - ## [Setting](sequoia/settings/base/setting.py) Base class for all research settings in ML: Root node of the tree. A 'setting' is loosely defined here as a learning problem with a specific set of assumptions, restrictions, and an evaluation procedure. For example, Reinforcement Learning is a type of Setting in which we assume that an Agent is able to observe an environment, take actions upon it, and receive rewards back from the environment. Some of the assumptions include that the reward is dependant on the action taken, and that the actions have an impact on the environment's state (and on the next observations the agent will receive). The evaluation procedure consists in trying to maximize the reward obtained from an environment over a given number of steps. This 'Setting' class should ideally represent the most general learning problem imaginable, with almost no assumptions about the data or evaluation procedure. This is a dataclass. Its attributes are can also be used as command-line arguments using `simple_parsing`. Abstract (required) methods: - **apply** Applies a given Method on this setting to produce Results. - **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode). - **setup** (things to do on every accelerator in distributed mode). - **train_dataloader** the training environment/dataloader. - **val_dataloader** the val environments/dataloader(s). - **test_dataloader** the test environments/dataloader(s). "Abstract"-ish (required) class attributes: - `Results`: The class of Results that are created when applying a Method on this setting. - `Observations`: The type of Observations that will be produced in this setting. - `Actions`: The type of Actions that are expected from this setting. - `Rewards`: The type of Rewards that this setting will (potentially) return upon receiving an action from the method. - ## [RLSetting](sequoia/settings/rl/setting.py) LightningDataModule for an 'active' setting. This is to be the parent of settings like RL or maybe Active Learning. - ## [ContinualRLSetting](sequoia/settings/rl/continual/setting.py) Reinforcement Learning Setting where the environment changes over time. This is an Active setting which uses gym environments as sources of data. These environments' attributes could change over time following a task schedule. An example of this could be that the gravity increases over time in cartpole, making the task progressively harder as the agent interacts with the environment. - ## [DiscreteTaskAgnosticRLSetting](sequoia/settings/rl/discrete/setting.py) Continual Reinforcement Learning Setting where there are clear task boundaries, but where the task information isn't available. - ## [IncrementalRLSetting](sequoia/settings/rl/incremental/setting.py) Continual RL setting in which: - Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL) - Task boundary information (and task labels) are given at training time - Task boundary information is given at test time, but task identity is not. - ## [TaskIncrementalRLSetting](sequoia/settings/rl/task_incremental/setting.py) Continual RL setting with clear task boundaries and task labels. The task labels are given at both train and test time. - ## [MultiTaskRLSetting](sequoia/settings/rl/multi_task/setting.py) Reinforcement Learning setting where the environment alternates between a set of tasks sampled uniformly. Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled during training. - ## [TraditionalRLSetting](sequoia/settings/rl/traditional/setting.py) Your usual "Classical" Reinforcement Learning setting. Implemented as a MultiTaskRLSetting, but with a single task. - ## [MultiTaskRLSetting](sequoia/settings/rl/multi_task/setting.py) Reinforcement Learning setting where the environment alternates between a set of tasks sampled uniformly. Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled during training. - ## [SLSetting](sequoia/settings/sl/setting.py) Supervised Learning Setting. Core assuptions: - Current actions have no influence on future observations. - The environment gives back "dense feedback", (the 'reward' associated with all possible actions at each step, rather than a single action) For example, supervised learning is a Passive setting, since predicting a label has no effect on the reward you're given (the label) or on the next samples you observe. - ## [ContinualSLSetting](sequoia/settings/sl/continual/setting.py) Continuous, Task-Agnostic, Continual Supervised Learning. This is *currently* the most "general" Supervised Continual Learning setting in Sequoia. - Data distribution changes smoothly over time. - Smooth transitions between "tasks" - No information about task boundaries or task identity (no task IDs) - Maximum of one 'epoch' through the environment. - ## [DiscreteTaskAgnosticSLSetting](sequoia/settings/sl/discrete/setting.py) Continual Supervised Learning Setting where there are clear task boundaries, but where the task information isn't available. - ## [IncrementalSLSetting](sequoia/settings/sl/incremental/setting.py) Supervised Setting where the data is a sequence of 'tasks'. This class is basically is the supervised version of an Incremental Setting The current task can be set at the `current_task_id` attribute. - ## [TaskIncrementalSLSetting](sequoia/settings/sl/task_incremental/setting.py) Setting where data arrives in a series of Tasks, and where the task labels are always available (both train and test time). - ## [MultiTaskSLSetting](sequoia/settings/sl/multi_task/setting.py) IID version of the Task-Incremental Setting, where the data is shuffled. Can be used to estimate the upper bound performance of Task-Incremental CL Methods. - ## [DomainIncrementalSLSetting](sequoia/settings/sl/domain_incremental/setting.py) Supervised CL Setting where the input domain shifts incrementally. Task labels and task boundaries are given at training time, but not at test-time. The crucial difference between the Domain-Incremental and Class-Incremental settings is that the action space is smaller in domain-incremental learning, as it is a `Discrete(n_classes_per_task)`, rather than the `Discrete(total_classes)` in Class-Incremental setting. For example: Create a classifier for odd vs even hand-written digits. It first be trained on digits 0 and 1, then digits 2 and 3, then digits 4 and 5, etc. At evaluation time, it will be evaluated on all digits - ## [TraditionalSLSetting](sequoia/settings/sl/traditional/setting.py) Your 'usual' supervised learning Setting, where the samples are i.i.d. This Setting is slightly different than the others, in that it can be recovered in *two* different ways: - As a variant of Task-Incremental learning, but where there is only one task; - As a variant of Domain-Incremental learning, but where there is only one task. - ## [MultiTaskSLSetting](sequoia/settings/sl/multi_task/setting.py) IID version of the Task-Incremental Setting, where the data is shuffled. Can be used to estimate the upper bound performance of Task-Incremental CL Methods. ================================================ FILE: sequoia/settings/__init__.py ================================================ """ """ import inspect from typing import Any, Dict, Iterable, List, Set, Type from .base.bases import Method, SettingABC from .base.environment import Environment from .base.objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType from .base.results import Results from .base.setting import Setting, SettingType from .rl import * from .sl import * # # all concrete settings: # all_settings: List[Type[Setting]] = [ # ClassIncrementalSetting, # DomainIncrementalSetting, # TaskIncrementalSLSetting, # TraditionalSLSetting, # MultiTaskSetting, # ContinualRLSetting, # IncrementalRLSetting, # TaskIncrementalRLSetting, # RLSetting, # ] # Or, get All the settings: all_settings: Set[Type[SettingABC]] = set([Setting, *Setting.children()]) # FIXME: Remove this, just checking the inspect atm.: # import inspect # import pprint # print(Setting.get_tree_string()) # exit() # print(inspect.getclasstree(all_settings, unique=True)) # assert False # assert False, all_settings ================================================ FILE: sequoia/settings/assumptions/__init__.py ================================================ """ WIP: Mixin-style classes that define 'traits'/'assumptions' about a Setting. IDEA: This package could define things that are to be reused in both the RL and the CL branches, kindof like a horizontal slice accross the tree. The reasoning behind this is that some methods might require task labels, but apply on both sides of the tree. An alternative to this could also be to allow Methods to target multiple settings, but this could get weird pretty quick. """ from .incremental import IncrementalAssumption # from .task_incremental import TaskIncrementalSLSetting ================================================ FILE: sequoia/settings/assumptions/assumptions.puml ================================================ @startuml assumptions package assumptions { ' TODO: How to describe relationship between gym.Env and these other ' assumptions about the env? ' abstract class Environment { ' } ' gym.Env --|> Environment package "assumptions about the environment" as supervision_assumptions { package "effect of future actions on the environment" as active_vs_passive { interface PossiblyActiveEnvironment <> { # Actions MAY influence future observations } abstract class ActiveEnvironment <> extends PossiblyActiveEnvironment { # Actions DO influence future observations -- Examples: Playing tennis } abstract class PassiveEnvironment <> extends PossiblyActiveEnvironment { Actions DONT influence future observations -- Examples: + Predicting what might happen next when watching a movie. } ' Environment --|> PossiblyActiveEnvironment } package "type of feedback (rewards)" as feedback_type_assumption { interface Feedback <> {} abstract class SparseFeedback <> extends Feedback { the environment only gives back the reward associated with the action taken. -- Example: When you play a game, you get a reward based on how good your action was. } abstract class DenseFeedback <> extends SparseFeedback { The environment gives the reward for all possible actions at every step. -- Example: Image classification: The method is told what the image was and what it was not. The reward (correct vs incorrect prediction) is given for all the potential actions! } } } package "assumptions about the context" as context_assumption_family { package "discrete vs continuous" as context_continuous_vs_discrete { abstract class ContinuousContext <> { The context variable is continuous: c ∈ R Example: Varying friction with the ground in an environment. } abstract class DiscreteContext <> extends ContinuousContext { The context variable is discrete: c ∈ N Example: A list of possible tasks } abstract class FixedContext <> extends DiscreteContext { The context variable is fixed to a single value } } package "observability" as context_observability { abstract class HiddenContext <> { Methods don't have access to the context variable. } ' abstract class BoundariesObservable <> extends HiddenContext { ' Task boundaries are given during training ' } abstract class PartiallyObservableContext <> extends HiddenContext { Methods may have access to the context variable some of the time Example: Have task labels during training, but not during testing. } abstract class FullyObservableContext <> extends PartiallyObservableContext { Methods always have access to the context variable. i.e., during training and testing. } } package "non-stationarity" as context_nonstationarity_assumption { abstract class Continual <> { The context may change smoothly over time. } abstract class Incremental <> extends Continual { The context can change suddenly (task boundaries) } abstract class Stationary <> extends Incremental { The context is sampled uniformly } } package "shared vs disjoint spaces between tasks" as action_space_assumption { ' NOTE: We could have this for the observation and reward spaces too! abstract class PossiblySharedActionSpace { It is possible that there is an overlap in the action space between tasks. } abstract class SharedActionSpaces extends PossiblySharedActionSpace { The action space remains the same in all tasks. } abstract class DisjointActionSpaces extends PossiblySharedActionSpace { Each task has its own (disjoint) action space. } } } } package cl { package continuous { abstract class ContinuousTaskAgnosticSetting <> extends base.SettingABC { - clear_task_boundaries: bool = False ' - task_labels_at_train_time: bool = False ' - task_labels_at_test_time: bool = False ' - stationary_context: bool = False ' - shared_action_space: bool = False } abstract class continuous.Environment <> extends gym.Env {} abstract class continuous.Observations <> extends base.Observations {} abstract class continuous.Actions <> extends base.Actions {} abstract class continuous.Rewards <> extends base.Rewards {} ' continuous.Environment -.- continuous.Observations: yields ' continuous.Environment -.- continuous.Actions: receives ' continuous.Environment -.- continuous.Rewards: returns } package discrete { abstract class DiscreteTaskAgnosticSetting <> extends ContinuousTaskAgnosticSetting { == New assumptions == + clear_task_boundaries: Constant[bool] = True ' + known_task_boundaries_at_train_time: bool = False ' + known_task_boundaries_at_test_time: bool = False == Inherited assumptions == ' # task_labels_at_train_time: bool = False ' # task_labels_at_test_time: bool = False ' # stationary_context: bool = False ' # shared_action_space: bool = False } abstract class discrete.Environment <> extends continuous.Environment {} abstract class discrete.Observations <> extends continuous.Observations {} abstract class discrete.Actions <> extends continuous.Actions {} abstract class discrete.Rewards <> extends continuous.Rewards {} ' discrete.Environment -.- discrete.Observations: yields ' discrete.Environment -.- discrete.Actions: receives ' discrete.Environment -.- discrete.Rewards: returns } package incremental { abstract class IncrementalSetting <> extends DiscreteTaskAgnosticSetting{ == New assumptions == + known_task_boundaries_at_train_time: Constant[bool] = True + known_task_boundaries_at_test_time: Constant[bool] = True == Inherited assumptions == # clear_task_boundaries: Constant[bool] = True ' # task_labels_at_train_time: bool = False ' # task_labels_at_test_time: bool = False ' # shared_action_space: bool = False ' # stationary_context: bool = False } abstract class incremental.Environment <> extends discrete.Environment {} abstract class incremental.Observations <> extends discrete.Observations {} abstract class incremental.Actions <> extends discrete.Actions {} abstract class incremental.Rewards <> extends discrete.Rewards {} ' incremental.Environment -.- incremental.Observations: yields ' incremental.Environment -.- incremental.Actions: receives ' incremental.Environment -.- incremental.Rewards: returns } package class_incremental { abstract class ClassIncrementalSetting <> extends IncrementalSetting { == New assumptions == + shared_action_space: Constant[bool] = False == Inherited assumptions == # clear_task_boundaries: Constant[bool] = True # known_task_boundaries_at_train_time: Constant[bool] = True # known_task_boundaries_at_test_time: Constant[bool] = True ' # task_labels_at_train_time: bool = False ' # task_labels_at_test_time: bool = False ' # stationary_context: bool = False } abstract class class_incremental.Environment <> extends incremental.Environment {} abstract class class_incremental.Observations <> extends incremental.Observations {} abstract class class_incremental.Actions <> extends incremental.Actions {} abstract class class_incremental.Rewards <> extends incremental.Rewards {} ' class_incremental.Environment -.- class_incremental.Observations: yields ' class_incremental.Environment -.- class_incremental.Actions: receives ' class_incremental.Environment -.- class_incremental.Rewards: returns } package domain_incremental { abstract class DomainIncrementalSetting <> extends IncrementalSetting { == New assumptions == + shared_action_space: Constant[bool] = True == Inherited assumptions == # clear_task_boundaries: Constant[bool] = True # known_task_boundaries_at_train_time: Constant[bool] = True # known_task_boundaries_at_test_time: Constant[bool] = True } abstract class domain_incremental.Environment <> extends incremental.Environment {} abstract class domain_incremental.Observations <> extends incremental.Observations {} abstract class domain_incremental.Actions <> extends incremental.Actions {} abstract class domain_incremental.Rewards <> extends incremental.Rewards {} ' domain_incremental.Environment -.- domain_incremental.Observations: yields ' domain_incremental.Environment -.- domain_incremental.Actions: receives ' domain_incremental.Environment -.- domain_incremental.Rewards: returns } package task_incremental { abstract class TaskIncrementalSetting <> extends IncrementalSetting { == New assumptions == + task_labels_at_train_time: Constant[bool] = True + task_labels_at_test_time: Constant[bool] = True == Inherited assumptions == # clear_task_boundaries: Constant[bool] = True # known_task_boundaries_at_train_time: Constant[bool] = True # known_task_boundaries_at_test_time: Constant[bool] = True } abstract class task_incremental.Environment <> extends incremental.Environment {} abstract class task_incremental.Observations <> extends incremental.Observations {} abstract class task_incremental.Actions <> extends incremental.Actions {} abstract class task_incremental.Rewards <> extends incremental.Rewards {} ' task_incremental.Environment -.- task_incremental.Observations: yields ' task_incremental.Environment -.- task_incremental.Actions: receives ' task_incremental.Environment -.- task_incremental.Rewards: returns } package traditional{ abstract class TraditionalSetting <> extends IncrementalSetting { == New assumptions == + stationary_context: Constant[bool] = True == Inherited assumptions == # clear_task_boundaries: Constant[bool] = True } abstract class traditional.Environment <> extends incremental.Environment {} abstract class traditional.Observations <> extends incremental.Observations {} abstract class traditional.Actions <> extends incremental.Actions {} abstract class traditional.Rewards <> extends incremental.Rewards {} ' traditional.Environment -.- traditional.Observations: yields ' traditional.Environment -.- traditional.Actions: receives ' traditional.Environment -.- traditional.Rewards: returns } package multi_task { abstract class MultiTaskSetting <> extends TaskIncrementalSetting, TraditionalSetting { == New assumptions (compared to Traditional) == + task_labels_at_train_time: Constant[bool] = True + task_labels_at_test_time: Constant[bool] = True == New assumptions (compared to TaskIncremental) == + stationary_context: Context[bool] = True == Inherited assumptions == # stationary_context: Context[bool] = True # task_labels_at_train_time: Constant[bool] = True # task_labels_at_test_time: Constant[bool] = True # clear_task_boundaries: Constant[bool] = True # known_task_boundaries_at_train_time: Constant[bool] = True # known_task_boundaries_at_test_time: Constant[bool] = True } abstract class multi_task.Environment <> extends task_incremental.Environment, traditional.Environment {} abstract class multi_task.Observations <> extends task_incremental.Observations, traditional.Observations {} abstract class multi_task.Actions <> extends task_incremental.Actions, traditional.Actions {} abstract class multi_task.Rewards <> extends task_incremental.Rewards, traditional.Rewards {} } } ' !include settings/base/base.puml ' remove settings.base ' !include gym.puml remove assumptions ' remove @unlinked remove class_incremental remove domain_incremental ' remove <> ' remove <> ' remove <> ' remove <> ' show context_assumption_family ' remove assumptions ' remove supervision_assumptions ' remove context_assumption_family ' remove <> ' remove <> ' remove sl ' remove cl ' remove rl ' show SLSetting ' show RLSetting ' remove <> ' hide empty fields ' hide empty methods ' ' remove gym ' remove gym.spaces ' ' remove cl ' remove class_incremental ' remove domain_incremental @enduml ================================================ FILE: sequoia/settings/assumptions/base.py ================================================ from sequoia.settings.base.bases import SettingABC from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) # IDEA: (@lebrice) Exploring the idea of using metaclasses to customize the isinstance # and subclass checks, so that it could be property-based. This is probably not worth it # though. # It's also quite dumb that we have to extend a metaclass from pytorch lightning! # class AssumptionMeta(_DataModuleWrapper): # def __instancecheck__(self, instance: Union[SettingABC, Any]): # logger.debug(f"InstanceCheck on assumption {self} for instance {instance}") # return super().__instancecheck__(instance) class AssumptionBase(SettingABC): pass ================================================ FILE: sequoia/settings/assumptions/classification.py ================================================ # TODO: Test if a `Protocol` task from the typing or typing-extensions module could be # used as an Assumption, based on the type of action space on the Setting, etc. # def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]: # """ Returns the number of classes in the given task. """ # increment = self.increment if train else self.test_increment # if isinstance(increment, list): # return increment[task_id] # return increment # def num_classes_in_current_task(self, train: bool = None) -> int: # """ Returns the number of classes in the current task. """ # # TODO: Its ugly to have the 'method' tell us if we're currently in # # train/eval/test, no? Maybe just make a method for each? # return self.num_classes_in_task(self._current_task_id, train=train) # def task_classes(self, task_id: int, train: bool) -> List[int]: # """ Gives back the 'true' labels present in the given task. """ # start_index = sum(self.num_classes_in_task(i, train) for i in range(task_id)) # end_index = start_index + self.num_classes_in_task(task_id, train) # if train: # return self.class_order[start_index:end_index] # else: # return self.test_class_order[start_index:end_index] # def current_task_classes(self, train: bool) -> List[int]: # """ Gives back the labels present in the current task. """ # return self.task_classes(self._current_task_id, train) ================================================ FILE: sequoia/settings/assumptions/context_discreteness.py ================================================ from dataclasses import dataclass from sequoia.utils.utils import constant, flag from .base import AssumptionBase @dataclass class ContinuousContextAssumption(AssumptionBase): # Wether we have clear boundaries between tasks, or if the transitions are smooth. # Equivalent to wether the context variable is discrete vs continuous. smooth_task_boundaries: bool = flag(True) @dataclass class DiscreteContextAssumption(ContinuousContextAssumption): # Wether we have clear boundaries between tasks, or if the transitions are smooth. # Equivalent to wether the context variable is discrete vs continuous. smooth_task_boundaries: bool = constant(False) ================================================ FILE: sequoia/settings/assumptions/context_visibility.py ================================================ from dataclasses import dataclass from sequoia.utils.utils import constant, flag from .base import AssumptionBase @dataclass class HiddenContextAssumption(AssumptionBase): # Wether the task labels are observable during training. task_labels_at_train_time: bool = flag(False) # Wether the task labels are observable during testing. task_labels_at_test_time: bool = flag(False) # Wether we get informed when reaching the boundary between two tasks during # training. known_task_boundaries_at_train_time: bool = flag(False) # Wether we get informed when reaching the boundary between two tasks during # testing. known_task_boundaries_at_test_time: bool = flag(False) @dataclass class PartiallyObservableContextAssumption(HiddenContextAssumption): # Wether the task labels are observable during training. task_labels_at_train_time: bool = constant(True) # Wether we get informed when reaching the boundary between two tasks during # training. known_task_boundaries_at_train_time: bool = constant(True) known_task_boundaries_at_test_time: bool = flag(True) @dataclass class FullyObservableContextAssumption(PartiallyObservableContextAssumption): # Wether the task labels are observable during testing. task_labels_at_test_time: bool = constant(True) # Wether we get informed when reaching the boundary between two tasks during # testing. known_task_boundaries_at_test_time: bool = constant(True) ================================================ FILE: sequoia/settings/assumptions/continual.py ================================================ import itertools import json import time from abc import ABC, abstractmethod from dataclasses import asdict, dataclass, field, is_dataclass from io import StringIO from pathlib import Path from typing import Any, ClassVar, Dict, Optional, Type import gym import tqdm from gym.vector.utils import batch_space from simple_parsing import field from simple_parsing.helpers.serialization.serializable import Serializable from torch import Tensor from wandb.wandb_run import Run import wandb from sequoia.common.config import Config, WandbConfig from sequoia.common.gym_wrappers.utils import IterableWrapper from sequoia.common.metrics import Metrics, MetricsType from sequoia.settings.base import Actions, Method from sequoia.settings.base.results import Results from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import add_prefix, flag from .base import AssumptionBase from .iid_results import TaskResults logger = get_logger(__name__) @dataclass class ContinualResults(TaskResults[MetricsType]): _runtime: Optional[float] = None _online_training_performance: Dict[int, MetricsType] = field(default_factory=dict) @property def online_performance(self) -> Dict[int, MetricsType]: """Returns the online training performance. In SL, this is only recorded over the first epoch. Returns ------- Dict[int, MetricType] a dictionary mapping from step number to the Metrics object produced at that step. """ if not self._online_training_performance: return {} return self._online_training_performance @property def online_performance_metrics(self) -> MetricsType: return sum(self.online_performance.values(), Metrics()) def to_log_dict(self, verbose: bool = False) -> Dict: log_dict = {} log_dict["Average Performance"] = super().to_log_dict(verbose=verbose) if self._online_training_performance: log_dict["Online Performance"] = self.online_performance_metrics.to_log_dict( verbose=verbose ) return log_dict def summary(self, verbose: bool = False) -> str: s = StringIO() print(json.dumps(self.to_log_dict(verbose=verbose), indent="\t"), file=s) s.seek(0) return s.read() @dataclass class ContinualAssumption(AssumptionBase): """Assumptions for Setting where the environments change over time.""" # Which dataset to use. # dataset: ClassVar[str] = "" known_task_boundaries_at_train_time: bool = flag(False) # Wether we get informed when reaching the boundary between two tasks during # training. Only used when `smooth_task_boundaries` is False. known_task_boundaries_at_test_time: bool = flag(False) # Wether we have sudden changes in the environments, or if the transition # are "smooth". smooth_task_boundaries: bool = flag(True) # Wether task labels are available at train time. # NOTE: Forced to True at the moment. task_labels_at_train_time: bool = flag(False) # Wether task labels are available at test time. task_labels_at_test_time: bool = flag(False) @dataclass(frozen=True) class Observations(AssumptionBase.Observations): task_labels: Optional[Tensor] = None @dataclass(frozen=True) class Actions(AssumptionBase.Actions): pass @dataclass(frozen=True) class Rewards(AssumptionBase.Rewards): pass # TODO: Move everything necessary to get ContinualRLSetting to work out of # Incremental and into this here. Makes no sense that ContinualRLSetting inherits # from Incremental, rather than this! Results: ClassVar[Type[ContinualResults]] = ContinualResults # Options related to Weights & Biases (wandb). Turned Off by default. Passing any of # its arguments will enable wandb. # NOTE: Adding `cmd=False` here, so we only create the args in `Experiment`. # TODO: Fix this up. wandb: Optional[WandbConfig] = field(default=None, compare=False, cmd=False) def main_loop(self, method: Method) -> ContinualResults: """Runs a continual learning training loop, wether in RL or CL.""" # TODO: Add ways of restoring state to continue a given run. if self.wandb and self.wandb.project: # Init wandb, and then log the setting's options. self.wandb_run = self.setup_wandb(method) method.setup_wandb(self.wandb_run) train_env = self.train_dataloader() valid_env = self.val_dataloader() logger.info(f"Starting training") method.set_training() self._start_time = time.process_time() method.fit( train_env=train_env, valid_env=valid_env, ) train_env.close() valid_env.close() logger.info(f"Finished Training.") results = self.test_loop(method) if self.monitor_training_performance: results._online_training_performance = train_env.get_online_performance() logger.info(f"Resulting objective of Test Loop: {results.objective}") self._end_time = time.process_time() runtime = self._end_time - self._start_time results._runtime = runtime logger.info(f"Finished main loop in {runtime} seconds.") self.log_results(method, results) return results def test_loop(self, method: Method) -> "IncrementalAssumption.Results": """WIP: Continual test loop.""" test_env = self.test_dataloader() test_env: TestEnvironment was_training = method.training method.set_testing() try: # If the Method has `test` defined, use it. method.test(test_env) test_env.close() test_env: TestEnvironment # Get the metrics from the test environment test_results: Results = test_env.get_results() except NotImplementedError: logger.debug( f"Will query the method for actions at each step, " f"since it doesn't implement a `test` method." ) obs = test_env.reset() # TODO: Do we always have a maximum number of steps? or of episodes? # Will it work the same for Supervised and Reinforcement learning? max_steps: int = getattr(test_env, "step_limit", None) # Reset on the last step is causing trouble, since the env is closed. pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test") episode = 0 for step in pbar: if obs is None: break # NOTE: The env might not be closed, while `obs` is actually still there. # if test_env.is_closed(): # logger.debug(f"Env is closed") # break # logger.debug(f"At step {step}") # BUG: Need to pass an action space that actually reflects the batch # size, even for the last batch! # BUG: This doesn't work if the env isn't batched. action_space = test_env.action_space batch_size = getattr(test_env, "num_envs", getattr(test_env, "batch_size", 0)) env_is_batched = batch_size is not None and batch_size >= 1 if env_is_batched: # NOTE: Need to pass an action space that actually reflects the batch # size, even for the last batch! obs_batch_size = obs.x.shape[0] if obs.x.shape else None action_space_batch_size = ( test_env.action_space.shape[0] if test_env.action_space.shape else None ) if obs_batch_size is not None and obs_batch_size != action_space_batch_size: action_space = batch_space(test_env.single_action_space, obs_batch_size) action = method.get_actions(obs, action_space) if test_env.is_closed(): break obs, reward, done, info = test_env.step(action) if done and not test_env.is_closed(): # logger.debug(f"end of test episode {episode}") obs = test_env.reset() episode += 1 test_env.close() test_results: Results = test_env.get_results() if wandb.run: d = add_prefix(test_results.to_log_dict(), prefix="Test", sep="/") # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/") # d["current_task"] = task_id wandb.log(d) # Restore 'training' mode, if it was set at the start. if was_training: method.set_training() return test_results # return test_results # if not self.task_labels_at_test_time: # # TODO: move this wrapper to common/wrappers. # test_env = RemoveTaskLabelsWrapper(test_env) def setup_wandb(self, method: Method) -> Run: """Call wandb.init, log the experiment configuration to the config dict. This assumes that `self.wandb` is not None. This happens when one of the wandb arguments is passed. Parameters ---------- method : Method Method to be applied. """ assert isinstance(self.wandb, WandbConfig) method_name: str = method.get_name() setting_name: str = self.get_name() if not self.wandb.run_name: # Set the default name for this run. run_name = f"{method_name}-{setting_name}" dataset = getattr(self, "dataset", None) if isinstance(dataset, str): run_name += f"-{dataset}" if getattr(self, "nb_tasks", 0) > 1: run_name += f"_{self.nb_tasks}t" # type: ignore self.wandb.run_name = run_name run: Run = self.wandb.wandb_init() run.config["setting"] = setting_name # Add the setting's options into the config: setting_config_dict: Dict[str, Any] = {} if isinstance(self, Serializable): setting_config_dict = self.to_dict() elif is_dataclass(self): setting_config_dict = asdict(self) run.config.update({f"setting.{k}": v for k, v in setting_config_dict.items()}) run.config["method"] = method_name run.config["method_full_name"] = method.get_full_name() run.summary["setting"] = self.get_name() if isinstance(self.dataset, str): run.summary["dataset"] = self.dataset run.summary["method"] = method.get_name() assert wandb.run is run return run def log_results(self, method: Method, results: Results, prefix: str = "") -> None: """ TODO: Create the tabs we need to show up in wandb: 1. Final - Average "Current/Online" performance (scalar) - Average "Final" performance (scalar) - Runtime 2. Test - Task i (evolution over time (x axis is the task id, if possible)) """ logger.info(results.summary()) if wandb.run: wandb.summary["method"] = method.get_name() wandb.summary["setting"] = self.get_name() dataset = getattr(self, "dataset", "") if dataset and isinstance(dataset, str): wandb.summary["dataset"] = dataset results_dict = results.to_log_dict() if prefix: results_dict = add_prefix(results_dict, prefix=prefix, sep="/") wandb.log(results_dict) # BUG: Sometimes logging a matplotlib figure causes a crash: # File "/home/fabrice/miniconda3/envs/sequoia/lib/python3.8/site-packages/plotly/matplotlylib/mplexporter/utils.py", line 246, in get_grid_style # if axis._gridOnMajor and len(gridlines) > 0: # AttributeError: 'XAxis' object has no attribute '_gridOnMajor' # Seems to be fixed by downgrading the matplotlib version to 3.2.2 plots_dict = results.make_plots() if prefix: plots_dict = add_prefix(plots_dict, prefix=prefix, sep="/") wandb.log(plots_dict) # TODO: Finish the run here? Not sure this is right. # wandb.run.finish() @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. In the case of Continual and DiscreteTaskAgnostic, fit is only called once, with an environment that shifts between all the tasks. In Incremental, fit is called once per task, while in Traditional and MultiTask, fit is called once. """ return 1 from gym.vector import VectorEnv from sequoia.common.gym_wrappers.utils import EnvType class TestEnvironment(gym.wrappers.Monitor, IterableWrapper[EnvType], ABC): """Wrapper around a 'test' environment, which limits the number of steps and keeps tracks of the performance. """ def __init__( self, env: EnvType, directory: Path, step_limit: int = 1_000, # TODO: Remove this, use a dedicated wrapper for that. no_rewards: bool = False, config: Config = None, *args, **kwargs, ): super().__init__(env, directory, *args, **kwargs) # TODO: Need to stop re-creating the Monitor wrappers when we already have the list of envs # for each task! logger.info(f"Creating test env (Monitor) with log directory {self.directory}") self.step_limit = step_limit self.no_rewards = no_rewards self._steps = 0 self.config = config # if wandb.run: # wandb.gym.monitor() def step(self, action): self._before_step(action) # NOTE: Monitor wrapper from gym doesn't call `super().step`, so we have to # overwrite it here. observation, reward, done, info = IterableWrapper.step(self, action) done = self._after_step(observation, reward, done, info) return observation, reward, done, info def reset(self, **kwargs): self._before_reset() observation = IterableWrapper.reset(self, **kwargs) self._after_reset(observation) return observation @abstractmethod def get_results(self) -> Results: """Return how well the Method was applied on this environment. In RL, this would be based on the mean rewards, while in supervised learning it could be the average accuracy, for instance. Returns ------- Results [description] """ # TODO: Total reward over a number of steps? Over a number of episodes? # Average reward? What's the metric we care about in RL? rewards = self.get_episode_rewards() lengths = self.get_episode_lengths() total_steps = self.get_total_steps() return sum(rewards) / total_steps def step(self, action): # TODO: Its A bit uncomfortable that we have to 'unwrap' these here.. # logger.debug(f"Step {self._steps}") action_for_stats = action.y_pred if isinstance(action, Actions) else action self._before_step(action_for_stats) if isinstance(action, Tensor): action = action.cpu().numpy() observation, reward, done, info = self.env.step(action) observation_for_stats = observation.x reward_for_stats = reward.y # TODO: Always render when debugging? or only when the corresponding # flag is set in self.config? try: if self.config and self.config.render and self.config.debug: self.render("human") except NotImplementedError: pass if isinstance(self.env.unwrapped, VectorEnv): done = all(done) else: done = bool(done) done = self._after_step(observation_for_stats, reward_for_stats, done, info) if self.get_total_steps() >= self.step_limit: done = True self.close() # Remove the rewards if they aren't allowed. if self.no_rewards: reward = None return observation, reward, done, info TestEnvironment.__test__ = False ================================================ FILE: sequoia/settings/assumptions/discrete_results.py ================================================ import json from dataclasses import dataclass from io import StringIO from typing import ClassVar, Dict, Generic, List import matplotlib.pyplot as plt from simple_parsing.helpers import list_field from sequoia.common.metrics import Metrics from sequoia.settings.base.results import Results from .iid_results import MetricType, TaskResults @dataclass class TaskSequenceResults(Results, Generic[MetricType]): """Results obtained when evaluated on a sequence of (discrete) Tasks.""" task_results: List[TaskResults[MetricType]] = list_field() # For now, all the 'concrete' objectives (mean reward / episode in RL, accuracy in # SL) have higher => better lower_is_better: ClassVar[bool] = False def __post_init__(self): if self.task_results and isinstance(self.task_results[0], dict): self.task_results = [ TaskResults.from_dict(task_result, drop_extra_fields=False) for task_result in self.task_results ] @property def objective_name(self) -> str: return self.average_metrics.objective_name @property def num_tasks(self) -> int: """Returns the number of tasks. Returns ------- int Number of tasks. """ return len(self.task_results) @property def average_metrics(self) -> MetricType: return sum(self.average_metrics_per_task, Metrics()) @property def average_metrics_per_task(self) -> List[MetricType]: return [task_result.average_metrics for task_result in self.task_results] @property def objective(self) -> float: return self.average_metrics.objective def to_log_dict(self, verbose: bool = False) -> Dict: result = {} for task_id, task_results in enumerate(self.task_results): result[f"Task {task_id}"] = task_results.to_log_dict(verbose=verbose) result["Average"] = self.average_metrics.to_log_dict(verbose=verbose) return result def summary(self, verbose: bool = False): s = StringIO() print(json.dumps(self.to_log_dict(verbose=verbose), indent="\t"), file=s) s.seek(0) return s.read() def make_plots(self) -> Dict[str, plt.Figure]: result = {} for task_id, task_results in enumerate(self.task_results): result[f"Task {task_id}"] = task_results.make_plots() return result ================================================ FILE: sequoia/settings/assumptions/iid.py ================================================ """ IDEA: create the simple train loop for an IID setting (RL or CL). """ from dataclasses import dataclass from sequoia.utils.utils import constant from .task_incremental import TaskIncrementalAssumption # TODO: Import and use the `TaskResults` here. @dataclass class TraditionalSetting(TaskIncrementalAssumption): """Assumption (mixin) for Settings where the data is stationary (only one task). """ nb_tasks: int = constant(1) @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. Defaults to the number of tasks, but may be different, for instance in so-called Multi-Task Settings, this is set to 1. """ return 1 ================================================ FILE: sequoia/settings/assumptions/iid_results.py ================================================ """ Results for an IID experiment. """ from dataclasses import dataclass, field from typing import ClassVar, Dict, Generic, List, TypeVar import matplotlib.pyplot as plt from sequoia.common.metrics import Metrics from sequoia.settings.base.results import Results MetricType = TypeVar("MetricType", bound=Metrics) @dataclass class TaskResults(Results, Generic[MetricType]): """Results within a given Task. This is just a List of a given Metrics type, with additional methods. """ # For now, all the 'concrete' objectives (mean reward / episode in RL, accuracy in # SL) have higher => better lower_is_better: ClassVar[bool] = False metrics: List[MetricType] = field(default_factory=list) plots_dict: Dict[str, plt.Figure] = field(default_factory=dict) def __post_init__(self): if self.metrics and isinstance(self.metrics[0], dict): self.metrics = [ Metrics.from_dict(metrics, drop_extra_fields=False) for metrics in self.metrics ] def __str__(self) -> str: return f"{type(self).__name__}(average(metrics)={self.average_metrics})" def __repr__(self) -> str: return f"{type(self).__name__}(average(metrics)={self.average_metrics})" @property def average_metrics(self) -> MetricType: """Returns the average 'Metrics' object for this task.""" return sum(self.metrics, Metrics()) @property def objective(self) -> float: """Returns the main 'objective' value (a float) for this task. This value could be the average accuracy in SL, or the mean reward / episode in RL, depending on the type of Metrics stored in `self`. Returns ------- float A single float that describes how 'good' these results are. """ return self.average_metrics.objective @property def objective_name(self) -> str: # TODO: Add this objective_name attribute on Metrics return self.average_metrics.objective_name def __str__(self): return f"{type(self).__name__}({self.average_metrics})" def to_log_dict(self, verbose: bool = False) -> Dict: """Produce a dictionary that describes the results / metrics etc. Can be logged to console or to wandb using `wandb.log(results.to_log_dict())`. Parameters ---------- verbose : bool, optional Wether to include very detailed information. Defaults to `False`. Returns ------- Dict A dict mapping from str keys to either values or nested dicts of the same form. """ return self.average_metrics.to_log_dict(verbose=verbose) def summary(self) -> str: return str(self.to_log_dict()) def make_plots(self) -> Dict[str, plt.Figure]: """Produce a set of plots using the Metrics stored in this object. Returns ------- Dict[str, plt.Figure] Dict mapping from strings to matplotlib plots. """ # Could actually create plots here too. return self.plots_dict ================================================ FILE: sequoia/settings/assumptions/incremental.py ================================================ import itertools import time from abc import abstractmethod from dataclasses import dataclass from typing import ClassVar, Optional, Sequence, Type, Union import tqdm from gym.vector.utils.spaces import batch_space from simple_parsing import field from torch import Tensor from wandb.wandb_run import Run import wandb from sequoia.common.gym_wrappers.step_callback_wrapper import StepCallbackWrapper from sequoia.settings.base import Actions, Environment, Method, Results, Rewards, Setting from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import add_prefix, constant, flag from .continual import ContinualAssumption, TestEnvironment from .incremental_results import IncrementalResults, TaskSequenceResults logger = get_logger(__name__) @dataclass class IncrementalAssumption(ContinualAssumption): """Mixin that defines methods that are common to all 'incremental' settings, where the data is separated into tasks, and where you may not always get the task labels. Concretely, this holds the train and test loops that are common to the ClassIncrementalSetting (highest node on the Passive side) and ContinualRL (highest node on the Active side), therefore this setting, while abstract, is quite important. """ # Which dataset to use. # dataset: str Results: ClassVar[Type[Results]] = IncrementalResults @dataclass(frozen=True) class Observations(Setting.Observations): """Observations produced by an Incremental setting. Adds the 'task labels' to the base Observation. """ task_labels: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = None # Wether we have clear boundaries between tasks, or if the transition is # smooth. smooth_task_boundaries: bool = constant(False) # constant for now. # Wether task labels are available at train time. # NOTE: Forced to True at the moment. task_labels_at_train_time: bool = flag(default=True) # Wether task labels are available at test time. task_labels_at_test_time: bool = flag(default=False) # Wether we get informed when reaching the boundary between two tasks during # training. Only used when `smooth_task_boundaries` is False. # TODO: Setting constant for now, but we could add task boundary detection # later on! known_task_boundaries_at_train_time: bool = constant(True) # Wether we get informed when reaching the boundary between two tasks during # training. Only used when `smooth_task_boundaries` is False. known_task_boundaries_at_test_time: bool = True # The number of tasks. By default 0, which means that it will be set # depending on other fields in __post_init__, or eventually be just 1. nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"]) # Attributes (not parsed through the command-line): _current_task_id: int = field(default=0, init=False) def __post_init__(self): super().__post_init__() self.train_env: Environment = None # type: ignore self.val_env: Environment = None # type: ignore self.test_env: TestEnvironment = None # type: ignore self.wandb_run: Optional[Run] = None self._start_time: Optional[float] = None self._end_time: Optional[float] = None self._setting_logged_to_wandb: bool = False @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. Defaults to the number of tasks, but may be different, for instance in so-called Multi-Task Settings, this is set to 1. """ return self.nb_tasks @property def current_task_id(self) -> Optional[int]: """Get the current task id. TODO: Do we want to return None if the task labels aren't currently available? (at either Train or Test time?) Or if we 'detect' if this is being called from the method? TODO: This property doesn't really make sense in the Multi-Task SL or RL settings. """ return self._current_task_id @current_task_id.setter def current_task_id(self, value: int) -> None: """Sets the current task id.""" self._current_task_id = value def task_boundary_reached(self, method: Method, task_id: int, training: bool): known_task_boundaries = ( self.known_task_boundaries_at_train_time if training else self.known_task_boundaries_at_test_time ) task_labels_available = ( self.task_labels_at_train_time if training else self.task_labels_at_test_time ) if known_task_boundaries: # Inform the model of a task boundary. If the task labels are # available, then also give the id of the new task to the # method. # TODO: Should we also inform the method of wether or not the # task switch is occuring during training or testing? if not hasattr(method, "on_task_switch"): logger.warning( UserWarning( f"On a task boundary, but since your method doesn't " f"have an `on_task_switch` method, it won't know about " f"it! " ) ) elif not task_labels_available: method.on_task_switch(None) elif self.phases == 1: # NOTE: on_task_switch won't be called if there is only one task. pass else: method.on_task_switch(task_id) def main_loop(self, method: Method) -> IncrementalResults: """Runs an incremental training loop, wether in RL or CL.""" # TODO: Add ways of restoring state to continue a given run? # For each training task, for each test task, a list of the Metrics obtained # during testing on that task. # NOTE: We could also just store a single metric for each test task, but then # we'd lose the ability to create a plots to show the performance within a test # task. # IDEA: We could use a list of IIDResults! (but that might cause some circular # import issues) results = self.Results() if self.monitor_training_performance: results._online_training_performance = [] if self.wandb and self.wandb.project: # Init wandb, and then log the setting's options. self.wandb_run = self.setup_wandb(method) method.setup_wandb(self.wandb_run) # TODO: Fix this up, need to set the '_objective_scaling_factor' to a different # value depending on the 'dataset' / environment. results._objective_scaling_factor = self._get_objective_scaling_factor() method.set_training() self._start_time = time.process_time() for task_id in range(self.phases): logger.info( f"Starting training" + (f" on task {task_id}." if self.nb_tasks > 1 else ".") ) self.current_task_id = task_id self.task_boundary_reached(method, task_id=task_id, training=True) # Creating the dataloaders ourselves (rather than passing 'self' as # the datamodule): task_train_env = self.train_dataloader() task_valid_env = self.val_dataloader() method.fit( train_env=task_train_env, valid_env=task_valid_env, ) task_train_env.close() task_valid_env.close() if self.monitor_training_performance: results._online_training_performance.append(task_train_env.get_online_performance()) logger.info(f"Finished Training on task {task_id}.") test_metrics: TaskSequenceResults = self.test_loop(method) # Add a row to the transfer matrix. results.task_sequence_results.append(test_metrics) logger.info(f"Resulting objective of Test Loop: {test_metrics.objective}") if wandb.run: d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/") # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/") d["current_task"] = task_id wandb.log(d) self._end_time = time.process_time() runtime = self._end_time - self._start_time results._runtime = runtime logger.info(f"Finished main loop in {runtime} seconds.") self.log_results(method, results) return results def test_loop(self, method: Method) -> "IncrementalAssumption.Results": """(WIP): Runs an incremental test loop and returns the Results. The idea is that this loop should be exactly the same, regardless of if you're on the RL or the CL side of the tree. NOTE: If `self.known_task_boundaries_at_test_time` is `True` and the method has the `on_task_switch` callback defined, then a callback wrapper is added that will invoke the method's `on_task_switch` and pass it the task id (or `None` if `not self.task_labels_available_at_test_time`) when a task boundary is encountered. This `on_task_switch` 'callback' wrapper gets added the same way for Supervised or Reinforcement learning settings. """ test_env = self.test_dataloader() test_env: TestEnvironment was_training = method.training method.set_testing() if self.known_task_boundaries_at_test_time and self.nb_tasks > 1: def _on_task_switch(step: int, *arg) -> None: # TODO: This attribute isn't on IncrementalAssumption itself, it's defined # on ContinualRLSetting. if step not in test_env.boundary_steps: return if not hasattr(method, "on_task_switch"): logger.warning( UserWarning( f"On a task boundary, but since your method doesn't " f"have an `on_task_switch` method, it won't know about " f"it! " ) ) return if self.task_labels_at_test_time: # TODO: Should this 'test boundary' step depend on the batch size? task_steps = sorted(test_env.boundary_steps) # TODO: If the ordering of tasks were different (shuffled # tasks for example), then this wouldn't work, we'd need a # list of the task ids or something like that. task_id = task_steps.index(step) logger.debug( f"Calling `method.on_task_switch({task_id})` " f"since task labels are available at test-time." ) method.on_task_switch(task_id) else: logger.debug( f"Calling `method.on_task_switch(None)` " f"since task labels aren't available at " f"test-time, but task boundaries are known." ) method.on_task_switch(None) test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch]) # If the Method has `test` defined, use it. method.test(test_env) test_env: TestEnvironment # Get the metrics from the test environment test_results: TaskSequenceResults = test_env.get_results() # Restore 'training' mode, if it was set at the start. if was_training: method.set_training() return test_results # return test_results # if not self.task_labels_at_test_time: # # TODO: move this wrapper to common/wrappers. # test_env = RemoveTaskLabelsWrapper(test_env) @abstractmethod def train_dataloader( self, *args, **kwargs ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]: """Returns the DataLoader/Environment for the current train task.""" return super().train_dataloader(*args, **kwargs) @abstractmethod def val_dataloader( self, *args, **kwargs ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]: """Returns the DataLoader/Environment used for validation on the current task. """ return super().val_dataloader(*args, **kwargs) @abstractmethod def test_dataloader( self, *args, **kwargs ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]: """Returns the Test Environment (for all the tasks).""" return super().test_dataloader(*args, **kwargs) def _get_objective_scaling_factor(self) -> float: return 1.0 ================================================ FILE: sequoia/settings/assumptions/incremental_results.py ================================================ """ Results of an Incremental setting. """ import json import warnings from dataclasses import dataclass from io import StringIO from typing import ClassVar, Dict, Generic, List, Optional, Union import matplotlib.pyplot as plt import numpy as np from gym.utils import colorize from simple_parsing.helpers import list_field from simple_parsing.helpers.serialization import encode import wandb from sequoia.common.metrics import Metrics from sequoia.settings.base.results import Results from .discrete_results import TaskSequenceResults from .iid_results import MetricType, TaskResults @dataclass class IncrementalResults(Results, Generic[MetricType]): """Results for a whole train loop (transfer matrix). This class is basically just a 2d list of TaskResults objects, with some convenience methods and properties. We get one TaskSequenceResults (a 1d list of TaskResults objects) as a result of every test loop, which, in the Incremental Settings, happens after training on each task, hence why we get a nb_tasks x nb_tasks matrix of results. """ task_sequence_results: List[TaskSequenceResults[MetricType]] = list_field() min_runtime_hours: ClassVar[float] = 0.0 max_runtime_hours: ClassVar[float] = 12.0 def __post_init__(self): self._runtime: Optional[float] = None self._online_training_performance: Optional[List[Dict[int, Metrics]]] = None # Factor used to scale the 'objective' to a 'score' between 0 and 1. self._objective_scaling_factor: float = 1.0 @property def runtime_minutes(self) -> Optional[float]: return self._runtime / 60 if self._runtime is not None else None @property def runtime_hours(self) -> Optional[float]: return self._runtime / 3600 if self._runtime is not None else None @property def transfer_matrix(self) -> List[List[TaskResults]]: return [ task_sequence_result.task_results for task_sequence_result in self.task_sequence_results ] @property def metrics_matrix(self) -> List[List[MetricType]]: """Returns the 'transfer matrix' but with the average metrics for each task in each cell. NOTE: This is different from `transfer_matrix` since it returns the matrix of `TaskResults` objects (which are themselves lists of Metrics) Returns ------- List[List[MetricType]] 2d grid of average metrics for each task. """ return [ [task_results.average_metrics for task_results in task_sequence_result] for task_sequence_result in self ] @property def objective_matrix(self) -> List[List[float]]: """Return transfer matrix containing the value of the 'objective' for each task. The value at the index (i, j) gives the test performance on task j after having learned tasks 0-i. Returns ------- List[List[float]] The 2d matrix of objectives (floats). """ return [ [task_result.objective for task_result in task_sequence_result] for task_sequence_result in self.transfer_matrix ] @property def cl_score(self) -> float: """CL Score, as a weigted sum of three objectives: - The average final performance over all tasks - The average 'online' performance over all tasks - Runtime TODO: @optimass Determine the weights for each factor. Returns ------- float [description] """ # TODO: Determine the function to use to get a runtime score between 0 and 1. score = ( +0.30 * self._online_performance_score() + 0.40 * self._final_performance_score() + 0.30 * self._runtime_score() ) return score def _runtime_score(self) -> float: # TODO: function that takes the total runtime in seconds and returns a # normalized float score between 0 and 1. runtime_seconds = self._runtime if self._runtime is None: warnings.warn( RuntimeWarning( colorize( "Runtime is None! Returning runtime score of 0.\n (Make sure the " "Setting had its `monitor_training_performance` attr set to True!", color="red", ) ) ) return 0 runtime_hours = runtime_seconds / 3600 # Get the maximum runtime for this type of Results (and Setting) min_runtime_hours = type(self).min_runtime_hours max_runtime_hours = type(self).max_runtime_hours assert 0 <= min_runtime_hours < max_runtime_hours assert 0 < runtime_hours if runtime_hours <= min_runtime_hours: return 1.0 if max_runtime_hours <= runtime_hours: return 0.0 return 1 - ((runtime_hours - min_runtime_hours) / (max_runtime_hours - min_runtime_hours)) def _online_performance_score(self) -> float: """Function that takes the 'objective' of the Metrics from the average online performance, and returns a normalized float score between 0 and 1. """ objectives: List[float] = [ task_online_metric.objective for task_online_metric in self.online_performance_metrics ] return self._objective_scaling_factor * np.mean(objectives) # return self._objective_scaling_factor * self.average_online_performance.objective def _final_performance_score(self) -> float: """Function that takes the 'objective' of the Metrics from the average final performance, and returns a normalized float score between 0 and 1. """ objectives: List[float] = [ task_metric.objective for task_metric in self.final_performance_metrics ] return self._objective_scaling_factor * np.mean(objectives) # return self._objective_scaling_factor * self.average_final_performance.objective @property def objective(self) -> float: # return self.cl_score return self.average_final_performance.objective @property def num_tasks(self) -> int: return len(self.task_sequence_results) @property def online_performance(self) -> List[Dict[int, MetricType]]: """Returns the online training performance for each task. i.e. the diagonal of the transfer matrix. In SL, this is only recorded over the first epoch. Returns ------- List[Dict[int, MetricType]] A List containing, for each task, a dictionary mapping from step number to the Metrics object produced at that step. """ if not self._online_training_performance: return [{} for _ in range(self.num_tasks)] return self._online_training_performance # return [self[i][i] for i in range(self.num_tasks)] @property def online_performance_metrics(self) -> List[MetricType]: return [ sum(online_performance_dict.values(), Metrics()) for online_performance_dict in self.online_performance ] @property def final_performance(self) -> List[TaskResults[MetricType]]: return self.transfer_matrix[-1] @property def final_performance_metrics(self) -> List[MetricType]: return [task_result.average_metrics for task_result in self.final_performance] @property def average_online_performance(self) -> MetricType: return sum(self.online_performance_metrics, Metrics()) @property def average_final_performance(self) -> MetricType: return sum(self.final_performance_metrics, Metrics()) def to_log_dict(self, verbose: bool = False) -> Dict: log_dict = {} # TODO: This assumes that the metrics were stored in the right index for their # corresponding task. for task_id, task_sequence_result in enumerate(self.task_sequence_results): log_dict[f"Task {task_id}"] = task_sequence_result.to_log_dict(verbose=verbose) if self._online_training_performance: log_dict["Online Performance"] = { f"Task {task_id}": task_online_metrics.to_log_dict(verbose=verbose) for task_id, task_online_metrics in enumerate(self.online_performance_metrics) } log_dict.update( { "Final/Average Online Performance": self._online_performance_score(), "Final/Average Final Performance": self._final_performance_score(), "Final/Runtime (seconds)": self._runtime, "Final/CL Score": self.cl_score, } ) return log_dict def summary(self, verbose: bool = False): s = StringIO() log_dict = self.to_log_dict(verbose=verbose) log_dict_json = json.dumps(log_dict, indent="\t", default=encode) print(log_dict_json, file=s) s.seek(0) return s.read() def make_plots(self) -> Dict[str, Union[plt.Figure, Dict]]: plots = { f"Task {task_id}": task_sequence_result.make_plots() for task_id, task_sequence_result in enumerate(self.task_sequence_results) } axis_labels = [f"Task {task_id}" for task_id in range(self.num_tasks)] if wandb.run: plots["Transfer matrix"] = wandb.plots.HeatMap( x_labels=axis_labels, y_labels=axis_labels, matrix_values=self.objective_matrix, show_text=True, ) objective_array = np.asfarray(self.objective_matrix) perf_per_step = objective_array.mean(-1) table = wandb.Table( data=[[i + 1, perf] for i, perf in enumerate(perf_per_step)], columns=["# of learned tasks", "Average Test performance on all tasks"], ) plots["Test Performance"] = wandb.plot.line( table, x="# of learned tasks", y="Average Test performance on all tasks", title="Test Performance vs # of Learned tasks", ) return plots def __str__(self) -> str: return self.summary() ================================================ FILE: sequoia/settings/assumptions/incremental_test.py ================================================ from typing import List, Optional import gym import numpy as np from gym import Space from gym.vector.utils.spaces import batch_space from sequoia.methods import Method from sequoia.settings import Actions, Environment, Observations from .incremental import IncrementalAssumption, TestEnvironment class DummyMethod(Method, target_setting=IncrementalAssumption): """Dummy method used to check that the Setting calls `on_task_switch` with the right arguments. """ def __init__(self): self.n_task_switches = 0 self.n_fit_calls = 0 self.received_task_ids: List[Optional[int]] = [] self.received_while_training: List[bool] = [] self.train_steps_per_task: List[int] = [] self.train_episodes_per_task: List[int] = [] def fit(self, train_env: gym.Env = None, valid_env: gym.Env = None): self.n_fit_calls += 1 self.train_steps_per_task.append(0) self.train_episodes_per_task.append(0) obs = train_env.reset() for i in range(100): obs, reward, done, info = train_env.step(train_env.action_space.sample()) self.train_steps_per_task[-1] += 1 if done: self.train_episodes_per_task[-1] += 1 break def test(self, test_env: TestEnvironment): while not test_env.is_closed(): done = False obs = test_env.reset() while not done: actions = test_env.action_space.sample() obs, _, done, info = test_env.step(actions) def get_actions( self, observations: IncrementalAssumption.Observations, action_space: gym.Space ): return np.ones(action_space.shape) def on_task_switch(self, task_id: int = None): self.n_task_switches += 1 self.received_task_ids.append(task_id) self.received_while_training.append(self.training) class OtherDummyMethod(Method, target_setting=IncrementalAssumption): def __init__(self): self.batch_sizes: List[int] = [] def fit(self, train_env: Environment, valid_env: Environment): for i, batch in enumerate(train_env): if isinstance(batch, Observations): observations, rewards = batch, None else: assert isinstance(batch, tuple) and len(batch) == 2 observations, rewards = batch y_preds = train_env.action_space.sample() if rewards is None: action_space = train_env.action_space if train_env.action_space.shape: # This is a bit complicated, but it's needed because the last batch # might have a different batch dimension than the env's action # space, (only happens on the last batch in supervised learning). # TODO: Should we perhaps drop the last batch? action_space = train_env.action_space batch_size = getattr(train_env, "num_envs", getattr(train_env, "batch_size", 0)) env_is_batched = batch_size is not None and batch_size >= 1 if env_is_batched: # NOTE: Need to pass an action space that actually reflects the batch # size, even for the last batch! obs_batch_size = observations.x.shape[0] if observations.x.shape else None action_space_batch_size = ( train_env.action_space.shape[0] if train_env.action_space.shape else None ) if obs_batch_size is not None and obs_batch_size != action_space_batch_size: action_space = batch_space( train_env.single_action_space, obs_batch_size ) y_preds = action_space.sample() rewards = train_env.send(Actions(y_pred=y_preds)) def get_actions(self, observations: Observations, action_space: Space) -> Actions: # This won't work on weirder spaces. if action_space.shape: assert observations.x.shape[0] == action_space.shape[0] if getattr(observations.x, "shape", None): batch_size = 1 if observations.x.ndim > 1: batch_size = observations.x.shape[0] self.batch_sizes.append(batch_size) else: self.batch_sizes.append(0) # X isn't batched. return action_space.sample() ================================================ FILE: sequoia/settings/assumptions/task_incremental.py ================================================ from dataclasses import dataclass from sequoia.utils.utils import constant from .context_visibility import FullyObservableContextAssumption from .incremental import IncrementalAssumption @dataclass class TaskIncrementalAssumption(FullyObservableContextAssumption, IncrementalAssumption): """Assumption (mixin) for Settings where the task labels are available at both train and test time. """ task_labels_at_train_time: bool = constant(True) task_labels_at_test_time: bool = constant(True) ================================================ FILE: sequoia/settings/assumptions/task_type.py ================================================ from dataclasses import dataclass from typing import Union from torch import LongTensor, Tensor from sequoia.settings.base import Actions @dataclass(frozen=True) class ClassificationActions(Actions): """Typed dict-like class that represents the 'forward pass'/output of a classification head, which correspond to the 'actions' to be sent to the environment, in the general formulation. """ y_pred: Union[LongTensor, Tensor] logits: Tensor @property def action(self) -> LongTensor: return self.y_pred @property def y_pred_log_prob(self) -> Tensor: """returns the log probabilities for the chosen actions/predictions.""" return self.logits[:, self.y_pred] @property def y_pred_prob(self) -> Tensor: """returns the log probabilities for the chosen actions/predictions.""" return self.probabilities[self.y_pred] @property def probabilities(self) -> Tensor: """Returns the normalized probabilies for each class, i.e. the softmax-ed version of `self.logits`. """ return self.logits.softmax(-1) ================================================ FILE: sequoia/settings/base/__init__.py ================================================ from .bases import Method, SettingABC from .environment import Environment from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType from .results import Results from .setting import Setting, SettingType ================================================ FILE: sequoia/settings/base/base.puml ================================================ @startuml base !include gym.puml remove gym.spaces remove Wrapper hide empty members package sequoia as settings.base { ' namespace base.objects { together { together { abstract class Observations extends Batch { + x: Tensor } abstract class Actions extends Batch { + y_pred: Tensor } abstract class Rewards extends Batch { + y: Tensor } } Environment --* Observations: yields Environment --* Actions: receives Environment --* Rewards: returns interface Environment extends gym.Env, torch.DataLoader { + observation_space: Space + action_space: Space + reward_space: Space + step(Actions actions) -> Tuple[Observations, Rewards, bool, Dict] + reset() -> Observations } abstract class Results { + objective: float } interface SettingABC { -- static (class) attributes -- + {static} Results: Type[Results] + {static} Observations: Type[Observations] + {static} Actions: Type[Actions] + {static} Rewards: Type[Rewards] -- {abstract} + apply(Method): Results } ' TODO: Here we just show the most basic interface. abstract class Setting extends SettingABC, pytorch_lightning.LightningDataModule { -- static (class) attributes -- + {static} Results: Type[Results] + {static} Observations: Type[Observations] + {static} Actions: Type[Actions] + {static} Rewards: Type[Rewards] ' TODO: should we move this to `Setting` rather than SettingABC? -- inherited from LightningDataModule -- {abstract} + prepare_data() {abstract} + setup() {abstract} + train_dataloader() -> Environment {abstract} + val_dataloader() -> Environment {abstract} + test_dataloader() -> Environment == Abstract Method == {abstract} + apply(Method) -> Results } ' NOTE: Choose either of the following code blocks: ' ------------- remove Setting remove pytorch_lightning SettingABC -.left-> Environment : creates SettingABC -.-> Results : produces SettingABC -.-> Method : applies SettingABC <-.- Method : targets ' ----- OR ----- ' remove SettingABC ' Setting -.left-> Environment : creates ' Setting -.-> Results : produces ' Setting -.-> Method : applies ' Setting <-.- Method : targets ' ------------- } Method <-.-> Environment : interacts with abstract class Method { .. abstract static attributes .. {static} {abstract} target_setting: Type[S] .. abstract (required) methods .. {abstract} + fit(train_env: Environment, valid_env: Environment) {abstract} + get_actions(observations: Observations, action_space: Space) .. optional methods .. + configure(setting: S) + on_task_switch(task_id: Optional[int]) + test(test_env: Environment) ' - is_applicable(setting: SettingABC): bool } abstract class Model { + forward(input: Observations) -> Actions } Method -.- Model : ( can use ) } remove Batch @enduml ================================================ FILE: sequoia/settings/base/bases.py ================================================ """ This module defines the base classes for Settings and Methods. """ import json import traceback import typing from abc import ABC, abstractmethod from functools import partial from io import StringIO from pathlib import Path from typing import ( Any, ClassVar, Dict, Generic, Iterable, List, Mapping, Optional, Set, Tuple, Type, TypeVar, Union, ) import gym from gym.utils import colorize from pytorch_lightning import LightningDataModule from wandb.wandb_run import Run import wandb if typing.TYPE_CHECKING: from sequoia.common.config.config import Config from sequoia.settings.base.environment import Environment from sequoia.settings.base.objects import Actions, Observations, Rewards from sequoia.settings.base.results import Results from sequoia.utils.logging_utils import get_logger from sequoia.utils.parseable import Parseable from sequoia.utils.utils import ( camel_case, compute_identity, flatten_dict, get_path_to_source_file, remove_suffix, ) logger = get_logger(__name__) class SettingABC: """Abstract base class for a Setting. This just shows the minimal API. For more info, see the `Setting` class, which is the concrete implementation of this class, and the 'root' of the tree. Abstract (required) methods: - **apply** Applies a given Method on this setting to produce Results. "Abstract"-ish (required) class attributes: - `Results`: The class of Results that are created when applying a Method on this setting. - `Observations`: The type of Observations that will be produced in this setting. - `Actions`: The type of Actions that are expected from this setting. - `Rewards`: The type of Rewards that this setting will (potentially) return upon receiving an action from the method. """ Results: ClassVar[Type[Results]] = Results Observations: ClassVar[Type[Observations]] = Observations Actions: ClassVar[Type[Actions]] = Actions Rewards: ClassVar[Type[Rewards]] = Rewards @abstractmethod def apply(self, method: "Method", config: "Config" = None) -> "SettingABC.Results": """Applies a Method on this experimental Setting to produce Results. Defines the training/evaluation procedure specific to this Setting. The training/evaluation loop can be defined however you want, as long as it respects the following constraints: 1. This method should always return either a float or a Results object that indicates the "performance" of this method on this setting. 2. More importantly: You **have** to make sure that you do not break compatibility with more general methods targetting a parent setting! It should always be the case that all methods designed for any of this Setting's parents should also be applicable via polymorphism, i.e., anything that is defined to work on the class `Animal` should also work on the class `Cat`! 3. While not enforced, it is strongly encourged that you define your training/evaluation routines at a pretty high level, so that Methods that get applied to your Setting can make use of pytorch-lightning's `Trainer` & `LightningDataModule` API to be neat and fast. Parameters ---------- method : Method A Method to apply on this Setting. config : Optional[Config] Optional configuration object with things like the log dir, the data dir, cuda, wandb config, etc. When None, will be parsed from the current command-line arguments. Returns ------- Results An object that is used to measure or quantify the performance of the Method on this experimental Setting. """ raise NotImplementedError() @abstractmethod def prepare_data(self, *args, **kwargs): pass @abstractmethod def setup(self, stage: Optional[str] = None): pass @abstractmethod def train_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]: pass @abstractmethod def val_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]: pass @abstractmethod def test_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]: pass @classmethod @abstractmethod def get_available_datasets(cls) -> Iterable[str]: """Returns an iterable of the names of available datasets.""" # --- Below this are some class attributes and methods related to the Tree. --- # These are some "private" class attributes. # For any new Setting subclass, it's parent setting. _parent: ClassVar[Type["SettingABC"]] = None # A list of all the direct children of this setting. _children: ClassVar[Set[Type["SettingABC"]]] = set() # List of all methods that directly target this Setting. _targeted_methods: ClassVar[Set[Type["Method"]]] = set() def __init_subclass__(cls, **kwargs): """Called whenever a new subclass of `Setting` is declared.""" # logger.debug(f"Registering a new setting: {cls.get_name()}") # Exceptionally, create this new empty list that will hold all the # forthcoming subclasses of this particular new setting. cls._children = set() cls._targeted_methods = set() # Inform the immediate parents in the tree that they have a new child. for immediate_parent in cls.get_immediate_parents(): immediate_parent._children.add(cls) super().__init_subclass__(**kwargs) @classmethod def get_applicable_methods(cls) -> List[Type["Method"]]: """Returns all the Methods applicable on this Setting.""" applicable_methods: List[Method] = [] from sequoia.methods import get_all_methods for method_type in get_all_methods(): if method_type.is_applicable(cls): applicable_methods.append(method_type) return applicable_methods @classmethod def register_method(cls, method: Type["Method"]): """Register a method as being Applicable on this type of Setting.""" cls._targeted_methods.add(method) @classmethod def get_name(cls) -> str: """Gets the name of this Setting.""" # LightningDataModule has a `name` class attribute of `...`! if getattr(cls, "name", None) != Ellipsis: return cls.name name = camel_case(cls.__qualname__) return remove_suffix(name, "_setting") @classmethod def immediate_children(cls) -> Iterable[Type["SettingABC"]]: """Returns the immediate children of this Setting in the hierarchy. In most cases, this will be a list with only one value. """ yield from cls._children @classmethod def get_immediate_children(cls) -> List[Type["SettingABC"]]: """Returns a list of the immediate children of this Setting.""" return list(cls.immediate_children()) @classmethod def children(cls) -> Iterable[Type["SettingABC"]]: """Returns an Iterator over all the children of this Setting, in-order.""" # Yield the immediate children. for child in cls._children: yield child # Yield from the children themselves. yield from child.children() @classmethod def get_children(cls) -> List[Type["SettingABC"]]: return list(cls.children()) @classmethod def immediate_parents(cls) -> List[Type["SettingABC"]]: """Returns the immediate parent(s) Setting(s). In most cases, this will be a list with only one value. """ return [parent for parent in cls.__bases__ if issubclass(parent, SettingABC)] @classmethod def get_immediate_parents(cls) -> List[Type["SettingABC"]]: """Returns the immediate parent(s) Setting(s). In most cases, this will be a list with only one value. """ return cls.immediate_parents() @classmethod def parents(cls) -> Iterable[Type["SettingABC"]]: """yields the lineage, from bottom to top. NOTE: In the case of Settings having multiple parents (such as TraditionalSLSetting), this is still just a list that reflects the method resolution order for that setting. """ return [ parent_class for parent_class in cls.mro()[1:] if issubclass(parent_class, SettingABC) ] @classmethod def get_parents(cls) -> List[Type["SettingABC"]]: return list(cls.parents()) @classmethod def get_path_to_source_file(cls: Type) -> Path: from sequoia.utils.utils import get_path_to_source_file return get_path_to_source_file(cls) @classmethod def get_tree_string( cls, formatting: str = "command_line", with_methods: bool = False, with_assumptions: bool = False, with_docstrings: bool = False, ) -> str: """Returns a string representation of the tree starting at this node downwards.""" from sequoia.utils.readme import get_tree_string, get_tree_string_markdown formatting_functions = { "command_line": get_tree_string, "markdown": get_tree_string_markdown, } if formatting not in formatting_functions.keys(): raise RuntimeError( f"formatting must be one of {','.join(formatting_functions)}, " f"got {formatting}" ) return formatting_functions[formatting]( cls, with_methods=with_methods, with_assumptions=with_assumptions, with_docstrings=with_docstrings, ) SettingType = TypeVar("SettingType", bound=SettingABC) class Method(Generic[SettingType], Parseable, ABC): """ABC for a Method, which is a solution to a research problem (a Setting).""" # Class attribute that holds the setting this method was designed to target. # Needs to either be passed to the class statement or set as a class # attribute. target_setting: ClassVar[Type[SettingType]] = None _training: bool def configure(self, setting: SettingType) -> None: """Configures this method before it gets applied on the given Setting. Args: setting (SettingType): The setting the method will be evaluated on. """ @abstractmethod def get_actions( self, observations: Observations, action_space: gym.Space ) -> Union[Actions, Any]: """Get a batch of predictions (actions) for the given observations. returned actions must fit the action space. """ @abstractmethod def fit( self, train_env: Environment[Observations, Actions, Rewards], valid_env: Environment[Observations, Actions, Rewards], ): """Called by the Setting to give the method data to train with. Might be called more than once before training is 'complete'. """ def test(self, test_env: Environment[Observations, Actions, Optional[Rewards]]): """(WIP) Optional method which could be called by the setting to give your Method more flexibility about how it wants to arrange the test env. Parameters ---------- test_env : Environment[Observations, Actions, Optional[Rewards]] Test environment which monitors your actions, and in which you are only allowed a limited number of steps. """ import tqdm pbar = tqdm.tqdm(desc="Testing") postfix = {} steps = 0 episodes = 0 while not test_env.is_closed(): observations = test_env.reset() done = False episode_steps = 0 while not (done or test_env.is_closed()): actions = self.get_actions(observations, action_space=test_env.action_space) observations, rewards, done, info = test_env.step(actions) steps += 1 episode_steps += 1 postfix.update(steps=steps, episode_steps=episode_steps) pbar.set_postfix(postfix) pbar.update() episodes += 1 postfix.update(episodes=episodes) pbar.close() def receive_results(self, setting: SettingType, results: Results) -> None: """Receive the Results of applying this method on the given Setting. This method is optional. This will be called after the method has been successfully applied to a Setting, and could be used to log or persist the results somehow. Parameters ---------- results : Results The `Results` object constructed by `setting`, as a result of applying this Method to it. """ run_name = "" # Set the default name for this run. # run_name = f"{method_name}-{setting_name}" # dataset = getattr(self, "dataset", None) # if isinstance(dataset, str): # run_name += f"-{dataset}" # if getattr(self, "nb_tasks", 0) > 1: # run_name += f"_{self.nb_tasks}t" setting_name = setting.get_name() method_name = self.get_name() base_results_dir: Path = setting.config.log_dir / setting_name / method_name dataset_name = getattr(setting, "dataset", None) if isinstance(dataset_name, str): base_results_dir /= dataset_name if wandb.run and wandb.run.id: # if setting.wandb and setting.wandb.project: run_id = wandb.run.id assert isinstance(run_id, str) # results_dir = base_results_dir / run_id # TODO: Fix this: results_dir = wandb.run.dir else: for suffix in [f"run_{i}" for i in range(100)]: results_dir = base_results_dir / suffix try: results_dir.mkdir(exist_ok=False, parents=True) except FileExistsError: pass else: break else: raise RuntimeError( f"Unable to create a unique results dir under {base_results_dir} " ) results_dir = Path(results_dir) logger.info(f"Saving results in directory {results_dir}") results_json_path = results_dir / "results.json" try: with open(results_json_path, "w") as f: json.dump(results.to_log_dict(), f) except Exception as e: print(f"Unable to save the results: {e}") setting_path = results_dir / "setting.yaml" try: setting.save(setting_path) except Exception as e: print(f"Unable to save the Setting: {e}") method_path = results_dir / "method.yaml" try: self.save(method_path) except Exception as e: print(f"Unable to save the Method: {e}") if wandb.run: wandb.save(str(results_json_path)) if setting_path.exists(): wandb.save(str(setting_path)) if method_path.exists(): wandb.save(str(method_path)) def setup_wandb(self, run: Run) -> None: """Called by the Setting when using Weights & Biases, after `wandb.init`. This method is here to provide Methods with the opportunity to log some of their configuration options or hyper-parameters to wandb. NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by this point. Parameters ---------- run : wandb.Run Current wandb Run. """ def set_training(self) -> None: """Called by the Setting to let the Method know it is in the "training" phase. By default, this will try to to look for any nn.Module attributes on `self`, and call their `train()` method. """ self._training = True try: from torch import nn for attribute, value in vars(self).items(): if isinstance(value, nn.Module): logger.debug(f"Calling 'train()' on the Method's {attribute} attribute.") value.train() except Exception as exc: logger.warning(f"Unable to call `train()` on nn.Modules of the Method: {exc}") def set_testing(self) -> None: """Called by the Setting to let the Method know when it is in "testing" phase. By default, this will try to to look for any nn.Module attributes on `self`, and call their `eval()` method. """ self._training = False try: from torch import nn for attribute, value in vars(self).items(): if isinstance(value, nn.Module): logger.debug(f"Calling 'eval()' on the Method's {attribute} attribute.") value.eval() except Exception as exc: logger.warning(f"Unable to call `eval()` on nn.Modules of the Method: {exc}") @property def training(self) -> bool: """Wether we're currently in the 'training' phase. Returns ------- bool Wether we're in the 'training' phase or not. """ return getattr(self, "_training", True) @property def testing(self) -> bool: """Wether we're currently in the 'testing' phase. Returns ------- bool Wether we're in the 'testing' phase or not. """ return not self.training # -------- # Below this are some class attributes and methods related to the Tree # structure and for launching Experiments using this method. # -------- @classmethod def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results: """Run an Experiment from the command-line using this method. (TODO: @lebrice Finish writing a good docstring here that explains how this works and how to use it.) You can then select which setting, dataset, etc. this method will be applied to using the --setting , and the rest of the arguments will be passed to the Setting's from_args method. """ from sequoia.main import Experiment experiment: Experiment # Create the Method object from the command-line: method = cls.from_args(argv, strict=False) # Then create the 'Experiment' from the command-line, which makes it # possible to choose between all the settings. experiment = Experiment.from_args(argv, strict=False) # Set the method attribute to be the one parsed above. experiment.method = method results: Results = experiment.launch(argv) return results @classmethod def is_applicable(cls, setting: Union[SettingType, Type[SettingType]]) -> bool: """Returns wether this Method is applicable to the given setting. A method is applicable on a given setting if and only if the setting is the method's target setting, or if it is a descendant of the method's target setting (below the target setting in the tree). Concretely, since the tree is implemented as an inheritance hierarchy, a method is applicable to any setting which is an instance (or subclass) of its target setting. Args: setting (SettingABC): a Setting. Returns: bool: Wether or not this method is applicable on the given setting. """ # if given an object, get it's type. if isinstance(setting, LightningDataModule): setting = type(setting) if not issubclass(setting, SettingABC) and issubclass(setting, LightningDataModule): # TODO: If we're trying to check if this method would be compatible # with a LightningDataModule, rather than a Setting, then we treat # that LightningModule the same way we would an TraditionalSLSetting. # i.e., if we're trying to apply a Method on something that isn't in # the tree, then we consider that datamodule as the TraditionalSLSetting node. from sequoia.settings import TraditionalSLSetting setting = TraditionalSLSetting return issubclass(setting, cls.target_setting) @classmethod def get_applicable_settings(cls) -> List[Type[SettingType]]: """Returns all settings on which this method is applicable. NOTE: This only returns 'concrete' Settings. """ from sequoia.settings import all_settings return list(filter(cls.is_applicable, all_settings)) # This would return ALL the setting: # return list([cls.target_setting, *cls.target_setting.children()]) @classmethod def all_evaluation_settings(cls, **kwargs) -> Iterable[SettingType]: """Generator over all the combinations of Settings/datasets on which this method is applicable. If keyword arguments are passed, they will be passed to the constructor of each setting. """ for setting_type in cls.get_applicable_settings(): for dataset in setting_type.get_available_datasets(): setting = setting_type(dataset=dataset, **kwargs) yield setting @classmethod def get_name(cls) -> str: """Gets the name of this method class.""" name = getattr(cls, "name", None) if name is None: name = camel_case(cls.__qualname__) name = remove_suffix(name, "_method") return name @classmethod def get_family(cls) -> Optional[str]: """Gets the name of the 'family' of Methods which contains this method class. This is used to differentiate methods with the same name, for instance sb3/DQN versus pl_bolts/DQN, sequoia/EWC vs avalanche/EWC, etc. """ return getattr(cls, "family", None) @classmethod def get_full_name(cls) -> str: """Gets the 'full name' of a method, which is the "{family}.{name}" if the family is set, and just the name otherwise. The full name is used as the option on the command-line. """ name = cls.get_name() family = cls.get_family() return f"{family}.{name}" if family is not None else name def __init_subclass__(cls, target_setting: Type[SettingType] = None, **kwargs) -> None: """Called when creating a new subclass of Method. Args: target_setting (Type[Setting], optional): The target setting. Defaults to None, in which case the method will inherit the target setting of it's parent class. """ if target_setting: cls.target_setting = target_setting elif getattr(cls, "target_setting", None): target_setting = cls.target_setting else: raise RuntimeError( f"You must either pass a `target_setting` argument to the " f"class statement or have a `target_setting` class variable " f"when creating a new subclass of {__class__}." ) # Register this new method on the Setting. target_setting.register_method(cls) return super().__init_subclass__(**kwargs) @classmethod def get_path_to_source_file(cls) -> Path: return get_path_to_source_file(cls) def get_experiment_name(self, setting: SettingABC, experiment_id: str = None) -> str: """Gets a unique name for the experiment where `self` is applied to `setting`. This experiment name will be passed to `orion` when performing a run of Hyper-Parameter Optimization. Parameters ---------- - setting : Setting The `Setting` onto which this method will be applied. This method will be used when - experiment_id: str, optional A custom hash to append to the experiment name. When `None` (default), a unique hash will be created based on the values of the Setting's fields. Returns ------- str The name for the experiment. """ if not experiment_id: setting_dict = setting.to_dict() # BUG: Some settings have non-string keys/value or something? d = flatten_dict(setting_dict) experiment_id = compute_identity(size=5, **d) assert isinstance(setting.dataset, str), "assuming that dataset is a str for now." return f"{self.get_name()}-{setting.get_name()}_{setting.dataset}_{experiment_id}" def get_search_space(self, setting: SettingABC) -> Mapping[str, Union[str, Dict]]: """Returns the search space to use for HPO in the given Setting. Parameters ---------- setting : Setting The Setting on which the run of HPO will take place. Returns ------- Mapping[str, Union[str, Dict]] An orion-formatted search space dictionary, mapping from hyper-parameter names (str) to their priors (str), or to nested dicts of the same form. """ raise NotImplementedError( "You need to provide an implementation for the `get_search_space` method " "in order to enable HPO sweeps." ) def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None: """Adapts the Method when it receives new Hyper-Parameters to try for a new run. It is required that this method be implemented if you want to perform HPO sweeps with Orion. NOTE: It is very strongly recommended that you always re-create your model and any modules / components that depend on these hyper-parameters inside the `configure` method! (Otherwise these new hyper-parameters will not be used in the next run) Parameters ---------- new_hparams : Dict[str, Any] The new hyper-parameters being recommended by the HPO algorithm. These will have the same structure as the search space. """ raise NotImplementedError( "You need to provide an implementation for the `adapt_to_new_hparams` " "method in order to enable HPO sweeps." ) def hparam_sweep( self, setting: SettingABC, search_space: Dict[str, Union[str, Dict]] = None, experiment_id: str = None, database_path: Union[str, Path] = None, max_runs: int = None, hpo_algorithm: Union[str, Dict] = "BayesianOptimizer", debug: bool = False, ) -> Tuple[Dict, float]: """Performs a Hyper-Parameter Optimization sweep using orion. Changes the values in `self.hparams` iteratively, returning the best hparams found so far. Parameters ---------- setting : Setting Setting to run the sweep on. search_space : Dict[str, Union[str, Dict]], optional Search space of the hyper-parameter optimization algorithm. Defaults to `None`, in which case the result of the `get_search_space` method is used. experiment_id : str, optional Unique Id to use when creating the experiment in Orion. Defaults to `None`, in which case a hash of the `setting`'s fields is used. database_path : Union[str, Path], optional Path to a pickle file to be used by Orion to store the hyper-parameters and their corresponding values. Default to `None`, in which case the database is created at path `./orion_db.pkl`. max_runs : int, optional Maximum number of runs to perform. Defaults to `None`, in which case the run lasts until the search space is exhausted. hpo_algorithm : Union[str, Dict], optional The hyper-parameter optimization algorithms to use. debug : bool, optional Wether to run Orion in debug-mode, where the database is an EphemeralDb, meaning it gets created for the sweep and destroyed at the end of the sweep. Returns ------- Tuple[BaseModel.HParams, float] Best HParams, and the corresponding performance. """ try: from orion.client import build_experiment from orion.core.worker.trial import Trial except ImportError as e: raise RuntimeError( f"Need to install the optional dependencies for HPO, using " f"`pip install -e .[hpo]` (error: {e})" ) from e search_space = search_space or self.get_search_space(setting) logger.info("HPO Search space:\n" + json.dumps(search_space, indent="\t")) database_path: Path = Path(database_path or "./orion_db.pkl") logger.info(f"Will use database at path '{database_path}'.") experiment_name = self.get_experiment_name(setting, experiment_id=experiment_id) experiment = build_experiment( name=experiment_name, space=search_space, debug=debug, algorithms=hpo_algorithm, max_trials=max_runs, storage={ "type": "legacy", "database": {"type": "pickleddb", "host": str(database_path)}, }, ) previous_trials: List[Trial] = experiment.fetch_trials_by_status("completed") # Since Orion works in a 'lower is better' fashion, so if the `objective` of the # Results class for the given Setting have "higher is better", we negate the # objectives when extracting them and again before submitting them to Orion. lower_is_better = setting.Results.lower_is_better sign = 1 if lower_is_better else -1 if previous_trials: logger.info( f"Using existing Experiment {experiment} which has " f"{len(previous_trials)} existing trials." ) else: logger.info(f"Created new experiment with name {experiment_name}") trials_performed = 0 failed_trials = 0 red = partial(colorize, color="red") green = partial(colorize, color="green") while not (experiment.is_done or failed_trials == 3): # Get a new suggestion of hparams to try: trial: Trial = experiment.suggest() # --------- # (Re)create the Model with the suggested Hparams values. # --------- new_hparams: Dict = trial.params # Inner function, just used to make the code below a bit simpler. # TODO: We should probably also change some values in the Config (e.g. # log_dir, checkpoint_dir, etc) between runs. logger.info("Suggested values for this run:\n" + json.dumps(new_hparams, indent="\t")) self.adapt_to_new_hparams(new_hparams) # --------- # Evaluate the (adapted) method on the setting: # --------- try: result: Results = setting.apply(self) except Exception: logger.error(red("Encountered an error, this trial will be dropped:")) logger.error(red("-" * 60)) with StringIO() as s: traceback.print_exc(file=s) s.seek(0) logger.error(red(s.read())) logger.error(red("-" * 60)) failed_trials += 1 logger.error(red(f"({failed_trials} failed trials so far). ")) experiment.release(trial) else: # Report the results to Orion: orion_result = dict( name=result.objective_name, type="objective", value=sign * result.objective, ) experiment.observe(trial, [orion_result]) trials_performed += 1 logger.info( green( f"Trial #{trials_performed}: {result.objective_name} = {result.objective}" ) ) # Receive the results, maybe log to wandb, whatever you wanna do. self.receive_results(setting, result) logger.info( "Experiment statistics: \n" + "\n".join(f"\t{key}: {value}" for key, value in experiment.stats.items()) ) logger.info(f"Number of previous trials: {len(previous_trials)}") logger.info(f"Trials successfully completed by this worker: {trials_performed}") logger.info(f"Failed Trials attempted by this worker: {failed_trials}") if "best_trials_id" not in experiment.stats: raise RuntimeError("Can't find the best trial, experiment might be broken!") best_trial: Trial = experiment.get_trial(uid=experiment.stats["best_trials_id"]) best_hparams = best_trial.params best_objective = best_trial.objective return best_hparams, best_objective ================================================ FILE: sequoia/settings/base/environment.py ================================================ """Defines the Abstract Base class for an "Environment". NOTE (@lebrice): This 'Environment' abstraction isn't super useful at the moment because there's only the `ActiveDataLoader` that fits this interface (since we can't send anything to the usual DataLoader). """ from abc import ABC from typing import Generic import gym from sequoia.utils.logging_utils import get_logger from .objects import ActionType, ObservationType, RewardType logger = get_logger(__name__) from abc import abstractmethod class Environment( gym.Env, Generic[ObservationType, ActionType, RewardType], ABC, ): """ABC for a learning 'environment' in *both* Supervised and Reinforcement Learning. Different settings can implement this interface however they want. """ reward_space: gym.Space # @abstractmethod def is_closed(self) -> bool: """Returns wether this environment is closed.""" if hasattr(self, "env") and hasattr(self.env, "is_closed"): return self.env.is_closed() raise NotImplementedError(self) ================================================ FILE: sequoia/settings/base/objects.py ================================================ from dataclasses import dataclass from typing import Generic, TypeVar import numpy as np from torch import Tensor from sequoia.common import Batch @dataclass(frozen=True) class Observations(Batch): """A batch of "observations" coming from an Environment.""" x: Tensor @property def state(self) -> Tensor: return self.x def __len__(self) -> int: return self.batch_size @dataclass(frozen=True) class Actions(Batch): """A batch of "actions" coming from an Environment. For example, in a supervised setting, this would be the predicted labels, while in an RL setting, this would be the next 'actions' to take in the Environment. """ y_pred: Tensor @property def actions(self) -> Tensor: return self.y_pred @property def actions_np(self) -> np.ndarray: """Returns the prediction/action as a numpy array.""" if isinstance(self.y_pred, Tensor): return self.y_pred.detach().cpu().numpy() return np.asarray(self.y_pred) @property def predictions(self) -> Tensor: return self.y_pred T = TypeVar("T") @dataclass(frozen=True) class Rewards(Batch, Generic[T]): """A batch of "rewards" coming from an Environment. For example, in a supervised setting, this would be the true labels, while in an RL setting, this would be the 'reward' for a state-action pair. TODO: Maybe add the task labels as a part of the 'Reward', to help with the training of task-inference methods later on when we add those. """ # TODO: Rename this to 'reward', and add a 'y' field in the 'DenseRewards' class. y: T @property def labels(self) -> T: return self.y @property def reward(self) -> T: return self.y ObservationType = TypeVar("ObservationType", bound=Observations) ActionType = TypeVar("ActionType", bound=Actions) RewardType = TypeVar("RewardType", bound=Rewards) ================================================ FILE: sequoia/settings/base/results.py ================================================ """In the current setup, `Results` objects are created by a Setting when a method is applied to them. Each setting can define its own type of `Results` to customize what the ‘objective’ is in that particular setting. For instance, the TaskIncrementalSLSetting class also defines a TaskIncrementalResults class, where the average accuracy across all tasks is the objective. We currently have a unit testing setup that, for a given Method class, performs a quick run of training / testing (using the --fast_dev_run option from Pytorch-Lightning). In those tests, there is also a `validate_results` function, which is basically used to make sure that the results make sense, for the given method and setting. For instance, when testing a RandomBaselineMethod on an TraditionalSLSetting, the accuracy should be close to chance level. Likewise, in the `baseline_test.py` file, we make sure that the BaseMethod (just a classifier, no CL adjustments) also exhibits catastrophic forgetting when applied on a Class or Task Incremental Setting. """ from abc import ABC, abstractmethod from dataclasses import dataclass from functools import total_ordering from pathlib import Path from typing import Any, ClassVar, Dict, TypeVar, Union import matplotlib.pyplot as plt from simple_parsing import Serializable from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) @dataclass @total_ordering class Results(Serializable, ABC): """Represents the results of an experiment. Here you can define what the quantity to maximize/minize is. This class should also be used to create the plots that will be helpful to understand and compare different results. TODO: Add wandb logging here somehow. """ lower_is_better: ClassVar[bool] = False # Name for the 'objective'. objective_name: ClassVar[str] = "Objective" @property @abstractmethod def objective(self) -> float: """Returns a float value that indicating how "good" this result is. If the `lower_is_better` class variable is set to `False` (default), then this """ raise NotImplementedError("Each Result subclass should implement this.") @abstractmethod def summary(self) -> str: """Gives a string describing the results, in a way that is easy to understand. :return: A summary of the results. :rtype: str """ @abstractmethod def make_plots(self) -> Dict[str, plt.Figure]: """Generates the plots that are useful for understanding/interpreting or comparing this kind of results. :return: A dictionary mapping from plot name to the matplotlib figure. :rtype: Dict[str, plt.Figure] """ @abstractmethod def to_log_dict(self, verbose: bool = False) -> Dict[str, Any]: """Create a dict version of the results, to be logged to wandb""" return {self.objective_name: self.objective} def save(self, path: Union[str, Path], dump_fn=None, **kwargs) -> None: path = Path(path) path.parent.mkdir(exist_ok=True, parents=True) return super().save(path, dump_fn=dump_fn, **kwargs) def save_to_dir(self, save_dir: Union[str, Path], filename: str = "results.json") -> None: save_dir = Path(save_dir) save_dir.mkdir(exist_ok=True, parents=True) print(f"Results summary:") self.summary results_dump_file = save_dir / filename self.save(results_dump_file) print(f"Saved a copy of the results to {results_dump_file}") plots: Dict[str, plt.Figure] = self.make_plots() plot_paths: Dict[str, Path] = {} for fig_name, figure in plots.items(): print(f"fig_name: {fig_name}") # figure.show() # plt.waitforbuttonpress(10) path = (save_dir / fig_name).with_suffix(".jpg") path.parent.mkdir(exist_ok=True, parents=True) figure.savefig(path) # print(f"Saved figure at path {path}") plot_paths[fig_name] = path print(f"\nSaved Plots to: {plot_paths}\n") def __eq__(self, other: Any) -> bool: if isinstance(other, Results): return self.objective == other.objective elif isinstance(other, float): return self.objective == other return NotImplemented def __gt__(self, other: Any) -> bool: if isinstance(other, Results): return self.objective > other.objective elif isinstance(other, float): return self.objective > other return NotImplemented ResultsType = TypeVar("ResultsType", bound=Results) ================================================ FILE: sequoia/settings/base/setting.py ================================================ """ This module defines the `Setting` class, an ML "problem" to solve. The `Setting` class is an abstract base class which should represent the most general learning setting imaginable, i.e. with the fewest assumptions about the data, the environment, the agent, etc. The Setting class is currently loosely based on the `LightningDataModule` class from pytorch-lightning, with the goal of having an `TraditionalSLSetting` node somewhere in the tree, which would be totally interchangeable with existing datamodules from pytorch-lightning. The hope is that by staying close to that API, we can make it easier for people to adopt the repo, and also, if possible, directly reuse existing models from pytorch-lightning. See: [Pytorch-Lightning](https://pytorch-lightning.readthedocs.io/en/latest/) See: [LightningDataModule](https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html) """ import itertools import sys import typing from abc import abstractmethod from dataclasses import dataclass from pathlib import Path from typing import Any, ClassVar, Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union import gym import numpy as np import torch from gym import spaces from pytorch_lightning import LightningDataModule from simple_parsing import Serializable, field from torch import Tensor from sequoia.common.config import Config, WandbConfig from sequoia.common.metrics import Metrics if typing.TYPE_CHECKING: from sequoia.common.transforms import Compose from sequoia.common.transforms.transform_enum import Transforms from sequoia.settings.base.bases import Method, SettingABC from sequoia.settings.base.environment import Environment from sequoia.settings.base.objects import Actions, Observations, Rewards from sequoia.settings.base.results import Results, ResultsType from sequoia.settings.base.setting_meta import SettingMeta from sequoia.settings.presets import setting_presets from sequoia.utils import Parseable, get_logger from sequoia.utils.utils import take logger = get_logger(__name__) SettingType = TypeVar("SettingType", bound="Setting") EnvironmentType = TypeVar("EnvironmentType", bound=Environment) @dataclass class Setting( SettingABC, Parseable, Serializable, LightningDataModule, Generic[EnvironmentType], metaclass=SettingMeta, ): """Base class for all research settings in ML: Root node of the tree. A 'setting' is loosely defined here as a learning problem with a specific set of assumptions, restrictions, and an evaluation procedure. For example, Reinforcement Learning is a type of Setting in which we assume that an Agent is able to observe an environment, take actions upon it, and receive rewards back from the environment. Some of the assumptions include that the reward is dependant on the action taken, and that the actions have an impact on the environment's state (and on the next observations the agent will receive). The evaluation procedure consists in trying to maximize the reward obtained from an environment over a given number of steps. This 'Setting' class should ideally represent the most general learning problem imaginable, with almost no assumptions about the data or evaluation procedure. This is a dataclass. Its attributes are can also be used as command-line arguments using `simple_parsing`. Abstract (required) methods: - **apply** Applies a given Method on this setting to produce Results. - **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode). - **setup** (things to do on every accelerator in distributed mode). - **train_dataloader** the training environment/dataloader. - **val_dataloader** the val environments/dataloader(s). - **test_dataloader** the test environments/dataloader(s). "Abstract"-ish (required) class attributes: - `Results`: The class of Results that are created when applying a Method on this setting. - `Observations`: The type of Observations that will be produced in this setting. - `Actions`: The type of Actions that are expected from this setting. - `Rewards`: The type of Rewards that this setting will (potentially) return upon receiving an action from the method. """ # ---------- Class Variables ------------- # Fields in this block are class attributes. They don't create command-line # arguments. # Type of Observations that the dataloaders (a.k.a. "environments") will # produce for this type of Setting. Observations: ClassVar[Type[Observations]] = Observations # Type of Actions that the dataloaders (a.k.a. "environments") will receive # through their `send` method, for this type of Setting. Actions: ClassVar[Type[Actions]] = Actions # Type of Rewards that the dataloaders (a.k.a. "environments") will return # after receiving an action, for this type of Setting. Rewards: ClassVar[Type[Rewards]] = Rewards # The type of Results that are given back when a method is applied on this # Setting. The `Results` class basically defines the 'evaluation metric' for # a given type of setting. See the `Results` class for more info. Results: ClassVar[Type[Results]] = Results available_datasets: ClassVar[Dict[str, Any]] = {} # Transforms to be applied to the observatons of the train/valid/test # environments. transforms: Optional[List[Transforms]] = None # Transforms to be applied to the training datasets. train_transforms: Optional[List[Transforms]] = None # Transforms to be applied to the validation datasets. val_transforms: Optional[List[Transforms]] = None # Transforms to be applied to the testing datasets. test_transforms: Optional[List[Transforms]] = None # Fraction of training data to use to create the validation set. # (Only applicable in Passive settings.) val_fraction: float = 0.2 # TODO: Still not sure where exactly we should be adding the 'batch_size' # and 'num_workers' arguments. Adding it here for now with cmd=False, so # that they can be passed to the constructor of the Setting. batch_size: Optional[int] = field(default=None, cmd=False) num_workers: Optional[int] = field(default=None, cmd=False) # # TODO: Add support for semi-supervised training. # # Fraction of the dataset that is labeled. # labeled_data_fraction: int = 1.0 # # Number of labeled examples. # n_labeled_examples: Optional[int] = None # Options related to Weights & Biases (wandb). Turned Off by default. Passing any of # its arguments will enable wandb. # NOTE: Adding `cmd=False` here, so we only create the args in `Experiment`. # TODO: Fix this up. wandb: Optional[WandbConfig] = field(default=None, compare=False, cmd=False) # Group of configuration options like log_dir, data dir, etc. # TODO: It's a bit confusing to also have a `config` attribute on the # Setting. Might want to change this a bit. config: Optional[Config] = field(default=None, cmd=False) def __post_init__( self, observation_space: gym.Space = None, action_space: gym.Space = None, reward_space: gym.Space = None, ): """Initializes the fields of the setting that weren't set from the command-line. """ from sequoia.common.transforms import Compose logger.debug("__post_init__ of Setting") # BUG: simple-parsing sometimes parses a list with a single item, itself the # list of transforms. Not sure if this still happens. def is_list_of_list(v: Any) -> bool: return isinstance(v, list) and len(v) == 1 and isinstance(v[0], list) if is_list_of_list(self.train_transforms): self.train_transforms = self.train_transforms[0] if is_list_of_list(self.val_transforms): self.val_transforms = self.val_transforms[0] if is_list_of_list(self.test_transforms): self.test_transforms = self.test_transforms[0] # if all( # t is None # for t in [ # self.transforms, # self.train_transforms, # self.val_transforms, # self.test_transforms, # ] # ): # # Use these two transforms by default if no transforms are passed at all. # # TODO: Remove this after the competition perhaps. # self.transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) # TODO: Should change this, so that these transform fields are only the # additional transforms compared to `self.transforms` (the 'base' transforms) # If the constructor is called with just the `transforms` argument, like this: # (dataset="bob", transforms=foo_transform) # Then we use this value as the default for the train, val and test transforms. if self.transforms and not any( [self.train_transforms, self.val_transforms, self.test_transforms] ): if not isinstance(self.transforms, list): self.transforms = Compose([self.transforms]) self.train_transforms = self.transforms.copy() self.val_transforms = self.transforms.copy() self.test_transforms = self.transforms.copy() if self.train_transforms is not None and not isinstance(self.train_transforms, list): self.train_transforms = [self.train_transforms] if self.val_transforms is not None and not isinstance(self.val_transforms, list): self.val_transforms = [self.val_transforms] if self.test_transforms is not None and not isinstance(self.test_transforms, list): self.test_transforms = [self.test_transforms] # Actually compose the list of Transforms or callables into a single transform. self.train_transforms = Compose(self.train_transforms or []) self.val_transforms = Compose(self.val_transforms or []) self.test_transforms = Compose(self.test_transforms or []) LightningDataModule.__init__( self, train_transforms=self.train_transforms, val_transforms=self.val_transforms, test_transforms=self.test_transforms, ) self._observation_space = observation_space self._action_space = action_space self._reward_space = reward_space self.train_env: Environment = None # type: ignore self.val_env: Environment = None # type: ignore self.test_env: Environment = None # type: ignore @abstractmethod def apply(self, method: Method, config: Config = None) -> "Setting.Results": # NOTE: The actual train/test loop should be defined in a more specific # setting. This is just here as an illustration of what that could look # like. raise NotImplementedError("this is just here for illustration purposes. ") method.fit( train_env=self.train_dataloader(), valid_env=self.val_dataloader(), ) # Test loop: test_env = self.test_dataloader() test_metrics = [] # Number of episodes to test on: n_test_episodes = 1 # Perform a set number of episodes in the test environment. for episode in range(n_test_episodes): # Get initial observations. observations = test_env.reset() for i in itertools.count(): # Get the predictions/actions for a batch of observations. actions = method.get_actions(observations, test_env.action_space) observations, rewards, done, info = test_env.step(actions) # Calculate the 'metrics' (TODO: This should be done be in the env!) batch_metrics = ... test_metrics.append(batch_metrics) if done: break return self.Results(test_metrics=test_metrics) def get_metrics(self, actions: Actions, rewards: Rewards) -> Union[float, Metrics]: """Calculate the "metric" from the model predictions (actions) and the true labels (rewards). In this example, we return a 'Metrics' object: - `ClassificationMetrics` for classification problems, - `RegressionMetrics` for regression problems. We use these objects because they are awesome (they basically simplify making plots, wandb logging, and serialization), but you can also just return floats if you want, no problem. TODO: This is duplicated from Incremental. Need to fix this. """ from sequoia.common.metrics import get_metrics # In this particular setting, we only use the y_pred from actions and # the y from the rewards. if isinstance(actions, Actions): actions = torch.as_tensor(actions.y_pred) if isinstance(rewards, Rewards): rewards = torch.as_tensor(rewards.y) # TODO: At the moment there's this problem, ClassificationMetrics wants # to create a confusion matrix, which requires 'logits' (so it knows how # many classes. if isinstance(actions, Tensor): actions = actions.cpu().numpy() if isinstance(rewards, Tensor): rewards = rewards.cpu().numpy() if isinstance(self.action_space, spaces.Discrete): batch_size = rewards.shape[0] actions = torch.as_tensor(actions) if len(actions.shape) == 1 or (actions.shape[-1] == 1 and self.action_space.n != 2): fake_logits = torch.zeros([batch_size, self.action_space.n], dtype=int) # FIXME: There must be a smarter way to do this indexing. for i, action in enumerate(actions): fake_logits[i, action] = 1 actions = fake_logits return get_metrics(y_pred=actions, y=rewards) @property def image_space(self) -> Optional[gym.Space]: if isinstance(self.observation_space, spaces.Box): return self.observation_space if isinstance(self.observation_space, spaces.Tuple): assert isinstance(self.observation_space["x"], spaces.Box) return self.observation_space["x"] if isinstance(self.observation_space, spaces.Dict): return self.observation_space.spaces["x"] logger.warning( f"Don't know what the image space is. " f"(self.observation_space={self.observation_space})" ) return None @property def observation_space(self) -> gym.Space: return self._observation_space @observation_space.setter def observation_space(self, value: gym.Space) -> None: """Sets a the observation space. NOTE: This also changes the value of the `dims` attribute and the result of the `size()` method from LightningDataModule. """ if not isinstance(value, gym.Space): raise RuntimeError(f"Value must be a `gym.Space` (got {value})") if not self._dims: if isinstance(value, spaces.Box): self.dims = value.shape elif isinstance(value, spaces.Tuple): self.dims = tuple(space.shape for space in value.spaces) elif isinstance(value, spaces.Dict) and "x" in value.spaces: self.dims = value.spaces["x"].shape else: raise NotImplementedError( f"Don't know how to set the 'dims' attribute using " f"observation space {value}" ) self._observation_space = value @property def action_space(self) -> gym.Space: return self._action_space @action_space.setter def action_space(self, value: gym.Space) -> None: self._action_space = value @property def reward_space(self) -> gym.Space: return self._reward_space @reward_space.setter def reward_space(self, value: gym.Space) -> None: self._reward_space = value @classmethod def get_available_datasets(cls) -> Iterable[str]: """Returns an iterable of strings which represent the names of datasets.""" return cls.available_datasets def _setup_config(self, method: Method) -> Config: config: Config if isinstance(getattr(method, "config", None), Config): config = method.config logger.debug(f"Using Config from the Method: {config}") elif isinstance(getattr(self, "config", None), Config): config = self.config logger.debug(f"Using Config from the Setting: {config}") else: argv = self._argv if argv: logger.debug(f"Parsing the Config from the command-line arguments ({argv})") else: logger.debug(f"Parsing the config from the current command-line arguments.") config = Config.from_args(argv, strict=False) return config @classmethod def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results: from sequoia.main import Experiment experiment: Experiment # Create the Setting object from the command-line: setting = cls.from_args(argv) # Then create the 'Experiment' from the command-line, which makes it # possible to choose between all the methods. experiment = Experiment.from_args(argv) # fix the setting attribute to be the one parsed above. experiment.setting = setting results: ResultsType = experiment.launch(argv) return results def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type["Method"], Results]: applicable_methods = self.get_applicable_methods() from sequoia.methods import Method all_results: Dict[Type[Method], Results] = {} config = Config.from_args(argv) for method_type in applicable_methods: method = method_type.from_args(argv) results = self.apply(method, config) all_results[method_type] = results logger.info(f"All results for setting of type {type(self)}:") logger.info( { method.get_name(): (results.get_metric() if results else "crashed") for method, results in all_results.items() } ) return all_results def _check_environments(self): """Do a quick check to make sure that interacting with the envs/dataloaders works correctly. """ # Check that the env's spaces are batched versions of the settings'. from gym.vector.utils import batch_space from sequoia.settings.sl import PassiveEnvironment batch_size = self.batch_size for loader_method in [ self.train_dataloader, self.val_dataloader, self.test_dataloader, ]: print(f"\n\nChecking loader method {loader_method.__name__}\n\n") env = loader_method(batch_size=batch_size) batch_size = env.batch_size # We could compare the spaces directly, but that's a bit messy, and # would be depends on the type of spaces for each. Instead, we could # check samples from such spaces on how the spaces are batched. if batch_size: expected_observation_space = batch_space(self.observation_space, n=batch_size) expected_action_space = batch_space(self.action_space, n=batch_size) expected_reward_space = batch_space(self.reward_space, n=batch_size) else: expected_observation_space = self.observation_space expected_action_space = self.action_space expected_reward_space = self.reward_space # TODO: Batching the 'Sparse' makes it really ugly, so just # comparing the 'image' portion of the space for now. assert env.observation_space["x"].shape == expected_observation_space[0].shape, ( env.observation_space["x"], expected_observation_space[0], ) assert env.action_space == expected_action_space, ( env.action_space, expected_action_space, ) assert env.reward_space == expected_reward_space, ( env.reward_space, expected_reward_space, ) # Check that the 'gym API' interaction is working correctly. reset_obs: Observations = env.reset() self._check_observations(env, reset_obs) for i in range(5): actions = env.action_space.sample() self._check_actions(env, actions) step_observations, step_rewards, done, info = env.step(actions) self._check_observations(env, step_observations) self._check_rewards(env, step_rewards) if batch_size: assert not any(done) else: assert not done # assert not (done if isinstance(done, bool) else any(done)) for batch in take(env, 5): observations: Observations rewards: Optional[Rewards] if isinstance(env, PassiveEnvironment): observations, rewards = batch else: # in RL atm, the 'dataset' gives back only the observations. # Coul observations, rewards = batch, None self._check_observations(env, observations) if rewards is not None: self._check_rewards(env, rewards) if batch_size: actions = tuple(self.action_space.sample() for _ in range(batch_size)) else: actions = self.action_space.sample() # actions = self.Actions(torch.as_tensor(actions)) rewards = env.send(actions) self._check_rewards(env, rewards) env.close() def _check_observations(self, env: Environment, observations: Any): """Check that the given observation makes sense for the given environment. TODO: This should probably not be in this file here. It's more used for testing than anything else. """ assert isinstance(observations, self.Observations), observations images = observations.x assert isinstance(images, (torch.Tensor, np.ndarray)) if isinstance(images, Tensor): images = images.cpu().numpy() # Find the 'image' space: if isinstance(env.observation_space, spaces.Box): image_space = env.observation_space elif isinstance(env.observation_space, spaces.Tuple): image_space = env.observation_space["x"] else: raise RuntimeError( f"Don't know how to find the image space in the " f"env's obs space ({env.observation_space})." ) assert images in image_space def _check_actions(self, env: Environment, actions: Any): if isinstance(actions, Actions): assert isinstance(actions, self.Actions) actions = actions.y_pred.cpu().numpy() elif isinstance(actions, Tensor): actions = actions.cpu().numpy() elif isinstance(actions, np.ndarray): actions = actions assert actions in env.action_space def _check_rewards(self, env: Environment, rewards: Any): if isinstance(rewards, Rewards): assert isinstance(rewards, self.Rewards) rewards = rewards.y if isinstance(rewards, Tensor): rewards = rewards.cpu().numpy() if isinstance(rewards, np.ndarray): rewards = rewards if isinstance(rewards, (int, float)): rewards = np.asarray(rewards) assert rewards in env.reward_space, (rewards, env.reward_space) # Just to make type hinters stop throwing errors when using the constructor # to create a Setting. def __new__(cls, *args, **kwargs): return super().__new__(cls, *args, **kwargs) @classmethod def load_benchmark(cls: Type[SettingType], benchmark: Union[str, Path]) -> SettingType: """Load the given "benchmark" (pre-configured Setting) of this type. Parameters ---------- cls : Type[SettingType] Type of Setting to create. benchmark : Union[str, Path] Either the name of a benchmark (e.g. "cartpole_state", "monsterkong", etc.) or a path to a json/yaml file. Returns ------- SettingType Setting of type `cls`, appropriately populated according to the chosen benchmark. Raises ------ RuntimeError If `benchmark` isn't an existing file or a known preset. RuntimeError If any command-line arguments are present in sys.argv which would be ignored when creating this setting. """ # If the provided benchmark isn't a path, try to get the value from # the `setting_presets` dict. If it isn't in the dict, raise an # error. if not Path(benchmark).is_file(): if benchmark in setting_presets: benchmark = setting_presets[benchmark] else: raise RuntimeError( f"Could not find benchmark '{benchmark}': it " f"is neither a path to a file or a key of the " f"`setting_presets` dictionary. \n" f"(Available presets: {setting_presets}) " ) # Creating an experiment for the given setting, loaded from the # config file. # TODO: IDEA: Do the same thing for loading the Method? logger.info( f"Will load the options for setting {cls} from the file " f"at path {benchmark}." ) # Raise an error if any of the args in sys.argv would have been used # up by the Setting, just to prevent any ambiguities. _, unused_args = cls.from_known_args() consumed_args = list(set(sys.argv[1:]) - set(unused_args)) if consumed_args: # TODO: This could also be trigerred if there were arguments # in the method with the same name as some from the Setting. raise RuntimeError( f"Cannot pass command-line arguments for the Setting when " f"loading a benchmark, since these arguments whould have been " f"ignored when creating the setting of type {cls} " f"anyway: {consumed_args}" ) drop_extras = False # Actually load the setting from the file. setting = cls.load(path=benchmark, drop_extra_fields=drop_extras) return setting ================================================ FILE: sequoia/settings/base/setting_meta.py ================================================ """ """ import dataclasses from dataclasses import Field from typing import Dict, List, Type from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class SettingMeta(Type["Setting"]): """Metaclass for the nodes in the Setting inheritance tree. Might remove this. Was experimenting with using this to create class properties for each Setting. What this currently does is to remove any keyword argument passed to the constructor if its value is marked as a 'constant'. TODO: A little while back I noticed some strange behaviour when trying to create a Setting class (either manually or through the command-line), and I attributed it to PL adding a `_DataModuleWrapper` metaclass to `LightningDataModule`, which seemed to be causing problems related to calling __init__ when using dataclasses. I don't quite recall exactly what was happening and was causing an issue, so it would be a good idea to try removing this metaclass and writing a test to make sure there was a problem to begin with, and also to make sure that adding back this class fixes it. """ def __call__(cls, *args, **kwargs): # This is used to filter the arguments passed to the constructor # of the Setting and only keep the ones that are fields with init=True. fields: Dict[str, Field] = {field.name: field for field in dataclasses.fields(cls)} init_fields: List[str] = [name for name, f in fields.items() if f.init] for key in list(kwargs.keys()): value = kwargs[key] if key not in fields: # We let this through, so that if there is a problem, it is # raised when calling the constructor below. continue # elif key in fields and key not in init_fields: # # We let this through, so that if there is a problem, it is # # raised when calling the constructor below. # logger.warning(RuntimeWarning( # f"Constructor Argument {key} is a field with init=False but" # f"but is being passed to the constructor." # )) # continue # Alternative: Raise a custom Exception directly: # raise RuntimeError(( # Other idea: go up two stackframes so that it looks like # `cls(blabla=123)` is what's causing the exception? field = fields[key] _missing = object() constant_value = field.metadata.get("constant", _missing) if constant_value is not _missing and value != constant_value: logger.warning( UserWarning( f"Ignoring argument {key}={value} when creating class " f"{cls}, since it has that field marked as constant with a " f"value of {constant_value}." ) ) kwargs.pop(key) return super().__call__(*args, **kwargs) def __instancecheck__(self, instance): from sequoia.client import SettingProxy if isinstance(instance, SettingProxy) or hasattr(instance, "_setting_type"): # If the setting is a proxy, then we check if its a proxy to a setting of # this type. return issubclass(instance._setting_type, self) return super().__instancecheck__(instance) ================================================ FILE: sequoia/settings/base/setting_test.py ================================================ import functools import inspect from dataclasses import dataclass from typing import Union import pytest from sequoia.methods import Method from sequoia.utils.utils import constant from .setting import Setting @dataclass class Setting1(Setting): foo: int = 1 bar: int = 2 def __post_init__(self): print(f"Setting1 __init__ ({self})") super().__post_init__() @dataclass class Setting2(Setting1): bar: int = constant(1) def __post_init__(self): print(f"Setting2 __init__ ({self})") super().__post_init__() @pytest.mark.xfail(reason="Changed this.") def test_settings_override_with_constant_take_init(): """Test that when a value for one of the constant fields is passed to the constructor, its value is ignored and getting that attribute on the object gives back the constant value. If the field isn't constant, the value should be set on the object as usual. """ bob1 = Setting1(foo=3, bar=7) assert bob1.foo == 3 assert bob1.bar == 7 bob2 = Setting2(foo=4, bar=4) assert bob2.bar == 1.0 assert bob2.foo == 4 def test_loading_benchmark_doesnt_overwrite_constant(): setting1 = Setting1.loads_json('{"foo":1, "bar":2}') assert setting1.foo == 1 assert setting1.bar == 2 setting2 = Setting2.loads_json('{"foo":1, "bar":2}') assert setting2.foo == 1 assert setting2.bar == 1 def test_init_still_works(): setting = Setting(val_fraction=0.01) assert setting.val_fraction == 0.01 def test_passing_unexpected_arg_raises_typeerror(): with pytest.raises(TypeError): bob2 = Setting2(foo=4, bar=4, baz=123123) @dataclass class SettingA(Setting): pass @dataclass class SettingA1(SettingA): pass @dataclass class SettingA2(SettingA): pass @dataclass class SettingB(Setting): pass class MethodA(Method, target_setting=SettingA): pass class MethodB(Method, target_setting=SettingB): pass class CoolGeneralMethod(Method, target_setting=Setting): pass def test_that_transforms_can_be_set_through_command_line(): from sequoia.common.transforms import Compose, Transforms setting = Setting(train_transforms=[]) assert setting.train_transforms == [] setting = Setting.from_args("--train_transforms channels_first") assert setting.train_transforms == [Transforms.channels_first] assert isinstance(setting.train_transforms, Compose) setting = Setting.from_args("--train_transforms channels_first") assert setting.train_transforms == [Transforms.channels_first] assert isinstance(setting.train_transforms, Compose) from typing import Any, ClassVar, Dict, Type from sequoia.common.config import Config from sequoia.methods.random_baseline import RandomBaselineMethod from .setting import Setting class SettingTests: """Class that groups all the tests for a given setting. You should create a test class for your new setting, ideally in a file placed next to the class under test, named with the "_test.py" suffix. The test class can be created in one of two ways: - Either using a 'Setting' class attribute: ```python from sequoia.settings.base.setting_test import SettingTests class TestMySetting(SettingTests): Setting = MySetting def test_something(self): setting = self.Setting(...) ... ``` - OR, by passing the `setting` keyword argument to the class statement: ```python class TestMySetting(SettingTests, setting=MySetting): def test_something(self): setting = self.Setting(...) ... ``` If your setting is based on something more concrete than just the `Setting` class, then you should use the associated test class as a base for your new test class: ```python # (Taking ContinualRLSetting here as an example) # *Important*: Remember to rename the test class if needed so that pytest doesn't also run them # when testing your module: from sequoia.settings.rl.continual.setting_test import TestContinualRLSetting as ContinualRLSettingTests from .my_custom_setting import MyCustomSetting class TestMyCustomSetting(ContinualRLSettingTests, setting=MyCustomSetting): def my_custom_test(self): ... # OR class TestMyCustomSetting(ContinualRLSettingTests): Setting = MyCustomSetting ``` This also generates a `dataset` fixture. """ Setting: ClassVar[Type[Setting]] # Autogenerated fixture that will yield each entry from the available dataset of the setting # class under test. dataset: pytest.fixture # The kwargs to be passed to the Setting when we want to create a 'short' setting. fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = {} def __init_subclass__(cls, setting: Type[Setting] = None): """Autogenerates fixtures on the class under test.""" super().__init_subclass__() if not setting and not hasattr(cls, "Setting"): raise RuntimeError( "Need to either pass `setting` when subclassing or set " "a 'Sethod' class attribute." ) if setting is not None: # Make the setting accessible to tests as either self.Setting or cls.Setting for # classmethods. cls.Setting = setting cls.dataset: pytest.fixture = make_dataset_fixture(cls.Setting) def assert_chance_level(self, setting: Setting, results: Setting.Results): """Called during testing. Use this to assert that the results you get from applying your method on the given setting match your expectations. Args: setting results (Results): A given Results object. """ assert results is not None assert results.objective > 0 print(f"Objective when applied to a setting of type {type(setting)}: {results.objective}") @pytest.mark.timeout(60) def test_random_baseline(self, config: Config): """ Test that applies a random baseline to the Setting, and checks that the results are around chance level. """ # Create the Setting setting_type = self.Setting # if issubclass(setting_type, ContinualRLSetting): # kwargs.update(max_steps=100, test_steps_per_task=100) # if issubclass(setting_type, IncrementalRLSetting): # kwargs.update(nb_tasks=2) # if issubclass(setting_type, ClassIncrementalSetting): # kwargs = dict(nb_tasks=5) # if issubclass(setting_type, (TraditionalSLSetting, RLSetting)): # kwargs.pop("nb_tasks", None) # if isinstance(setting, SLSetting): # method.batch_size = 64 # elif isinstance(setting, RLSetting): # method.batch_size = None # setting.train_max_steps = 100 setting: Setting = setting_type(**self.fast_dev_run_kwargs) method = RandomBaselineMethod() results = setting.apply(method, config=config) self.assert_chance_level(setting, results=results) def make_dataset_fixture(setting_type: Union[Type[Setting], functools.partial]): """Create a parametrized fixture that will go through all the available datasets for a given setting.""" def dataset(_, request): dataset = request.param return dataset if isinstance(setting_type, functools.partial): setting_type = setting_type.args[0] assert inspect.isclass(setting_type) and issubclass(setting_type, Setting) datasets = set(setting_type.available_datasets.keys()) datasets_to_remove = set(["MT10", "MT50", "CW10", "CW20"]) # NOTE: Need deterministic ordering for the datasets for tests to be parallelizable # with pytest-xdist. datasets = sorted(list(datasets - datasets_to_remove)) return pytest.fixture( params=datasets, scope="module", )(dataset) ================================================ FILE: sequoia/settings/offline_rl/setting.py ================================================ from dataclasses import dataclass from typing import Any, ClassVar, Dict, List import gym from gym.wrappers import RecordEpisodeStatistics from matplotlib import pyplot as plt from simple_parsing.helpers import choice from sklearn.model_selection import train_test_split from torch.utils.data import DataLoader from sequoia import Results from sequoia.settings.base import Setting try: import d3rlpy except ImportError as err: raise RuntimeError(f"You need to have `d3rlpy` installed to use these methods.") from err @dataclass class OfflineRLResults(Results): # TODO: Write these methods def summary(self) -> str: return f"Offline RL results: {self.objective_name} = {self.objective}" def make_plots(self) -> Dict[str, plt.Figure]: return {} def to_log_dict(self, verbose: bool = False) -> Dict[str, Any]: return {self.objective_name: self.objective} # Metrics from online testing test_rewards: list test_episode_length: list test_episode_count: list objective_name: ClassVar[str] = "Average Reward" @property def objective(self): return sum(self.test_rewards) / len(self.test_rewards) # Offline datasets from d3rlpy (not including atari) offline_datasets_from_d3rlpy = { "cartpole-replay", "cartpole-random", "pendulum-replay", "pendulum-random", "hopper", "halfcheetah", "walker", "ant", } # Offline atari datasets from d3rlpy offline_atari_datasets_from_d3rlpy = set(d3rlpy.datasets.ATARI_GAMES) @dataclass class OfflineRLSetting(Setting): # A list of available offline rl datasets available_datasets: ClassVar[List[str]] = list(offline_datasets_from_d3rlpy) + list( offline_atari_datasets_from_d3rlpy ) # choice of dataset for the current setting dataset: str = choice(available_datasets, default="cartpole-replay") # size of validation set val_size: float = 0.2 # mask for control bootstrapping create_mask: bool = False mask_size: int = 1 def __post_init__(self): # Load d3rlpy offline dataset if ( self.dataset in offline_datasets_from_d3rlpy or self.dataset in offline_atari_datasets_from_d3rlpy ): mdp_dataset, self.env = d3rlpy.datasets.get_dataset( self.dataset, self.create_mask, self.mask_size ) self.train_dataset, self.valid_dataset = train_test_split( mdp_dataset, test_size=self.val_size ) # Load other dataset types here else: raise NotImplementedError def train_dataloader(self, batch_size: int = None) -> DataLoader: return DataLoader(self.train_dataset, batch_size=batch_size) def val_dataloader(self, batch_size: int = None) -> DataLoader: return DataLoader(self.valid_dataset, batch_size=batch_size) def test(self, method, test_env: gym.Env): """ Test self.algo on given test_env for self.test_steps iterations """ test_env = RecordEpisodeStatistics(test_env) obs = test_env.reset() for _ in range(method.test_steps): obs, reward, done, info = test_env.step( method.get_actions(obs, action_space=test_env.action_space) ) if done: break test_env.close() return test_env.episode_returns, test_env.episode_lengths, test_env.episode_count def apply(self, method) -> OfflineRLResults: method.configure(self) method.fit(train_env=self.train_dataset, valid_env=self.valid_dataset) # Test test_rewards, test_episode_length, test_episode_count = self.test(method, self.env) return OfflineRLResults( test_rewards=test_rewards, test_episode_length=test_episode_length, test_episode_count=test_episode_count, ) ================================================ FILE: sequoia/settings/presets/__init__.py ================================================ import os from pathlib import Path from typing import Dict presets_dir = Path(os.path.dirname(__file__)) setting_presets: Dict[str, Path] = {file.stem: file for file in presets_dir.rglob("*.yaml")} ================================================ FILE: sequoia/settings/presets/cartpole_pixels.yaml ================================================ dataset: PixelCartPole-v0 max_episodes: null nb_tasks: 3 train_max_steps: 3000 steps_per_task: 1000 test_max_steps: 3000 test_steps_per_task: 1000 train_task_schedule: 0: gravity: 10 length: 0.2 1000: gravity: 100 length: 1.2 2000: gravity: 10 length: 0.2 val_task_schedule: 0: gravity: 10 length: 0.2 1000: gravity: 100 length: 1.2 2000: gravity: 10 length: 0.2 test_task_schedule: 0: gravity: 10 length: 0.2 1000: gravity: 100 length: 1.2 2000: gravity: 10 length: 0.2 ================================================ FILE: sequoia/settings/presets/cartpole_state.yaml ================================================ dataset: CartPole-v0 max_episodes: null nb_tasks: 2 train_max_steps: 4000 test_max_steps: 1000 test_steps_per_task: 500 # TODO: Need to fix these task schedules: They probably won't work the same with # 'Continual' settings vs in the IncremementalRL Settings. Also need to decide what # happens with the last key in MultiTask RL. train_task_schedule: 0: gravity: 10 length: 0.3 2000: gravity: 10 length: 0.8 val_task_schedule: 0: gravity: 10 length: 0.3 2000: gravity: 10 length: 0.8 ================================================ FILE: sequoia/settings/presets/cifar10.yaml ================================================ dataset: cifar10 ================================================ FILE: sequoia/settings/presets/cifar100.yaml ================================================ dataset: cifar100 ================================================ FILE: sequoia/settings/presets/classic_control/cartpole.yaml ================================================ dataset: cartpole monitor_training_performance: true nb_tasks: 8 steps_per_task: 20_000 test_steps_per_task: 10_000 train_task_schedule: 0: force_mag: 10.0 gravity: 9.8 length: 0.5 masscart: 1.0 masspole: 0.1 tau: 0.02 1: force_mag: 8.666898797953921 gravity: 7.760853554007704 length: 0.5217446765844818 masscart: 0.8908045485782948 masspole: 0.15674543117467288 tau: 0.0220635245382657 2: force_mag: 7.458618324495651 gravity: 9.400984342498948 length: 0.6462064142932058 masscart: 1.3539692996769968 masspole: 0.133507111769919 tau: 0.021147855257131764 3: force_mag: 8.5574863595876 gravity: 6.7285307726150085 length: 0.38294798778813294 masscart: 0.8574588708166866 masspole: 0.0615236260048324 tau: 0.02307661947728138 4: force_mag: 8.02716944821746 gravity: 11.150504602382693 length: 0.4854716271338247 masscart: 1.0456215435706913 masspole: 0.10899768542795317 tau: 0.019865776370441367 5: force_mag: 11.700513704843809 gravity: 6.312815408929171 length: 0.45130592348981863 masscart: 1.0380878429865934 masspole: 0.07187238299019481 tau: 0.014052652786485233 6: force_mag: 13.934001347849406 gravity: 10.133200774940446 length: 0.4905968584092335 masscart: 0.9859796874461285 masspole: 0.08510387732488867 tau: 0.01695718912603805 7: force_mag: 10.523014205764852 gravity: 9.174287955179715 length: 0.560680060936186 masscart: 0.9513630929456718 masspole: 0.07683588323840541 tau: 0.016089633251709107 ================================================ FILE: sequoia/settings/presets/classic_control/mountaincar_continuous.yaml ================================================ dataset: MountainCarContinuous-v0 monitor_training_performance: true nb_tasks: 8 train_max_steps: 160_000 train_steps_per_task: 20_000 test_max_steps: 80_000 test_steps_per_task: 10_000 train_task_schedule: 0: goal_position: 0.45 goal_velocity: 0 1: goal_position: 0.4565062937130897 goal_velocity: 0 2: goal_position: 0.526503904898121 goal_velocity: 0 3: goal_position: 0.37901356007820275 goal_velocity: 0 4: goal_position: 0.5132810016616194 goal_velocity: 0 5: goal_position: 0.5023364056388072 goal_velocity: 0 6: goal_position: 0.47315246637784114 goal_velocity: 0 7: goal_position: 0.45239346485932264 goal_velocity: 0 ================================================ FILE: sequoia/settings/presets/fashion_mnist.yaml ================================================ dataset: fashion_mnist # Two classes per task: increment: 2 test_increment: 2 ================================================ FILE: sequoia/settings/presets/mnist.yaml ================================================ dataset: mnist ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_3each.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 2 3: level: 10 4: level: 11 5: level: 12 6: level: 20 7: level: 21 8: level: 22 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_4each.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 2 3: level: 3 4: level: 10 5: level: 11 6: level: 12 7: level: 13 8: level: 20 9: level: 21 10: level: 22 11: level: 23 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_5each.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 2 3: level: 3 4: level: 4 5: level: 10 6: level: 11 7: level: 12 8: level: 13 9: level: 14 10: level: 20 11: level: 21 12: level: 22 13: level: 23 14: level: 24 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_all.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 2 3: level: 3 4: level: 4 5: level: 5 6: level: 6 7: level: 7 8: level: 8 9: level: 9 10: level: 10 11: level: 11 12: level: 12 13: level: 13 14: level: 14 15: level: 15 16: level: 16 17: level: 17 18: level: 18 19: level: 19 20: level: 20 21: level: 21 22: level: 22 23: level: 23 24: level: 24 25: level: 25 26: level: 26 27: level: 27 28: level: 28 29: level: 29 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_jumps.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 2 3: level: 3 4: level: 4 5: level: 5 6: level: 6 7: level: 7 8: level: 8 9: level: 9 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_jumps_and_ladders.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 20 1: level: 21 2: level: 22 3: level: 23 4: level: 24 5: level: 25 6: level: 26 7: level: 27 8: level: 28 9: level: 29 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_ladders.yaml ================================================ dataset: monsterkong steps_per_task: 10_000_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 10 1: level: 11 2: level: 12 3: level: 13 4: level: 14 5: level: 15 6: level: 16 7: level: 17 8: level: 18 9: level: 19 ================================================ FILE: sequoia/settings/presets/monsterkong/monsterkong_mix.yaml ================================================ dataset: monsterkong monitor_training_performance: true force_pixel_observations: true nb_tasks: 8 train_max_steps: 1_600_000 train_steps_per_task: 200_000 test_steps_per_task: 10_000 test_max_steps: 80_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 10 3: level: 11 4: level: 20 5: level: 21 6: level: 30 7: level: 31 ================================================ FILE: sequoia/settings/presets/mujoco/half_cheetah.yaml ================================================ dataset: ContinualHalfCheetah-v2 monitor_training_performance: true nb_tasks: 8 train_steps_per_task: 200_000 test_steps_per_task: 10_000 train_task_schedule: 0: gravity: -9.81 1: gravity: -7.3087968946619615 2: gravity: -5.615716866871361 3: gravity: -12.45890973547683 4: gravity: -7.6875976238634465 5: gravity: -5.807262467656652 6: gravity: -8.448144726367474 7: gravity: -7.750512896029625 ================================================ FILE: sequoia/settings/presets/rl_track.yaml ================================================ dataset: monsterkong known_task_boundaries_at_train_time: true known_task_boundaries_at_test_time: false task_labels_at_train_time: true task_labels_at_test_time: false monitor_training_performance: true steps_per_task: 200_000 test_steps_per_task: 10_000 train_task_schedule: 0: level: 0 1: level: 1 2: level: 10 3: level: 11 4: level: 20 5: level: 21 6: level: 30 7: level: 31 ================================================ FILE: sequoia/settings/presets/sl_track.yaml ================================================ dataset: synbols nb_tasks: 12 known_task_boundaries_at_train_time: true known_task_boundaries_at_test_time: false task_labels_at_train_time: true task_labels_at_test_time: false monitor_training_performance: true ================================================ FILE: sequoia/settings/rl/__init__.py ================================================ from .environment import RLEnvironment from .setting import RLSetting ActiveEnvironment = RLEnvironment from .continual import ContinualRLSetting, make_continuous_task from .discrete import DiscreteTaskAgnosticRLSetting, make_discrete_task from .incremental import IncrementalRLSetting, make_incremental_task # TODO: Properly Add the multi-task RL setting. from .multi_task import MultiTaskRLSetting from .task_incremental import TaskIncrementalRLSetting from .traditional import TraditionalRLSetting ================================================ FILE: sequoia/settings/rl/continual/__init__.py ================================================ from .environment import GymDataLoader from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType from .results import ContinualRLResults from .setting import ContinualRLSetting from .tasks import make_continuous_task ContinualRLEnvironment = GymDataLoader Results = ContinualRLResults ================================================ FILE: sequoia/settings/rl/continual/environment.py ================================================ """ Dataloader for a Gym Environment. Uses multiple parallel environments. TODO: @lebrice: We need to decide which of these two behaviours we want to support in the GymDataLoader, (if not both): - Either iterate over the dataset and get the usual 4-item tuples like gym, by using a policy to generate the actions, OR - Give back 3-item tuples (without the reward) and give the reward when users send back an action for the current observation. Users would either be required to send actions back after each observation or to provide a policy to "fill-in-the-gaps" and select the action when the model doesn't send one back. The traditional supervised dataloader can be easily recovered in this second case: since the reward doesn't depend on the action, we can just send back a random or None action to the dataloader, and group the returned reward with the batch of observations, before yielding the (observations, rewards) batch. In either case, we can easily keep the `step` API from gym available. Need to talk more about this for sure. """ import warnings from typing import Any, Iterable, Iterator, Optional, TypeVar, Union import gym import numpy as np from gym import Wrapper, spaces from gym.utils.colorize import colorize from gym.vector import AsyncVectorEnv, VectorEnv from gym.vector.utils import batch_space from torch import Tensor from torch.utils.data import IterableDataset from sequoia.common.gym_wrappers import EnvDataset, IterableWrapper from sequoia.common.gym_wrappers.policy_env import PolicyEnv from sequoia.common.gym_wrappers.utils import StepResult from sequoia.settings.base.objects import Actions from sequoia.settings.rl.environment import ActiveEnvironment from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) T = TypeVar("T") # TODO: The typing information from sequoia.settings.base.environment isn't quite # accurate here... The observations are bound by Tensors or numpy arrays, not # 'Batch' objects. # from sequoia.settings.base.environment import ObservationType, ActionType, RewardType ObservationType = TypeVar("ObservationType") ActionType = TypeVar("ActionType") RewardType = TypeVar("RewardType") class GymDataLoader( ActiveEnvironment[ObservationType, ActionType, RewardType], IterableWrapper, Iterable ): """Environment for RL settings. Exposes **both** the `gym.Env` as well as the "Active" DataLoader APIs. This is useful because it makes it easy to adapt a method originally made for SL so that it can also work in a reinforcement learning context, where the rewards (e.g. image labels, or correct/incorrect prediction, etc.) are only given *after* the action (e.g. y_pred) has been received by the environment. meaning you can use this in two different ways: 1. Gym-style using `step`: 1. Agent --------- action ----------------> Env 2. Agent <---(state, reward, done, info)--- Env 2. ActiveDataLoader style, using `iter` and `send`: 1. Agent <--- (state, done, info) --- Env 2. Agent ---------- action ---------> Env 3. Agent <--------- reward ---------- Env This would look something like this in code: ```python env = GymDataLoader("CartPole-v0", batch_size=32) for states, done, infos in env: actions = actor(states) rewards = env.send(actions) loss = loss_function(...) # OR: state = env.reset() for i in range(max_steps): action = self.actor(state) states, reward, done, info = env.step(action) loss = loss_function(...) ``` """ def __init__( self, env: Union[EnvDataset, PolicyEnv] = None, dataset: Union[EnvDataset, PolicyEnv] = None, batch_size: int = None, num_workers: int = None, **kwargs, ): assert not ( env is None and dataset is None ), "One of the `dataset` or `env` arguments must be passed." assert not ( env is not None and dataset is not None ), "Only one of the `dataset` and `env` arguments can be used." if not isinstance(env, IterableDataset): raise RuntimeError( f"The env {env} isn't an interable dataset! (You can use the " f"EnvDataset or PolicyEnv wrappers to make an IterableDataset " f"from a gym environment." ) if isinstance(env.unwrapped, VectorEnv): if batch_size is not None and batch_size != env.num_envs: logger.warning( UserWarning( f"The provided batch size {batch_size} will be ignored, since " f"the provided env is vectorized with a batch_size of " f"{env.unwrapped.num_envs}." ) ) batch_size = env.num_envs if isinstance(env.unwrapped, AsyncVectorEnv): num_workers = env.num_envs else: num_workers = 0 self.env = env # NOTE: The batch_size and num_workers attributes reflect the values from the # iterator (the VectorEnv), not those of the dataloader. # This is done in order to avoid pytorch workers being ever created, and also so # that pytorch-lightning stops warning us that the num_workers is too low. self._batch_size = batch_size self._num_workers = num_workers super().__init__( dataset=self.env, # The batch size is None, because the VecEnv takes care of # doing the batching for us. batch_size=None, num_workers=0, collate_fn=None, **kwargs, ) Wrapper.__init__(self, env=self.env) assert not isinstance(self.env, GymDataLoader), "Something very wrong is happening." # self.max_epochs: int = max_epochs self.observation_space: gym.Space = self.env.observation_space self.action_space: gym.Space = self.env.action_space self.reward_space: gym.Space if isinstance(env.unwrapped, VectorEnv): env: VectorEnv batch_size = env.num_envs # TODO: Overwriting the action space to be the 'batched' version of # the single action space, rather than a Tuple(Discrete, ...) as is # done in the gym.vector.VectorEnv. self.action_space = batch_space(env.single_action_space, batch_size) if not hasattr(self.env, "reward_space"): self.reward_space = spaces.Box( low=self.env.reward_range[0], high=self.env.reward_range[1], shape=(), dtype=np.float64, ) if isinstance(self.env.unwrapped, VectorEnv): # Same here, we use a 'batched' space rather than Tuple. self.reward_space = batch_space(self.reward_space, batch_size) # BUG: Fix this bug: the observation / action spaces don't accept Tensors as # valid samples, even though they should. # self.observation_space = add_tensor_support(self.observation_space) # self.action_space = add_tensor_support(self.action_space) # self.reward_space = add_tensor_support(self.reward_space) # assert has_tensor_support(self.observation_space) @property def num_workers(self) -> Optional[int]: return self._num_workers @num_workers.setter def num_workers(self, value: Any) -> Optional[int]: if value and value != self._num_workers: warnings.warn( RuntimeWarning( f"Can't set num_workers to {value}, it's hard-set to {self._num_workers}" ) ) @property def batch_size(self) -> Optional[int]: return self._batch_size @batch_size.setter def batch_size(self, value: Any) -> Optional[int]: if value != self._batch_size: warnings.warn( RuntimeWarning( f"Can't set batch size to {value}, it's hard-set to {self._batch_size}" ) ) def __next__(self) -> ObservationType: if self._iterator is None: self._iterator = self.__iter__() return next(self._iterator) # def __len__(self): # if isinstance(self.env, EnvDataset): # return self.env.max_steps # raise NotImplementedError(f"TODO: Can't tell the length of the env {self.env}.") def _obs_have_done_signal(self) -> bool: """Try to determine if the observations contain the 'done' signal or not.""" if ( isinstance(self.observation_space, spaces.Dict) and "done" in self.observation_space.spaces ): return True return False def __iter__(self) -> Iterator: # TODO: Pretty sure this could be greatly simplified by just always using the loop from EnvDataset. # return super().__iter__() # assert False, self.env.__iter__() if self.is_vectorized: # elif isinstance(self.observation_space, spaces.Tuple) if not self._obs_have_done_signal(): warnings.warn( RuntimeWarning( colorize( f"You are iterating over a vectorized env, but the observations " f"don't seem to contain the 'done' signal! You should definitely " f"consider applying something like an `AddDoneToObservation` " f"wrapper to each individual env before vectorization. ", "red", ) ) ) return self.env.__iter__() # yield from IterableWrapper.__iter__(self) # self.observation_ = self.reset() # self.done_ = False # self.action_ = None # self.reward_ = None # # Yield the first observation_. # # TODO: Maybe add something like 't' on the observations to make sure they # # line up with the rewards we get? # yield self.observation_ # if self.action_ is None: # raise RuntimeError( # f"You have to send an action using send() between every " # f"observation. (env = {self})" # ) # def done_is_true(done: Union[bool, np.ndarray, Sequence[bool]]) -> bool: # return done if isinstance(done, bool) or not done.shape else all(done) # while not any([done_is_true(self.done_), self.is_closed()]): # # logger.debug(f"step {self.n_steps_}/{self.max_steps}, (episode {self.n_episodes_})") # # Set those to None to force the user to call .send() # self.action_ = None # self.reward_ = None # yield self.observation_ # if self.action_ is None: # raise RuntimeError( # f"You have to send an action using send() between every " # f"observation. (env = {self})" # ) # def __iter__(self) -> Iterable[ObservationType]: # # This would give back a single-process dataloader iterator over the # # 'dataset' which in this case is the environment: # # return super().__iter__() # # This, on the other hand, completely bypasses the dataloader iterator, # # and instead just yields the samples from the dataset directly, which # # is actually what we want! # # BUG: Somehow this doesn't batch the samples correctly.. # return self.env.__iter__() # # TODO: BUG: Wrappers applied on top of the GymDataLoader won't have an # # effect on the values yielded by this iterator. Currently trying to fix # # this inside the IterableWrapper base class, but it's not that simple. # # return type(self.env).__iter__(self) # # if has_wrapper(self.env, EnvDataset): # # return EnvDataset.__iter__(self) # # elif has_wrapper(self.env, PolicyEnv): # # return PolicyEnv.__iter__(self) # # return type(self.env).__iter__(self) # # return iter(self.env) # # yield from self._iterator # # Could increment the number of epochs here also, if we wanted to keep # # count. # def random_actions(self): # return self.env.random_actions() def step(self, action: Union[ActionType, Any]) -> StepResult: # logger.debug(f"Calling step on self.env") return super().step(action) def send(self, action: Union[ActionType, Any]) -> RewardType: # TODO: Remove this unwrapping code, and instead only unwrap stuff if necessary # for the environment. if isinstance(action, Actions): action = action.y_pred if isinstance(action, Tensor): action = action.detach().cpu().numpy() if isinstance(action, np.ndarray) and not action.shape: action = action.item() if isinstance(self.env.action_space, spaces.Tuple) and isinstance(action, np.ndarray): action = action.tolist() assert action in self.env.action_space, (action, self.env.action_space) return super().send(action) # self.action_ = action # self.observation_, self.reward_, self.done_, self.info_ = su(action) # return self.reward_ # return self.env.send(action) ================================================ FILE: sequoia/settings/rl/continual/environment_test.py ================================================ from typing import ClassVar, Optional, Type import gym import numpy as np import pytest import torch from gym import spaces from gym.vector.utils import batch_space from torch import Tensor from sequoia.common.gym_wrappers import EnvDataset, PixelObservationWrapper from sequoia.conftest import param_requires_atari_py from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import take from .environment import GymDataLoader from .make_env import make_batched_env logger = get_logger(__name__) class TestGymDataLoader: # Grouping tests into a class so we can inherit from it in another test module, for # instance in the tests for EnvironmentProxy class. GymDataLoader: ClassVar[Type[GymDataLoader]] = GymDataLoader @pytest.mark.parametrize("batch_size", [1, 2, 5]) @pytest.mark.parametrize( "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")] ) def test_spaces(self, env_name: str, batch_size: int): dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size)) batched_obs_space = dataset.observation_space # NOTE: the VectorEnv class creates the 'batched' action space by creating a # Tuple of the single action space, of length 'N', which seems a bit weird. # batched_action_space = vector_env.action_space batched_action_space = batch_space(dataset.single_action_space, batch_size) dataloader_env = self.GymDataLoader(dataset, batch_size=batch_size) assert dataloader_env.observation_space == batched_obs_space assert dataloader_env.action_space == batched_action_space dataloader_env.reset() for observation_batch in take(dataloader_env, 3): if isinstance(observation_batch, Tensor): observation_batch = observation_batch.cpu().numpy() assert observation_batch in batched_obs_space actions = dataloader_env.action_space.sample() assert len(actions) == batch_size assert actions in batched_action_space rewards = dataloader_env.send(actions) # BUG: rewards has dtype np.float64, while the space has np.float32. assert len(rewards) == batch_size assert rewards in dataloader_env.reward_space @pytest.mark.parametrize("batch_size", [None, 1, 2, 5]) @pytest.mark.parametrize( "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")] ) def test_max_steps_is_respected(self, env_name: str, batch_size: int): max_steps = 5 env_name = "CartPole-v0" env = make_batched_env(env_name, batch_size=batch_size) dataset = EnvDataset(env) from sequoia.common.gym_wrappers.action_limit import ActionLimit dataset = ActionLimit(dataset, max_steps=max_steps * (batch_size or 1)) env: GymDataLoader = self.GymDataLoader(dataset) env.reset() i = 0 for i, obs in enumerate(env): assert obs in env.observation_space assert i < max_steps, f"Max steps should have been respected: {i}" env.send(env.action_space.sample()) assert i == max_steps - 1 env.close() @pytest.mark.parametrize("batch_size", [None, 1, 2, 5]) @pytest.mark.parametrize("seed", [None, 123, 456]) # @pytest.mark.parametrize( # "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")] # ) def test_multiple_epochs_works(self, batch_size: Optional[int], seed: Optional[int]): epochs = 3 max_steps_per_episode = 10 from gym.wrappers import TimeLimit from sequoia.common.gym_wrappers import AddDoneToObservation from sequoia.conftest import DummyEnvironment def env_fn(): # FIXME: Using the DummyEnvironment for now since it's easier to debug with. # env = gym.make(env_name) env = DummyEnvironment() env = AddDoneToObservation(env) env = TimeLimit(env, max_episode_steps=max_steps_per_episode) return env # assert False, [env_fn(i).unwrapped for i in range(4)] # env = gym.vector.make(env_name, num_envs=(batch_size or 1)) env = make_batched_env(env_fn, batch_size=batch_size) batched_env = env # from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit # env = EpisodeLimit(env, max_episodes=epochs) from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors env = ConvertToFromTensors(env) env = EnvDataset(env, max_steps_per_episode=max_steps_per_episode) env: GymDataLoader = self.GymDataLoader(env) # BUG: Seems to be a little bug in the shape of the items yielded by the env due # to the concat_fn of the DataLoader. # if batch_size and batch_size >= 1: # assert False, (env.reset().shape, env.observation_space, next(iter(env)).shape) env.seed(seed) all_rewards = [] with env: for epoch in range(epochs): for step, obs in enumerate(env): print(f"'epoch' {epoch}, step {step}:, obs: {obs}") assert obs in env.observation_space, obs.shape assert ( # BUG: This isn't working: (sometimes!) step < max_steps_per_episode ), "Max steps per episode should have been respected." rewards = env.send(env.action_space.sample()) if batch_size is None: all_rewards.append(rewards) else: all_rewards.extend(rewards) # Since in the VectorEnv, 'episodes' are infinite, we must have # reached the limit of the number of steps, while in a single # environment, the episode might have been shorter. assert step <= max_steps_per_episode - 1 assert epoch == epochs - 1 if batch_size in [None, 1]: # Some episodes might last shorter than the max number of steps per episode, # therefore the total should be at most this much: assert len(all_rewards) <= epochs * max_steps_per_episode else: # The maximum number of steps per episode is set, but the env is vectorized, # so the number of 'total' rewards we get from all envs should be *exactly* # this much: assert len(all_rewards) == epochs * max_steps_per_episode * batch_size @pytest.mark.parametrize("batch_size", [1, 2, 5]) @pytest.mark.parametrize("env_name", [param_requires_atari_py("ALE/Breakout-v5")]) def test_reward_isnt_always_one(self, env_name: str, batch_size: int): epochs = 3 max_steps_per_episode = 100 env = make_batched_env(env_name, batch_size=batch_size) dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode) env: GymDataLoader = self.GymDataLoader(env=dataset) all_rewards = [] with env: env.reset() for epoch in range(epochs): for i, batch in enumerate(env): rewards = env.send(env.action_space.sample()) all_rewards.extend(rewards) assert all_rewards != np.ones(len(all_rewards)).tolist() @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [1, 2, 5, 10]) def test_batched_state(self, env_name: str, batch_size: int): max_steps_per_episode = 10 env = make_batched_env(env_name, batch_size=batch_size) dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode) env: GymDataLoader = GymDataLoader( dataset, batch_size=batch_size, ) with gym.make(env_name) as temp_env: state_shape = temp_env.observation_space.shape action_shape = temp_env.action_space.shape state_shape = (batch_size, *state_shape) action_shape = (batch_size, *action_shape) reward_shape = (batch_size,) state = env.reset() assert state.shape == state_shape env.seed(123) i = 0 for obs_batch in take(env, 5): assert obs_batch.shape == state_shape random_actions = env.action_space.sample() assert torch.as_tensor(random_actions).shape == action_shape assert temp_env.action_space.contains(random_actions[0]) reward = env.send(random_actions) assert reward.shape == reward_shape i += 1 assert i == 5 @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [1, 2, 5, 10]) def test_batched_pixels(self, env_name: str, batch_size: int): max_steps_per_episode = 10 pyglet = pytest.importorskip("pyglet") wrappers = [PixelObservationWrapper] env = make_batched_env(env_name, wrappers=wrappers, batch_size=batch_size) dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode) with gym.make(env_name) as temp_env: for wrapper in wrappers: temp_env = wrapper(temp_env) state_shape = temp_env.observation_space.shape action_shape = temp_env.action_space.shape state_shape = (batch_size, *state_shape) action_shape = (batch_size, *action_shape) reward_shape = (batch_size,) env = self.GymDataLoader( dataset, batch_size=batch_size, ) assert isinstance(env.observation_space, spaces.Box) assert len(env.observation_space.shape) == 4 assert env.observation_space.shape[0] == batch_size env.seed(1234) for i, batch in enumerate(env): assert len(batch) == batch_size if isinstance(batch, Tensor): batch = batch.cpu().numpy() assert batch in env.observation_space random_actions = env.action_space.sample() assert torch.as_tensor(random_actions).shape == action_shape assert temp_env.action_space.contains(random_actions[0]) reward = env.send(random_actions) assert reward.shape == reward_shape ================================================ FILE: sequoia/settings/rl/continual/make_env.py ================================================ """Creates an IterableDataset from a gym env by applying different wrappers. """ import multiprocessing as mp import warnings from functools import partial from typing import Callable, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union import gym from gym import Wrapper from gym.vector import AsyncVectorEnv, SyncVectorEnv, VectorEnv from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) W = TypeVar("W", bound=Union[gym.Env, gym.Wrapper]) WrapperAndKwargs = Tuple[Type[gym.Wrapper], Dict] def make_batched_env( base_env: Union[str, Callable], batch_size: int = 10, wrappers: Iterable[Union[Type[Wrapper], WrapperAndKwargs]] = None, shared_memory: bool = True, num_workers: Optional[int] = None, **kwargs, ) -> VectorEnv: """Create a vectorized environment from multiple copies of an environment. NOTE: This function does pretty much the same as `gym.vector.make`, but with a bit more flexibility: - Allows passing an env factory to start with, rather than only taking ids. - Allows passing wrappers to be added to the env on each worker, as well as wrappers to add on top of the returned (batched) env. - Allows passing tuples of (Type[Wrapper, kwargs]) Parameters ---------- base_env : str The environment ID (or an environment factory). This must be a valid ID from the registry. batch_size : int Number of copies of the environment (as well as batch size). num_workers : Optional[int] Number of workers to use. When `None` (default), uses as many workers as there are CPUs on this machine. When 0, the returned environment will be a `SyncVectorEnv`. When `num_workers` == `batch_size`, returns an AsyncVectorEnv. When `num_workers` != `batch_size`, returns a `BatchVectorEnv`. wrappers : Callable or Iterable of Callables (default: `None`) If not `None`, then apply the wrappers to each internal environment during creation. **kwargs : Dict Keyword arguments to be passed to `gym.make` when `base_env` is an id. Returns ------- env : `gym.vector.VectorEnv` instance The vectorized environment. Example ------- >>> import gym >>> env = gym.vector.make('CartPole-v1', 3) >>> env.seed([123, 456, 789]) >>> env.reset() array([[ 0.01823519, -0.0446179 , -0.02796401, -0.03156282], [-0.00303268, -0.00523447, -0.03759432, 0.025485 ], [-0.04084033, -0.0285856 , 0.01318461, -0.03327109]], dtype=float32) """ # Get the default wrappers, if needed. wrappers = wrappers or [] base_env_factory: Callable[[], gym.Env] if isinstance(base_env, str): base_env_factory = partial(gym.make, base_env) elif callable(base_env): base_env_factory = base_env else: raise NotImplementedError( f"Unsupported base env: {base_env}. Must be " f"either a string or a callable for now." ) def pre_batch_env_factory(): env = base_env_factory(**kwargs) for wrapper in wrappers: if isinstance(wrapper, tuple): assert len(wrapper) == 2 and isinstance(wrapper[1], dict) wrapper = partial(wrapper[0], **wrapper[1]) env = wrapper(env) return env if batch_size is None: return pre_batch_env_factory() env_fns = [pre_batch_env_factory for _ in range(batch_size)] if num_workers is None: if batch_size == 1: num_workers = 0 else: num_workers = min(mp.cpu_count(), batch_size) if num_workers == 0: if batch_size > 1: warnings.warn( UserWarning( f"Running {batch_size} environments in series, which might be " f"slow. Consider setting the `num_workers` argument, perhaps to " f"the number of CPUs on your machine." ) ) return SyncVectorEnv(env_fns) if num_workers == batch_size: return AsyncVectorEnv(env_fns, shared_memory=shared_memory) raise RuntimeError(f"Need num_workers to match batch_size for now.") return AsyncVectorEnv(env_fns, shared_memory=shared_memory, n_workers=num_workers) def wrap(env: gym.Env, wrappers: Iterable[Union[Type[Wrapper], WrapperAndKwargs]]) -> Wrapper: wrappers = list(wrappers) # Convert the list of wrapper types or (wrapper_type, kwargs) tuples into # a list of callables that we can apply successively to the env. wrapper_fns = _make_wrapper_fns(wrappers) for wrapper_fn in wrapper_fns: env = wrapper_fn(env) return env def _make_wrapper_fns( wrappers_and_args: Iterable[Union[Type[Wrapper], Tuple[Type[Wrapper], Dict]]] ) -> List[Callable[[Wrapper], Wrapper]]: """Given a list of either wrapper classes or (wrapper, kwargs) tuples, returns a list of callables, each of which just takes an env and wraps it using the wrapper and the kwargs, if present. """ wrappers_and_args = list(wrappers_and_args or []) wrapper_functions: List[Callable[[gym.Wrapper], gym.Wrapper]] = [] for wrapper_and_args in wrappers_and_args: if isinstance(wrapper_and_args, (tuple, list)): # List element was a tuple with (wrapper, (args?), kwargs). wrapper, *args, kwargs = wrapper_and_args logger.debug(f"Wrapper: {wrapper}, args: {args}, kwargs: {kwargs}") wrapper_fn = partial(wrapper, *args, **kwargs) else: # list element is a type of Wrapper or some kind of callable. wrapper_fn = wrapper_and_args wrapper_functions.append(wrapper_fn) return wrapper_functions ================================================ FILE: sequoia/settings/rl/continual/make_env_test.py ================================================ """ Tests that check that combining wrappers works fine in combination. """ from typing import Union import gym import pytest import torch from gym.vector import AsyncVectorEnv, SyncVectorEnv from sequoia.conftest import requires_pyglet, slow_param from .make_env import make_batched_env @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)]) def test_make_batched_env(env_name: str, batch_size: int): env = make_batched_env(base_env=env_name, batch_size=batch_size) start_state = env.reset() assert start_state.shape == (batch_size, 4) for i in range(10): action = env.action_space.sample() assert torch.as_tensor(action).shape == (batch_size,) obs, reward, done, info = env.step(action) assert obs.shape == (batch_size, 4) assert reward.shape == (batch_size,) @pytest.mark.xfail( reason="Not sure that the 'id' function gives an 'absolute' memory adress, or if " "the address is process-relative, in which case it might be an explanation as to " "why these tests don't work." ) @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [4]) @pytest.mark.parametrize("num_workers", [0, 4]) def test_make_batched_env_envs_have_distinct_ids(env_name: str, batch_size: int, num_workers: int): # NOTE: We get a SyncVectorEnv if num_workers == 0, else we get an AsyncVectorEnv if # num_workers == batch_size, else we get a BatchVectorEnv. from gym.wrappers import TimeLimit def base_env_fn(): env = gym.make(env_name) return TimeLimit(env, max_episode_steps=10) env: Union[SyncVectorEnv, AsyncVectorEnv] = make_batched_env( base_env=base_env_fn, batch_size=batch_size, num_workers=num_workers ) if isinstance(env, SyncVectorEnv): envs = env.envs # Assert that the wrappers are distinct objects assert len(set(id(env) for env in envs)) == batch_size # Assert that the unwrapped envs are distinct objects assert len(set(id(env.unwrapped) for env in envs)) == batch_size else: assert isinstance(env, AsyncVectorEnv) ids = env.apply(id) assert len(set(ids)) == batch_size unwrapped_ids = env.apply(get_unwrapped_id) assert len(set(unwrapped_ids)) == batch_size def get_unwrapped_id(env): return id(env.unwrapped) @requires_pyglet @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)]) def test_make_env_with_wrapper(env_name: str, batch_size: int): env = make_batched_env( base_env=env_name, batch_size=batch_size, wrappers=[PixelObservationWrapper], ) start_state = env.reset() expected_state_shape = (batch_size, 400, 600, 3) assert start_state.shape == expected_state_shape for i in range(10): action = env.action_space.sample() assert torch.as_tensor(action).shape == (batch_size,) obs, reward, done, info = env.step(action) assert obs.shape == expected_state_shape assert reward.shape == (batch_size,) from gym.vector import AsyncVectorEnv from sequoia.common.gym_wrappers import MultiTaskEnvironment, PixelObservationWrapper @pytest.mark.xfail(reason="TODO: Check if gym supports remote getattr now.") @pytest.mark.parametrize("env_name", ["CartPole-v0"]) @pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)]) def test_make_env_with_wrapper_and_kwargs(env_name: str, batch_size: int): # NOTE: Since BatchVectorEnv and our subclasses of the vectorenvs in gym got removed, we lost # the ability to use the remote getattr feature. task_schedule = {0: dict(length=0.5), 50: dict(length=1.5)} env = make_batched_env( base_env=env_name, batch_size=batch_size, wrappers=[ PixelObservationWrapper, lambda env: MultiTaskEnvironment(env, task_schedule=task_schedule), ], # For now, setting the number of workers to the batch size, just so we # get an AsyncVectorEnv rather than the BatchedVectorEnv (so the remote_getattr works). num_workers=batch_size, ) start_state = env.reset() expected_state_shape = (batch_size, 400, 600, 3) assert start_state.shape == expected_state_shape for i in range(100): action = env.action_space.sample() assert torch.as_tensor(action).shape == (batch_size,) assert env.length == [2.0 for i in range(batch_size)] obs, reward, done, info = env.step(action) assert obs.shape == expected_state_shape assert reward.shape == (batch_size,) ================================================ FILE: sequoia/settings/rl/continual/objects.py ================================================ from dataclasses import dataclass from typing import Optional, Sequence, TypeVar, Union from torch import Tensor from sequoia.settings.assumptions.continual import ContinualAssumption from sequoia.settings.rl import RLSetting @dataclass(frozen=True) class Observations(RLSetting.Observations, ContinualAssumption.Observations): """Observations from a Continual Reinforcement Learning environment.""" x: Tensor task_labels: Optional[Tensor] = None # The 'done' that is normally returned by the 'step' method. # We add this here in case a method were to iterate on the environments in the # dataloader-style so they also have access to those (i.e. for the BaseMethod). done: Optional[Union[bool, Sequence[bool]]] = None @dataclass(frozen=True) class Actions(RLSetting.Actions, ContinualAssumption.Actions): """Actions to be sent to a Continual Reinforcement Learning environment.""" y_pred: Tensor @dataclass(frozen=True) class Rewards(RLSetting.Rewards, ContinualAssumption.Rewards): """Rewards obtained from a Continual Reinforcement Learning environment.""" y: Tensor ObservationType = TypeVar("ObservationType", bound=Observations) ActionType = TypeVar("ActionType", bound=Actions) RewardType = TypeVar("RewardType", bound=Rewards) ================================================ FILE: sequoia/settings/rl/continual/results.py ================================================ from typing import ClassVar, Generic, TypeVar from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings.assumptions.continual import ContinualResults from sequoia.utils.plotting import autolabel, plt MetricType = TypeVar("MetricType", bound=EpisodeMetrics) class ContinualRLResults(ContinualResults, Generic[MetricType]): """Results for a ContinualRLSetting.""" # Higher mean reward / episode => better lower_is_better: ClassVar[bool] = False objective_name: ClassVar[str] = "Mean reward per episode" # Minimum runtime considered (in hours). # (No extra points are obtained for going faster than this.) min_runtime_hours: ClassVar[float] = 1.5 # Maximum runtime allowed (in hours). max_runtime_hours: ClassVar[float] = 12.0 def mean_reward_plot(self): raise NotImplementedError("TODO") figure: plt.Figure axes: plt.Axes figure, axes = plt.subplots() x = list(range(self.num_tasks)) y = [metrics.accuracy for metrics in self.average_metrics_per_task] rects = axes.bar(x, y) axes.set_title("Task Accuracy") axes.set_xlabel("Task") axes.set_ylabel("Accuracy") axes.set_ylim(0, 1.0) autolabel(axes, rects) return figure ================================================ FILE: sequoia/settings/rl/continual/setting.py ================================================ """ Current most general Setting in the Reinforcement Learning side of the tree. """ import difflib import json import textwrap import warnings from dataclasses import dataclass, fields from functools import partial from pathlib import Path from typing import Any, Callable, ClassVar, Dict, List, Optional, Type, Union import gym import numpy as np from gym import spaces from gym.envs.registration import EnvSpec, registry from gym.utils import colorize from gym.wrappers import TimeLimit from simple_parsing import choice, field, list_field from simple_parsing.helpers import dict_field try: from stable_baselines3.common.atari_wrappers import AtariWrapper as SB3AtariWrapper except ImportError: class SB3AtariWrapper: pass from gym.wrappers.atari_preprocessing import AtariPreprocessing as GymAtariWrapper import wandb from sequoia.common import Config from sequoia.common.gym_wrappers import ( AddDoneToObservation, MultiTaskEnvironment, RenderEnvWrapper, SmoothTransitions, TransformObservation, TransformReward, ) from sequoia.common.gym_wrappers.action_limit import ActionLimit from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support from sequoia.common.gym_wrappers.env_dataset import EnvDataset from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit from sequoia.common.gym_wrappers.pixel_observation import ImageObservations from sequoia.common.gym_wrappers.utils import is_atari_env from sequoia.common.spaces import Sparse, TypedDictSpace from sequoia.common.transforms import Transforms from sequoia.settings.assumptions.continual import ContinualAssumption from sequoia.settings.base import Method from sequoia.settings.rl import ActiveEnvironment, RLSetting from sequoia.settings.rl.wrappers import ( HideTaskLabelsWrapper, MeasureRLPerformanceWrapper, TypedObjectsWrapper, ) from sequoia.utils import get_logger from sequoia.utils.generic_functions import move from sequoia.utils.utils import flag, pairwise from .environment import GymDataLoader from .make_env import make_batched_env from .objects import Actions, Observations, Rewards # type: ignore from .results import ContinualRLResults from .tasks import ContinuousTask, TaskSchedule, is_supported, make_continuous_task, names_match from .test_environment import ContinualRLTestEnvironment logger = get_logger(__name__) # Type alias for the Environment returned by `train/val/test_dataloader`. Environment = ActiveEnvironment[ "ContinualRLSetting.Observations", "ContinualRLSetting.Observations", "ContinualRLSetting.Rewards", ] # NOTE: Takes about 0.2 seconds to check for all compatible envs (with loading), and # only happens once. supported_envs: Dict[str, EnvSpec] = { spec.id: spec for env_id, spec in registry.env_specs.items() if is_supported(env_id) } available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs} # available_datasets.update( # {camel_case(env_id.split("-v")[0]): env_id for env_id in supported_envs} # ) @dataclass class ContinualRLSetting(RLSetting, ContinualAssumption): """Reinforcement Learning Setting where the environment changes over time. This is an Active setting which uses gym environments as sources of data. These environments' attributes could change over time following a task schedule. An example of this could be that the gravity increases over time in cartpole, making the task progressively harder as the agent interacts with the environment. """ # (NOTE: commenting out SLSetting.Observations as it is the same class # as Setting.Observations, and we want a consistent method resolution order. Observations: ClassVar[Type[Observations]] = Observations Actions: ClassVar[Type[Actions]] = Actions Rewards: ClassVar[Type[Rewards]] = Rewards # The type of results returned by an RL experiment. Results: ClassVar[Type[Results]] = ContinualRLResults # The type wrapper used to wrap the test environment, and which produces the # results. TestEnvironment: ClassVar[Type[TestEnvironment]] = ContinualRLTestEnvironment # Dict of all available options for the 'dataset' field below. available_datasets: ClassVar[Dict[str, Union[str, Any]]] = available_datasets # The function used to create the tasks for the chosen env. _task_sampling_function: ClassVar[Callable[..., ContinuousTask]] = make_continuous_task # Which environment (a.k.a. "dataset") to learn on. # The dataset could be either a string (env id or a key from the # available_datasets dict), a gym.Env, or a callable that returns a # single environment. dataset: str = choice(available_datasets, default="CartPole-v0") # The number of "tasks" that will be created for the training, valid and test # environments. # NOTE: In the case of settings with smooth task boundaries, this is the number of # "base" tasks which are created, and the task space consists of interpolations # between these base tasks. # When left unset, will use a default value that makes sense # (something like 5). nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"]) # Environment/dataset to use for validation. Defaults to the same as `dataset`. train_dataset: Optional[str] = None # Environment/dataset to use for validation. Defaults to the same as `dataset`. val_dataset: Optional[str] = None # Environment/dataset to use for testing. Defaults to the same as `dataset`. test_dataset: Optional[str] = None # Wether the task boundaries are smooth or sudden. smooth_task_boundaries: bool = True # Wether the tasks are sampled uniformly. (This is set to True in MultiTaskRLSetting # and below) stationary_context: bool = False # Max number of training steps in total. (Also acts as the "length" of the training # and validation "Datasets") train_max_steps: int = 100_000 # Maximum number of episodes in total. # TODO: Add tests for this 'max episodes' and 'episodes_per_task'. train_max_episodes: Optional[int] = None # Total number of steps in the test loop. (Also acts as the "length" of the testing # environment.) test_max_steps: int = 10_000 test_max_episodes: Optional[int] = None # Standard deviation of the multiplicative Gaussian noise that is used to # create the values of the env attributes for each task. task_noise_std: float = 0.2 # NOTE: THIS ARG IS DEPRECATED! Only keeping it here so previous config yaml files # don't cause a crash. observe_state_directly: Optional[bool] = None # NOTE: Removing those, in favor of just using the registered Pixel<...>-v? variant. # force_pixel_observations: bool = False # """ Wether to use the "pixel" version of `self.dataset`. # When `False`, does nothing. # When `True`, will do one of the following, depending on the choice of environment: # - For classic control envs, it adds a `PixelObservationsWrapper` to the env. # - For atari envs: # - If `self.dataset` is a regular atari env (e.g. "ALE/Breakout-v5"), does nothing. # - if `self.dataset` is the 'RAM' version of an atari env, raises an error. # - For mujoco envs, this raises a NotImplementedError, as we don't yet know how to # make a pixel-version the Mujoco Envs. # - For other envs: # - If the environment's observation space appears to be image-based, an error # will be raised. # - If the environment's observation space doesn't seem to be image-based, does # nothing. # """ # force_state_observations: bool = False # """ Wether to use the "state" version of `self.dataset`. # When `False`, does nothing. # When `True`, will do one of the following, depending on the choice of environment: # - For classic control envs, it does nothing, as they are already state-based. # - TODO: For atari envs, the 'RAM' version of the chosen env will be used. # - For mujoco envs, it doesn nothing, as they are already state-based. # - For other envs, if this is set to True, then # - If the environment's observation space appears to be image-based, an error # will be raised. # - If the environment's observation space doesn't seem to be image-based, does # nothing. # """ # NOTE: Removing this from the continual setting. # By default 1 for this setting, meaning that the context is a linear interpolation # between the start context (usually the default task for the environment) and a # randomly sampled task. # nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"]) # Wether to convert the observations / actions / rewards of the envs (and their # spaces) such that they return Tensors rather than numpy arrays. # TODO: Maybe switch this to True by default? prefer_tensors: bool = False # Path to a json file from which to read the train task schedule. train_task_schedule_path: Optional[Path] = None # Path to a json file from which to read the validation task schedule. val_task_schedule_path: Optional[Path] = None # Path to a json file from which to read the test task schedule. test_task_schedule_path: Optional[Path] = None # Wether observations from the environments whould include # the end-of-episode signal. Only really useful if your method will iterate # over the environments in the dataloader style # (as does the baseline method). add_done_to_observations: bool = False # The maximum number of steps per episode. When None, there is no limit. max_episode_steps: Optional[int] = None # Transforms to be applied by default to the observatons of the train/valid/test # environments. transforms: List[Transforms] = list_field() # Transforms to be applied to the training environment, in addition to those already # in `transforms`. train_transforms: List[Transforms] = list_field() # Transforms to be applied to the validation environment, in addition to those # already in `transforms`. val_transforms: List[Transforms] = list_field() # Transforms to be applied to the testing environment, in addition to those already # in `transforms`. test_transforms: List[Transforms] = list_field() # When True, a Monitor-like wrapper will be applied to the training environment # and monitor the 'online' performance during training. Note that in SL, this will # also cause the Rewards (y) to be withheld until actions are passed to the `send` # method of the Environment. monitor_training_performance: bool = flag(True) # # -------- Fields below don't have corresponding command-line arguments. ----------- # train_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False) val_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False) test_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False) # TODO: Naming is a bit inconsistent, using `valid` here, whereas we use `val` # elsewhere. train_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False) val_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False) test_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False) # keyword arguments to be passed to the base environment through gym.make(base_env, **kwargs). base_env_kwargs: Dict = dict_field(cmd=False) batch_size: Optional[int] = field(default=None, cmd=False) num_workers: Optional[int] = field(default=None, cmd=False) # Maximum number of training steps per task. # NOTE: In this particular setting there aren't clear 'tasks' to speak of. train_steps_per_task: Optional[int] = None # Number of test steps per task. # NOTE: In this particular setting there aren't clear 'tasks' to speak of. test_steps_per_task: Optional[int] = None # # Deprecated: use `train_max_steps` instead. # max_steps: Optional[int] = deprecated_property(redirects_to="train_max_steps") # # Deprecated: use `test_max_steps` instead. # test_steps: Optional[int] = deprecated_property(redirects_to="test_max_steps") # # Deprecated, use `train_steps_per_task` instead. # steps_per_task: Optional[int] = deprecated_property(redirects_to="train_steps_per_task") def __post_init__(self): defaults = {f.name: f.default for f in fields(self)} super().__post_init__() # TODO: Fix nnoying little issues with this trio of fields that are interlinked: if self.test_steps_per_task is not None: # We need set the value of self.test_max_steps and self.test_steps_per_task if self.test_task_schedule and max(self.test_task_schedule) != len( self.test_task_schedule ): self.test_max_steps = max(self.test_task_schedule) elif self.test_max_steps == defaults["test_max_steps"]: self.test_max_steps = self.nb_tasks * self.test_steps_per_task else: self.nb_tasks = self.test_max_steps // self.test_steps_per_task # if self.max_steps is not None: # warnings.warn(DeprecationWarning("'max_steps' is deprecated, use 'train_max_steps' instead.")) # self.train_max_steps = self.max_steps # if self.test_steps is not None: # warnings.warn(DeprecationWarning("'test_steps' is deprecated, use 'test_max_steps' instead.")) if self.dataset and self.dataset not in self.available_datasets.values(): try: self.dataset = find_matching_dataset(self.available_datasets, self.dataset) except NotImplementedError as e: logger.info(f"Will try to use custom dataset {self.dataset}.") except Exception as e: if getattr(self, "train_envs", []): logger.info(f"Using custom environments / datasets.") else: raise gym.error.UnregisteredEnv( f"({e}) The chosen dataset/environment ({self.dataset}) isn't in the dict of " f"available datasets/environments, and a task schedule was not passed, " f"so this Setting ({type(self).__name__}) doesn't know how to create " f"tasks for that env!\n" f"Supported envs:\n" + ("\n".join(f"- {k}: {v}" for k, v in self.available_datasets.items())) ) # The ids of the train/valid/test environments. self.train_dataset: Union[str, Callable[[], gym.Env]] = self.train_dataset or self.dataset self.val_dataset: Union[str, Callable[[], gym.Env]] = self.val_dataset or self.dataset self.test_dataset: Union[str, Callable[[], gym.Env]] = self.test_dataset or self.dataset logger.info(f"Chosen dataset: {textwrap.shorten(str(self.train_dataset), 50)}") # # The environment 'ID' associated with each 'simple name'. # self.train_dataset_id: str = self._get_dataset_id(self.train_dataset) # self.val_dataset_id: str = self._get_dataset_id(self.val_dataset) # self.train_dataset_id: str = self._get_dataset_id(self.train_dataset) # Set the number of tasks depending on the increment, and vice-versa. # (as only one of the two should be used). assert self.train_max_steps, "assuming this should always be set, for now." # Load the task schedules from the corresponding files, if present. if self.train_task_schedule_path: self.train_task_schedule = _load_task_schedule(self.train_task_schedule_path) self.nb_tasks = len(self.train_task_schedule) - 1 if self.val_task_schedule_path: self.val_task_schedule = _load_task_schedule(self.val_task_schedule_path) if self.test_task_schedule_path: self.test_task_schedule = _load_task_schedule(self.test_task_schedule_path) self.train_env: gym.Env self.valid_env: gym.Env self.test_env: gym.Env # Temporary environments which are created and used only for creating the task # schedules and the 'base' observation spaces, and then closed right after. self._temp_train_env: Optional[gym.Env] = self._make_env(self.train_dataset) self._temp_val_env: Optional[gym.Env] = None self._temp_test_env: Optional[gym.Env] = None # Create the task schedules, using the 'task sampling' function from `tasks.py`. # TODO: PLEASE HELP I'm going mad because of the validation logic for these # fields!! if not self.train_task_schedule: self.train_task_schedule = self.create_train_task_schedule() elif max(self.train_task_schedule) == len(self.train_task_schedule) - 1: # If the keys correspond to the task ids rather than the steps: if self.nb_tasks in [defaults["nb_tasks"], None]: self.nb_tasks = len(self.train_task_schedule) - 1 if self.nb_tasks < 1: raise RuntimeError(f"Need at least 2 entries in the task schedule!") logger.info( f"Assuming that the last entry in the provided task schedule is " f"the final state, and that there are {self.nb_tasks} tasks. " ) self.train_steps_per_task = ( self.train_steps_per_task or self.train_max_steps // self.nb_tasks ) new_keys = np.linspace( 0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int ).tolist() assert len(new_keys) == len(self.train_task_schedule) self.train_task_schedule = type(self.train_task_schedule)( { new_key: self.train_task_schedule[old_key] for new_key, old_key in zip(new_keys, sorted(self.train_task_schedule.keys())) } ) elif self.smooth_task_boundaries: # We have a task schedule for Continual RL. if self.train_max_steps == defaults["train_max_steps"]: self.train_max_steps = max(self.train_task_schedule) if self.smooth_task_boundaries: # NOTE: Need to have an entry at the final step last_task_step = max(self.train_task_schedule.keys()) last_task = self.train_task_schedule[last_task_step] if self.train_max_steps not in self.train_task_schedule: # FIXME Duplicating the last task for now? self.train_task_schedule[self.train_max_steps] = last_task if 0 not in self.train_task_schedule.keys(): raise RuntimeError( "`train_task_schedule` needs an entry at key 0, as the initial state" ) if self.train_max_steps != max(self.train_task_schedule): if self.train_max_steps in [defaults["train_max_steps"], None]: # TODO: This might be wrong no? self.train_max_steps = max(self.train_task_schedule) logger.info(f"Setting `train_max_steps` to {self.train_max_steps}") elif self.smooth_task_boundaries: raise RuntimeError( f"For now, the train task schedule needs to have a value at key " f"`train_max_steps` ({self.train_max_steps})." ) else: last_task_step = max(self.train_task_schedule) last_task = self.train_task_schedule[last_task_step] logger.debug("Using the last task as the final state.") self.train_task_schedule[self.train_max_steps] = last_task if not self.val_task_schedule: # Avoid creating an additional env, just reuse the train_temp_env. self._temp_val_env = ( self._temp_train_env if self.val_dataset == self.train_dataset else self._make_env(self.val_dataset) ) self.val_task_schedule = self.create_val_task_schedule() elif max(self.val_task_schedule) == len(self.val_task_schedule) - 1: # If the keys correspond to the task ids rather than the transition steps expected_nb_tasks = len(self.val_task_schedule) old_keys = sorted(self.val_task_schedule.keys()) new_keys = np.linspace( 0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int ).tolist() assert len(new_keys) == len(self.train_task_schedule) self.val_task_schedule = type(self.val_task_schedule)( { new_key: self.val_task_schedule[old_key] for new_key, old_key in zip(new_keys, old_keys) } ) if not self.test_task_schedule: self._temp_test_env = ( self._temp_train_env if self.test_dataset == self.train_dataset else self._make_env(self.val_dataset) ) self.test_task_schedule = self.create_test_task_schedule() elif max(self.test_task_schedule) == len(self.test_task_schedule) - 1: # If the keys correspond to the task ids rather than the transition steps old_keys = sorted(self.test_task_schedule.keys()) new_keys = np.linspace( 0, self.test_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int ).tolist() self.test_task_schedule = type(self.test_task_schedule)( { new_key: self.test_task_schedule[old_key] for new_key, old_key in zip(new_keys, old_keys) } ) if 0 not in self.test_task_schedule.keys(): raise RuntimeError("`test_task_schedule` needs an entry at key 0, as the initial state") if self.test_max_steps != max(self.test_task_schedule): if self.test_max_steps == defaults["test_max_steps"]: self.test_max_steps = max(self.test_task_schedule) logger.info(f"Setting `test_max_steps` to {self.test_max_steps}") elif self.smooth_task_boundaries: raise RuntimeError( f"For now, the test task schedule needs to have a value at key " f"`test_max_steps` ({self.test_max_steps}). " ) # Close the temporary environments. # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting. if self._temp_train_env: # self._temp_train_env.close() pass if self._temp_val_env and self._temp_val_env is not self._temp_train_env: # self._temp_val_env.close() pass if self._temp_test_env and self._temp_test_env is not self._temp_train_env: # self._temp_test_env.close() pass train_task_lengths: List[int] = [ task_b_step - task_a_step for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys())) ] # TODO: This will crash if nb_tasks is 1, right? # train_max_steps = train_last_boundary + train_task_lengths[-1] test_task_lengths: List[int] = [ task_b_step - task_a_step for task_a_step, task_b_step in pairwise(sorted(self.test_task_schedule.keys())) ] if not ( len(self.train_task_schedule) == len(self.test_task_schedule) == len(self.val_task_schedule) ): raise RuntimeError( "Training, validation and testing task schedules should have the same " "number of items for now." ) train_last_boundary = max(set(self.train_task_schedule.keys()) - {self.train_max_steps}) test_last_boundary = max(set(self.test_task_schedule.keys()) - {self.test_max_steps}) # TODO: Really annoying validation logic for these fields needs to be simplified # somehow. # if self.train_steps_per_task is None: # # if self.nb_tasks # train_steps_per_task = self.train_max_steps // self.nb_tasks # if self.train_task_schedule: # task_lengths = [ # b - a for a, b in pairwise(self.train_task_schedule.keys()) # ] # if any( # abs(task_length - train_steps_per_task) > 1 # for task_length in task_lengths # ): # raise RuntimeError( # f"Trying to set a value for `train_steps_per_task`, but " # f"the keys of the task schedule are either uneven, or not " # f"equal to {train_steps_per_task}: " # f"task schedule keys: {self.train_task_schedule.keys()}" # ) # self.train_steps_per_task = train_steps_per_task # FIXME: This is quite confusing: expected_nb_tasks = len(self.train_task_schedule) - 1 # if ( # self.train_max_steps not in [defaults["train_max_steps"], None] # and self.train_max_steps == max(self.train_task_schedule) # ) or self.smooth_task_boundaries: # expected_nb_tasks -= 1 if self.nb_tasks != expected_nb_tasks: if self.nb_tasks in [None, defaults["nb_tasks"]]: assert len(self.train_task_schedule) == len(self.test_task_schedule) self.nb_tasks = len(self.train_task_schedule) - 1 logger.info(f"`nb_tasks` set to {self.nb_tasks} based on the task schedule") else: raise RuntimeError( f"The passed number of tasks ({self.nb_tasks}) is inconsistent " f"with train_max_steps ({self.train_max_steps}) and the " f"passed task schedule (with keys " f"{self.train_task_schedule.keys()}): " f"Expected nb_tasks to be None or {expected_nb_tasks}." ) if not train_task_lengths: assert not test_task_lengths assert expected_nb_tasks == 1 assert self.train_max_steps > 0 assert self.test_max_steps > 0 train_max_steps = self.train_max_steps test_max_steps = self.test_max_steps else: train_max_steps = sum(train_task_lengths) test_max_steps = sum(test_task_lengths) # train_max_steps = round(train_last_boundary + train_task_lengths[-1]) # test_max_steps = round(test_last_boundary + test_task_lengths[-1]) if self.train_max_steps != train_max_steps: if self.train_max_steps == defaults["train_max_steps"]: self.train_max_steps = train_max_steps else: raise RuntimeError( f"Value of train_max_steps ({self.train_max_steps}) is " f"inconsistent with the given train task schedule, which has " f"the last task boundary at step {train_last_boundary}, with " f"task lengths of {train_task_lengths}, as it suggests the maximum " f"total number of steps to be {train_last_boundary} + " f"{train_task_lengths[-1]} => {train_max_steps}!" ) if self.test_max_steps != test_max_steps: if self.test_max_steps == defaults["test_max_steps"]: self.test_max_steps = test_max_steps else: raise RuntimeError( f"Value of test_max_steps ({self.test_max_steps}) is " f"inconsistent with the given test task schedule (which has keys " f"{self.test_task_schedule.keys()}). Expected the last key to be " f"{test_max_steps}" ) if self.train_steps_per_task is None: self.train_steps_per_task = self.train_max_steps // self.nb_tasks # TODO: Fix these annoying interactions once and for all. assert self.train_max_steps // self.nb_tasks == self.train_steps_per_task, ( self.train_max_steps, self.nb_tasks, self.train_steps_per_task, self.train_task_schedule.keys(), ) if self.test_steps_per_task is None: self.test_steps_per_task = self.test_max_steps // self.nb_tasks assert self.test_max_steps // self.nb_tasks == self.test_steps_per_task, ( self.test_max_steps, self.nb_tasks, self.test_steps_per_task, self.test_task_schedule.keys(), ) def create_train_task_schedule(self) -> TaskSchedule: # change_steps = [0, self.train_max_steps] # Ex: nb_tasks == 5, train_max_steps = 10_000: # change_steps = [0, 2_000, 4_000, 6_000, 8_000, 10_000] if self.train_steps_per_task is not None: train_max_steps = self.train_steps_per_task * self.nb_tasks # if self.smooth_task_boundaries: # train_max_steps = self.train_steps_per_task * self.nb_tasks # else: # train_max_steps = self.train_steps_per_task * self.nb_tasks else: train_max_steps = self.train_max_steps assert self.nb_tasks is not None task_schedule_keys = np.linspace( 0, train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int ).tolist() return self.create_task_schedule( temp_env=self._temp_train_env, change_steps=task_schedule_keys, # # TODO: Add properties for the train/valid/test seeds? seed=self.config.seed if self.config else 123, ) def create_val_task_schedule(self) -> TaskSchedule: # Always the same as train task schedule for now. return self.train_task_schedule.copy() def create_test_task_schedule(self) -> TaskSchedule[ContinuousTask]: # Re-scale the steps in the task schedule based on self.test_max_steps # NOTE: Using the same task schedule as in training and validation for now. if self.train_task_schedule: nb_tasks = len(self.train_task_schedule) - 1 else: nb_tasks = self.nb_tasks # TODO: Do we want to re-allow the `test_steps_per_task` argument? if self.test_steps_per_task is not None: test_max_steps = self.test_steps_per_task * nb_tasks else: test_max_steps = self.test_max_steps test_task_schedule_keys = np.linspace( 0, test_max_steps, nb_tasks + 1, endpoint=True, dtype=int ).tolist() return { step: task for step, task in zip(test_task_schedule_keys, self.train_task_schedule.values()) } def create_task_schedule( self, temp_env: gym.Env, change_steps: List[int], seed: int = None, ) -> Dict[int, Dict]: """Create the task schedule, which maps from a step to the changes that will occur in the environment when that step is reached. Uses the provided `temp_env` to generate the random tasks at the steps given in `change_steps` (a list of integers). Returns a dictionary mapping from integers (the steps) to the changes that will occur in the env at that step. TODO: For now in ContinualRL we use an interpolation of a dict of attributes to be set on the unwrapped env, but in IncrementalRL it is possible to pass callables to be applied on the environment at a given timestep. """ task_schedule: Dict[int, Dict] = {} # TODO: Make it possible to use something other than steps as keys in the task # schedule, something like a NamedTuple[int, DeltaType], e.g. Episodes(10) or Steps(10) # something like that! # IDEA: Even fancier, we could use a TimeDelta to say "do one hour of task 0"!! for step in change_steps: # TODO: Pass wether its for training/validation/testing? task = type(self)._task_sampling_function( temp_env, step=step, change_steps=change_steps, seed=seed, ) task_schedule[step] = task return task_schedule @property def observation_space(self) -> TypedDictSpace: """The un-batched observation space, based on the choice of dataset and the transforms at `self.transforms` (which apply to the train/valid/test environments). The returned spaces is a TypedDictSpace, with the following properties/items: - `x`: observation space (e.g. `Image` space) - `task_labels`: Union[Discrete, Sparse[Discrete]] The task labels for each sample when task labels are available, otherwise the task labels space is `Sparse`, and entries will be `None`. """ # TODO: Is it right that we set the observation space on the Setting to be the # observation space of the current train environment? # In what situation could there be any difference between those? # - Changing the 'transforms' attributes after training? # if self.train_env is not None: # # assert self._observation_space == self.train_env.observation_space # return self.train_env.observation_space if isinstance(self._temp_train_env.observation_space, TypedDictSpace): x_space = self._temp_train_env.observation_space.x task_label_space = self._temp_train_env.observation_space.task_labels else: x_space = self._temp_train_env.observation_space # apply the transforms to the observation space. for transform in self.transforms: x_space = transform(x_space) task_label_space = self.task_label_space done_space = spaces.Box(0, 1, shape=(), dtype=bool) if not self.add_done_to_observations: done_space = Sparse(done_space, sparsity=1) observation_space = TypedDictSpace( x=x_space, task_labels=task_label_space, done=done_space, dtype=self.Observations, ) if self.prefer_tensors: observation_space = add_tensor_support(observation_space) assert isinstance(observation_space, TypedDictSpace) return observation_space @property def task_label_space(self) -> gym.Space: # TODO: Explore an alternative design for the task sampling, based more around # gym spaces rather than the generic function approach that's currently used? # FIXME: This isn't really elegant, there isn't a `nb_tasks` attribute on the # ContinualRLSetting anymore, so we have to do a bit of a hack.. Would be # cleaner to maybe put this in the assumption class, under # `self.task_label_space`? task_label_space = spaces.Box(0.0, 1.0, shape=()) if not self.task_labels_at_train_time or not self.task_labels_at_test_time: sparsity = 1 if self.task_labels_at_train_time ^ self.task_labels_at_test_time: # We have task labels "50%" of the time, ish: sparsity = 0.5 task_label_space = Sparse(task_label_space, sparsity=sparsity) return task_label_space @property def action_space(self) -> gym.Space: # TODO: Convert the action/reward spaces so they also use TypedDictSpace (even # if they just have one item), so that it correctly reflects the objects that # the envs accept. y_pred_space = self._temp_train_env.action_space # action_space = TypedDictSpace(y_pred=y_pred_space, dtype=self.Actions) return y_pred_space @property def reward_space(self) -> gym.Space: reward_range = self._temp_train_env.reward_range return getattr( self._temp_train_env, "reward_space", spaces.Box(reward_range[0], reward_range[1], shape=()), ) def apply(self, method: Method, config: Config = None) -> "ContinualRLSetting.Results": """Apply the given method on this setting to producing some results.""" # Use the supplied config, or parse one from the arguments that were # used to create `self`. self.config = config or self._setup_config(method) logger.debug(f"Config: {self.config}") # TODO: Test to make sure that this doesn't cause any other bugs with respect to # the display of stuff: # Call this method, which creates a virtual display if necessary. self.config.get_display() # TODO: Should we really overwrite the method's 'config' attribute here? if not getattr(method, "config", None): method.config = self.config # TODO: Remove `Setting.configure(method)` entirely, from everywhere, # and use the `prepare_data` or `setup` methods instead (since these # `configure` methods aren't using the `method` anyway.) method.configure(setting=self) # BUG This won't work if the task schedule uses callables as the values (as # they aren't json-serializable.) if self.stationary_context: logger.info( "Train tasks: " + json.dumps(list(self.train_task_schedule.values()), indent="\t") ) else: try: logger.info( "Train task schedule:" + json.dumps(self.train_task_schedule, indent="\t") ) # BUG: Sometimes the task schedule isnt json-serializable! except TypeError: logger.info("Train task schedule: ") for key, value in self.train_task_schedule.items(): logger.info(f"{key}: {value}") if self.config.debug: logger.debug("Test task schedule:" + json.dumps(self.test_task_schedule, indent="\t")) # Run the Training loop (which is defined in ContinualAssumption). results = self.main_loop(method) logger.info("Results summary:") logger.info(results.to_log_dict()) logger.info(results.summary()) method.receive_results(self, results=results) return results # Run the Test loop (which is defined in IncrementalAssumption). # results: RlResults = self.test_loop(method) def setup(self, stage: str = None) -> None: # Called before the start of each task during training, validation and # testing. super().setup(stage=stage) if stage in {"fit", None}: self.train_wrappers = self.create_train_wrappers() if stage in {"validate", None}: self.valid_wrappers = self.create_valid_wrappers() elif stage in {"test", None}: self.test_wrappers = self.create_test_wrappers() def prepare_data(self, *args, **kwargs) -> None: # We don't really download anything atm. if self.config is None: self.config = Config() super().prepare_data(*args, **kwargs) def train_dataloader( self, batch_size: int = None, num_workers: int = None ) -> ActiveEnvironment: """Create a training gym.Env/DataLoader for the current task. Parameters ---------- batch_size : int, optional The batch size, which in this case is the number of environments to run in parallel. When `None`, the env won't be vectorized. Defaults to None. num_workers : int, optional The number of workers (processes) to use in the vectorized env. When None, the envs are run in sequence, which could be very slow. Only applies when `batch_size` is not None. Defaults to None. Returns ------- GymDataLoader A (possibly vectorized) environment/dataloader for the current task. """ if not self.has_prepared_data: self.prepare_data() # NOTE: We actually want to call setup every time, so we re-create the # wrappers for each task. self.setup("fit") batch_size = batch_size or self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers train_seed = self.config.seed if self.config else None env_factory = partial( self._make_env, base_env=self.train_dataset, wrappers=self.train_wrappers, **self.base_env_kwargs, ) env_dataloader = self._make_env_dataloader( env_factory, batch_size=batch_size, num_workers=num_workers, max_steps=self.steps_per_phase, max_episodes=self.train_max_episodes, seed=train_seed, ) if self.monitor_training_performance: # NOTE: It doesn't always make sense to log stuff with the current task ID! wandb_prefix = "Train" if self.known_task_boundaries_at_train_time: wandb_prefix += f"/Task {self.current_task_id}" env_dataloader = MeasureRLPerformanceWrapper(env_dataloader, wandb_prefix=wandb_prefix) if self.config.render and batch_size is None: env_dataloader = RenderEnvWrapper(env_dataloader) self.train_env = env_dataloader # BUG: There is a mismatch between the train env's observation space and the # shape of its observations. # self.observation_space = self.train_env.observation_space return self.train_env def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> Environment: """Create a validation gym.Env/DataLoader for the current task. Parameters ---------- batch_size : int, optional The batch size, which in this case is the number of environments to run in parallel. When `None`, the env won't be vectorized. Defaults to None. num_workers : int, optional The number of workers (processes) to use in the vectorized env. When None, the envs are run in sequence, which could be very slow. Only applies when `batch_size` is not None. Defaults to None. Returns ------- GymDataLoader A (possibly vectorized) environment/dataloader for the current task. """ if not self.has_prepared_data: self.prepare_data() # Need to force this to happen every time, because the wrappers might change # between tasks. self._has_setup_validate = False self.setup("validate") env_factory = partial( self._make_env, base_env=self.val_dataset, wrappers=self.valid_wrappers, **self.base_env_kwargs, ) valid_seed = self.config.seed if self.config else None env_dataloader = self._make_env_dataloader( env_factory, batch_size=batch_size or self.batch_size, num_workers=num_workers if num_workers is not None else self.num_workers, max_steps=self.steps_per_phase, # TODO: Create a new property to limit validation episodes? max_episodes=self.train_max_episodes, seed=valid_seed, ) if self.monitor_training_performance: # NOTE: We also add it here, just so it logs metrics to wandb. # NOTE: It doesn't always make sense to log stuff with the current task ID! wandb_prefix = "Valid" if self.known_task_boundaries_at_train_time: wandb_prefix += f"/Task {self.current_task_id}" env_dataloader = MeasureRLPerformanceWrapper(env_dataloader, wandb_prefix=wandb_prefix) self.val_env = env_dataloader return self.val_env def test_dataloader(self, batch_size: int = None, num_workers: int = None) -> TestEnvironment: """Create the test 'dataloader/gym.Env' for all tasks. NOTE: This test environment isn't just for the current task, it actually contains the sequence of all tasks. This is different than the train or validation environments, since if the task labels are available at train time, then calling train/valid_dataloader` returns the envs for the current task only, and the `.fit` method is called once per task. This environment is also different in that it is wrapped with a Monitor, which we might eventually use to save the results/gifs/logs of the testing runs. Parameters ---------- batch_size : int, optional The batch size, which in this case is the number of environments to run in parallel. When `None`, the env won't be vectorized. Defaults to None. num_workers : int, optional The number of workers (processes) to use in the vectorized env. When None, the envs are run in sequence, which could be very slow. Only applies when `batch_size` is not None. Defaults to None. Returns ------- TestEnvironment A testing environment which keeps track of the performance of the actor and accumulates logs/statistics that are used to eventually create the 'Result' object. """ if not self.has_prepared_data: self.prepare_data() # NOTE: New for PL: The call doesn't go through if self._has_setup_test is True # Need to force this to happen every time, because the wrappers might change # between tasks. self._has_setup_test = False self.setup("test") # BUG: gym.wrappers.Monitor doesn't want to play nice when applied to # Vectorized env, it seems.. # FIXME: Remove this when the Monitor class works correctly with # batched environments. batch_size = batch_size or self.batch_size if batch_size is not None: logger.warning( UserWarning( colorize( f"WIP: Only support batch size of `None` (i.e., a single env) " f"for the test environments of RL Settings at the moment, " f"because the Monitor class from gym doesn't work with " f"VectorEnvs. (batch size was {batch_size})", "yellow", ) ) ) batch_size = None num_workers = num_workers if num_workers is not None else self.num_workers test_seed = self.config.seed if self.config else None env_factory = partial( self._make_env, base_env=self.test_dataset, wrappers=self.test_wrappers, **self.base_env_kwargs, ) # TODO: Pass the max_steps argument to this `_make_env_dataloader` method, # rather than to a `step_limit` on the TestEnvironment. env_dataloader = self._make_env_dataloader( env_factory, batch_size=batch_size, num_workers=num_workers, ) if self.test_max_episodes is not None: raise NotImplementedError(f"TODO: Use `self.test_max_episodes`") test_loop_max_steps = self.test_max_steps // (batch_size or 1) # TODO: Find where to configure this 'test directory' for the outputs of # the Monitor. if wandb.run: test_dir = wandb.run.dir else: test_dir = self.config.log_dir # TODO: Split this up into an ActionLimit wrapper, a RecordVideo wrapper, # and a RecordEpisodeStatistics wrapper. self.test_env = self.TestEnvironment( env_dataloader, task_schedule=self.test_task_schedule, directory=test_dir, step_limit=test_loop_max_steps, config=self.config, force=True, video_callable=None if wandb.run or self.config.render else False, ) self.test_env.seed(seed=test_seed) self.test_env.action_space.seed(seed=test_seed) self.test_env.observation_space.seed(seed=test_seed) return self.test_env @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. In the case of ContinualRL and DiscreteTaskAgnosticRL, fit is only called once, with an environment that shifts between all the tasks. In IncrementalRL, fit is called once per task, while in TraditionalRL and MultiTaskRL, fit is called once. """ return 1 @property def steps_per_phase(self) -> Optional[int]: """Returns the number of steps per training "phase", i.e. the max number of (steps for now) that can be taken in the training environment passed to `Method.fit` In most settings, this is the same as `steps_per_task`. Returns ------- Optional[int] `None` if `max_steps` is None, else `max_steps // phases`. """ return None if self.train_max_steps is None else self.train_max_steps // self.phases @staticmethod def _make_env( base_env: Union[str, gym.Env, Callable[[], gym.Env]], wrappers: List[Callable[[gym.Env], gym.Env]] = None, **base_env_kwargs: Dict, ) -> gym.Env: """Helper function to create a single (non-vectorized) environment.""" env: gym.Env if isinstance(base_env, str): env = gym.make(base_env, **base_env_kwargs) elif isinstance(base_env, gym.Env): env = base_env elif callable(base_env): env = base_env(**base_env_kwargs) else: raise RuntimeError( f"base_env should either be a string, a callable, or a gym " f"env. (got {base_env})." ) wrappers = wrappers or [] for wrapper in wrappers: env = wrapper(env) return env def _make_env_dataloader( self, env_factory: Callable[[], gym.Env], batch_size: Optional[int], num_workers: Optional[int] = None, seed: Optional[int] = None, max_steps: Optional[int] = None, max_episodes: Optional[int] = None, ) -> GymDataLoader: """Helper function for creating a (possibly vectorized) environment.""" logger.debug(f"batch_size: {batch_size}, num_workers: {num_workers}, seed: {seed}") env: Union[gym.Env, gym.vector.VectorEnv] if batch_size is None: env = env_factory() else: env = make_batched_env( env_factory, batch_size=batch_size, num_workers=num_workers, # TODO: Still debugging shared memory + custom spaces (e.g. Sparse). shared_memory=False, ) if max_steps: env = ActionLimit(env, max_steps=max_steps) if max_episodes: env = EpisodeLimit(env, max_episodes=max_episodes) # Apply the "post-batch" wrappers: # from sequoia.common.gym_wrappers import ConvertToFromTensors # TODO: Only the BaseMethod requires this, we should enable it only # from the BaseMethod, and leave it 'off' by default. if self.add_done_to_observations: env = AddDoneToObservation(env) if self.prefer_tensors and self.config.device: # TODO: Put this before or after the image transforms? env = TransformObservation(env, f=partial(move, device=self.config.device)) env = TransformReward(env, f=partial(move, device=self.config.device)) # # Convert the samples to tensors and move them to the right device. # env = ConvertToFromTensors(env) # env = ConvertToFromTensors(env, device=self.config.device) # Add a wrapper that converts numpy arrays / etc to Observations/Rewards # and from Actions objects to numpy arrays. env = TypedObjectsWrapper( env, observations_type=self.Observations, rewards_type=self.Rewards, actions_type=self.Actions, ) # Create an IterableDataset from the env using the EnvDataset wrapper. dataset = EnvDataset(env) # Create a GymDataLoader for the EnvDataset. env_dataloader = GymDataLoader(dataset) if batch_size and seed: # Seed each environment with its own seed (based on the base seed). env.seed([seed + i for i in range(env_dataloader.num_envs)]) else: env.seed(seed) env.action_space.seed(seed) env.observation_space.seed(seed) return env_dataloader def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to each training environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] """ # We add a restriction to prevent users from getting data from # previous or future tasks. # NOTE: This assumes that tasks all have the same length. return self._make_wrappers( base_env=self.train_dataset, task_schedule=self.train_task_schedule, # TODO: Removing this, but we have to check that it doesn't change when/how # the task boundaries are given to the Method. # sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.transforms + self.train_transforms, starting_step=0, max_steps=self.train_max_steps, new_random_task_on_reset=self.stationary_context, ) def create_valid_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to each validation environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] TODO: Decide how this 'validation' environment should behave in comparison with the train and test environments. """ return self._make_wrappers( base_env=self.val_dataset, task_schedule=self.val_task_schedule, # sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.transforms + self.val_transforms, starting_step=0, # TODO: Should there be a limit on the validation steps/episodes? max_steps=self.train_max_steps, new_random_task_on_reset=self.stationary_context, ) def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to a single test environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] """ return self._make_wrappers( base_env=self.test_dataset, task_schedule=self.test_task_schedule, # sharp_task_boundaries=self.known_task_boundaries_at_test_time, task_labels_available=self.task_labels_at_test_time, transforms=self.transforms + self.test_transforms, starting_step=0, max_steps=self.test_max_steps, new_random_task_on_reset=self.stationary_context, ) def _make_wrappers( self, base_env: Union[str, gym.Env, Callable[[], gym.Env]], task_schedule: Dict[int, Dict], # sharp_task_boundaries: bool, task_labels_available: bool, transforms: List[Transforms] = None, starting_step: int = None, max_steps: int = None, new_random_task_on_reset: bool = False, ) -> List[Callable[[gym.Env], gym.Env]]: """helper function for creating the train/valid/test wrappers. These wrappers get applied *before* the batching, if applicable. """ wrappers: List[Callable[[gym.Env], gym.Env]] = [] # TODO: Add some kind of Wrapper around the dataset to make it # semi-supervised? if self.max_episode_steps: wrappers.append(partial(TimeLimit, max_episode_steps=self.max_episode_steps)) # NOTE: Removing this 'ActionLimit' from the 'pre-batch' wrappers. # wrappers.append(partial(ActionLimit, max_steps=max_steps)) # if is_classic_control_env(base_env): # If we are in a classic control env, and we dont want the state to # be fully-observable (i.e. we want pixel observations rather than # getting the pole angle, velocity, etc.), then add the # PixelObservation wrapper to the list of wrappers. # if self.force_pixel_observations: # wrappers.append(PixelObservationWrapper) # TODO: Temporary fix for the `is_atari_env` function, which is used to check if the env # needs a `AtariPreprocessing` wrapper added. if isinstance(base_env, (str, gym.Env)) and is_atari_env(base_env): # TODO: Figure out the differences (if there are any) between the # AtariWrapper from SB3 and the AtariPreprocessing wrapper from gym. wrappers.append(GymAtariWrapper) if transforms: # Apply image transforms if the env will have image-like obs space # Wrapper to 'wrap' the observation space into an Image space (subclass of # Box with useful fields like `c`, `h`, `w`, etc.) wrappers.append(ImageObservations) # Wrapper to apply the image transforms to the env. wrappers.append(partial(TransformObservation, f=transforms)) if task_schedule is not None: # Add a wrapper which will add non-stationarity to the environment. # The "task" transitions will either be sharp or smooth. # In either case, the task ids for each sample are added to the # observations, and the dicts containing the task information (e.g. the # current values of the env attributes from the task schedule) get added # to the 'info' dicts. nb_tasks = None if self.smooth_task_boundaries: # Add a wrapper that creates smooth tasks. cl_wrapper = SmoothTransitions else: assert self.nb_tasks >= 1 # Add a wrapper that creates sharp tasks. # NOTE: The naming here is less than ideal! This isn't "multi-task" as-in stationary # by default. It just means an env which can do multiple tasks. However, when the # `new_random_task_on_reset` argument is set, then it does sample tasks IID. cl_wrapper = MultiTaskEnvironment nb_tasks = self.nb_tasks assert starting_step is not None assert max_steps is not None wrappers.append( partial( cl_wrapper, noise_std=self.task_noise_std, task_schedule=task_schedule, add_task_id_to_obs=True, add_task_dict_to_info=False, starting_step=starting_step, nb_tasks=nb_tasks, new_random_task_on_reset=new_random_task_on_reset, max_steps=max_steps, ) ) # If the task labels aren't available, we then add another wrapper that # hides that information (setting both of them to None) and also marks # those spaces as `Sparse`. if not task_labels_available: # NOTE: This sets the task labels to None, rather than removing # them entirely. # wrappers.append(RemoveTaskLabelsWrapper) wrappers.append(HideTaskLabelsWrapper) return wrappers def _get_objective_scaling_factor(self) -> float: """Return the factor to be multiplied with the mean reward per episode in order to produce a 'performance score' between 0 and 1. Returns ------- float The scaling factor to use. """ # TODO: remove this, currently used just so we can get a 'scaling factor' to use # to scale the 'mean reward per episode' to a score between 0 and 1. # TODO: Add other environments, for instance 1/200 for cartpole. # TODO: Rework this so its based on the reward threshold! max_reward_per_episode = 1 if isinstance(self.dataset, str) and self.dataset.startswith("MetaMonsterKong"): max_reward_per_episode = 100 elif isinstance(self.dataset, str) and self.dataset == "CartPole-v0": max_reward_per_episode = 200 else: warnings.warn( RuntimeWarning( f"Unable to determine the right scaling factor to use for dataset " f"{self.dataset} when calculating the performance score! " f"The CL Score of this run will most probably not be accurate." ) ) return 1 / max_reward_per_episode def _get_simple_name(self, env_name_or_id: str) -> Optional[str]: """Returns the 'simple name' for the given environment ID. For example, when passed "CartPole-v0", returns "cartpole". When not found, returns None. """ if env_name_or_id in self.available_datasets.keys(): return env_name_or_id if env_name_or_id in self.available_datasets.values(): simple_name: str = [ k for k, v in self.available_datasets.items() if v == env_name_or_id ][0] return simple_name return None def _load_task_schedule(file_path: Path) -> Dict[int, Dict]: """Load a task schedule from the given path.""" with open(file_path) as f: task_schedule = json.load(f) return {int(k): task_schedule[k] for k in sorted(task_schedule.keys())} if __name__ == "__main__": ContinualRLSetting.main() def find_matching_dataset( available_datasets: Dict[str, Union[str, Any]], dataset: str ) -> Optional[Union[str, Any]]: """Compares `dataset` with the keys in the `available_datasets` dict and return the value of the matching key if found, else returns None. """ if dataset in available_datasets: return available_datasets[dataset] if not isinstance(dataset, str): raise NotImplementedError(dataset) chosen_env_name, _, chosen_version = dataset.partition("-v") for key, env_id in available_datasets.items(): if dataset == key: assert False, "this should be reached, since we do that check above" env_name, _, env_version = key.partition("-v") if chosen_version: # chosen: half_cheetah # key: HalfCheetah-v2 # HalfCheetah-v2 # halfcheetah-v2 # half_cheetah_v2 if chosen_version != env_version: continue if names_match(chosen_env_name, env_name): return env_id elif names_match(chosen_env_name, env_name): # Look for matching entries with that name, and select the highest # available version. datasets_with_that_name = { other_key: other_env_id for other_key, other_env_id in available_datasets.items() if names_match(chosen_env_name, other_key.partition("-v")[0]) } if len(datasets_with_that_name) == 1: return env_id versions = { other_key: int(other_key.partition("-v")[-1]) for other_key in datasets_with_that_name } return max(datasets_with_that_name, key=versions.get) closest_matches = difflib.get_close_matches(dataset, available_datasets) if closest_matches: closest_match_key: str = closest_matches[0] closest_match: Union[str, Any] = available_datasets[closest_match_key] if chosen_version: # Find the 'version' number of the closest match, and check that it fits. closest_match_version = closest_match_key.partition("-v")[-1] if not closest_match_version: assert isinstance(closest_match, str) closest_match_version = closest_match.partition("-v")[-1] if chosen_version == closest_match_version: return closest_match raise gym.error.UnregisteredEnv( f"Can't find any matching entries for chosen dataset {dataset} " f"with that same version (closest entries: {closest_matches}) " ) warnings.warn( RuntimeWarning( f"Can't find matching entry for chosen dataset {dataset}, using " f"closest match: {closest_match}" ) ) return closest_match # raise RuntimeError(f"Can't find any matching entries for chosen dataset {dataset}. " # f"Closest entries: {closest_matches}") raise gym.error.UnregisteredEnv( f"Can't find any matching entries for chosen dataset {dataset}." ) # assert False, (dataset, closest_matches) ================================================ FILE: sequoia/settings/rl/continual/setting_test.py ================================================ import dataclasses from dataclasses import asdict, is_dataclass, replace from functools import partial, singledispatch from pathlib import Path from typing import Any, Callable, Union, ClassVar, Dict, List, Optional, Sequence, Type import typing import gym import matplotlib.pyplot as plt import numpy as np import pytest from gym import spaces from gym.vector.utils import batch_space from sequoia.common.config import Config from sequoia.common.spaces import TypedDictSpace from sequoia.common.spaces.sparse import Sparse from sequoia.conftest import ( MUJOCO_INSTALLED, mujoco_required, param_requires_monsterkong, param_requires_mujoco, ) from sequoia.settings.assumptions.incremental_test import DummyMethod as _DummyMethod from sequoia.settings.base.setting_test import SettingTests from sequoia.settings.rl.incremental.setting import IncrementalRLSetting from sequoia.settings.rl.setting_test import DummyMethod from sequoia.utils.utils import pairwise, take from sequoia.settings.base import Setting from .setting import ContinualRLSetting @pytest.mark.parametrize( "dataset", [ "CartPole-v8", "Breakout-v9", param_requires_mujoco("Ant-v0"), param_requires_monsterkong("MetaMonsterKong-v0"), ], ) def test_passing_unsupported_dataset_raises_error(dataset: Any): with pytest.raises((gym.error.Error, NotImplementedError)): _ = ContinualRLSetting(dataset=dataset) def test_acrobot_attributes_change_over_time(): from sequoia.settings.rl.setting_test import CheckAttributesWrapper from sequoia.settings.rl.wrappers import MeasureRLPerformanceWrapper from sequoia.settings.rl.continual.environment import GymDataLoader from sequoia.common.gym_wrappers.env_dataset import EnvDataset from sequoia.settings.rl.wrappers import TypedObjectsWrapper from sequoia.common.gym_wrappers.action_limit import ActionLimit from sequoia.settings.rl.wrappers import HideTaskLabelsWrapper from sequoia.common.gym_wrappers.smooth_environment import SmoothTransitions task_schedule = { 0: { "LINK_LENGTH_1": 1.0, "LINK_LENGTH_2": 1.0, "LINK_MASS_1": 1.0, "LINK_MASS_2": 1.0, "LINK_COM_POS_1": 0.5, "LINK_COM_POS_2": 0.5, "LINK_MOI": 1.0, }, 100: { "LINK_LENGTH_1": 1.077662352662672, "LINK_LENGTH_2": 1.0029158956681965, "LINK_MASS_1": 1.284506509206828, "LINK_MASS_2": 1.3452415995540132, "LINK_COM_POS_1": 0.3838164987591757, "LINK_COM_POS_2": 0.6022014573018389, "LINK_MOI": 0.866228909018773, }, 200: { "LINK_LENGTH_1": 0.9787461324812216, "LINK_LENGTH_2": 1.1761685623559348, "LINK_MASS_1": 1.0598898754474704, "LINK_MASS_2": 1.1760598598046939, "LINK_COM_POS_1": 0.4523967193123413, "LINK_COM_POS_2": 0.4100516516032442, "LINK_MOI": 1.010250702300972, }, } from .objects import Observations attributes = list(task_schedule[0].keys()) assert Observations is ContinualRLSetting.Observations max_steps = 200 max_episode_steps = 10 # List of w wrapper_fns = [] from gym.envs.classic_control.acrobot import AcrobotEnv from gym.wrappers import TimeLimit base_env: AcrobotEnv = gym.make("Acrobot-v1") # type: ignore base_env = AcrobotEnv() base_env = TimeLimit(base_env, max_episode_steps=max_episode_steps) env = wrap( base_env, lambda env: SmoothTransitions( env, task_schedule=task_schedule, add_task_id_to_obs=True, only_update_on_episode_end=False, ), HideTaskLabelsWrapper, lambda env: ActionLimit(env, max_steps=10_000), lambda env: TypedObjectsWrapper( env, observations_type=ContinualRLSetting.Observations, # observation_space=TypedDictSpace(x:Box([ -1. -1. -1. -1. -12.566371 -28.274334], [ 1. 1. 1. ...one:Sparse(Box(False, True, (), bool), sparsity=1), dtype=) observation_space=TypedDictSpace( x=spaces.Box( np.asfarray([-1.0, -1.0, -1.0, -1.0, -12.566371, -28.274334]), np.asfarray([1.0, 1.0, 1.0, 1.0, 12.566371, 28.274334]), (6,), np.float32, ), task_labels=Sparse(spaces.Box(0.0, 1.0, (), np.float32), sparsity=1), done=Sparse(spaces.Box(False, True, (), bool), sparsity=1), dtype=Observations, ), action_space=spaces.Discrete(3), actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards, reward_space=spaces.Box(-np.inf, np.inf, (), np.float32), ), EnvDataset, GymDataLoader, MeasureRLPerformanceWrapper, lambda env: CheckAttributesWrapper(env, attributes=attributes), ) import itertools env.seed(123) episodes = max_steps // max_episode_steps done = False total_steps = 0 for episode in range(episodes): obs = env.reset() done = False step: int = 0 for step in itertools.count(): action = env.action_space.sample() obs, reward, done, info = env.step(action) total_steps += 1 link_length_1 = env.LINK_LENGTH_1 if done: break current_values = env.values[max(env.values)] # assert current_values == env.current_task # NOTE: A bit too fine-grained. This is slightly different. print( f"End of episode {episode} at step {total_steps} (lasted {step} steps): \n\t{current_values}" ) values_at_each_step = env.values for attribute in attributes: train_values: List[float] = [ values_dict[attribute] for step, values_dict in values_at_each_step.items() ] # We store the values before and after each step, so it's fine if they are the same at that last # step. assert train_values[0] == train_values[1] assert len(train_values) == len(set(train_values)) + 1 from typing import TypeVar E = TypeVar("E", bound=gym.Env) W = TypeVar("W", bound=gym.Wrapper) def wrap( env: E, *wrapper_fns: Union[Type[W], Callable[[Union[E, W]], W]] ) -> Union[E, W, Union[W, E]]: """Wraps the environment `env` with the provided wrapper types or wrapper functions. The wrapper functions are applied in order to `env`, meaning the first item is the innermost wrapper, and the last item in `wrapper_fns` is the outermost wrapper. Parameters ---------- env : E [description] Returns ------- Union[W, E] [description] """ wrapped_env: Union[W, E] = env for wrapper_fn in wrapper_fns: wrapped_env = wrapper_fn(wrapped_env) if typing.TYPE_CHECKING: assert isinstance(wrapped_env, (E, W)) return wrapped_env def wrap_reversed( env: E, *wrapper_fns: Union[Type[W], Callable[[Union[E, W]], W]] ) -> Union[E, W, Union[W, E]]: return wrap(env, *reversed(wrapper_fns)) @singledispatch def _equal(a: Any, b: Any) -> bool: """Utility function used to check if two thing are equal. NOTE: This is only really useful/necessary because `functools.partial` objects can be present as attributes on the setting, usually either in the task schedule (or in the [train/val/test]_envs for the IncrementalRLSetting subclasses). The `functools.partial` class doesn't support equality: two partial objects with the same funcs, args and kwargs are still not considered equal for some reason. This function has a special handler for `partial` objects, so that they are considered equal if and only if their funcs, args and keywords are the same. This makes it possible to easily check for equality between settings, which is used for example in the tests below. """ if is_dataclass(a): return is_dataclass(b) and _equal(asdict(a), asdict(b)) return a == b @_equal.register def _partials_equal(a: partial, b: partial) -> bool: # NOTE: Using the recursive call so we can compare nested partials. return ( isinstance(b, partial) and _equal(a.func, b.func) and _equal(a.args, b.args) and _equal(a.keywords, b.keywords) ) # NOTE: Need to also register handlers for list and dict, since they might have partials as # items. @_equal.register(list) def _lists_equal(a: List, b: List) -> bool: return len(a) == len(b) and all(_equal(v_a, v_b) for v_a, v_b in zip(a, b)) @_equal.register(dict) def _dicts_equal(a: Dict, b: Dict) -> bool: if a.keys() != b.keys(): return False for k in a: v_a, v_b = a[k], b[k] if not _equal(v_a, v_b): print(f"Values differ at key {k}: {v_a}, {v_b}") return False return True def all_different_from_next(sequence: Sequence) -> bool: """Returns True if each value in the sequence is different from the next.""" return not any(_equal(v, next_v) for v, next_v in pairwise(sequence)) class TestContinualRLSetting(SettingTests): Setting: ClassVar[Type[Setting]] = ContinualRLSetting dataset: pytest.fixture @pytest.fixture() def setting_kwargs(self, dataset: str, config: Config): """Fixture used to pass keyword arguments when creating a Setting.""" return {"dataset": dataset, "config": config} def test_passing_supported_dataset(self, setting_kwargs: Dict): setting = self.Setting(**setting_kwargs) assert setting.train_task_schedule assert setting.val_task_schedule assert setting.test_task_schedule # Passing the dataset created a task schedule. assert all(setting.train_task_schedule.values()), "Should have non-empty tasks." assert all(setting.val_task_schedule.values()), "Should have non-empty tasks." assert all(setting.test_task_schedule.values()), "Should have non-empty tasks." @pytest.mark.parametrize("seed", [123, 456]) def test_task_schedule_is_reproducible(self, dataset: str, seed: Optional[int]): setting_a = self.Setting(dataset=dataset, config=Config(seed=seed)) setting_b = self.Setting(dataset=dataset, config=Config(seed=seed)) assert setting_a.train_task_schedule == setting_b.train_task_schedule assert setting_a.val_task_schedule == setting_b.val_task_schedule assert setting_a.test_task_schedule == setting_b.test_task_schedule @pytest.mark.xfail( reason="Reworking/removing this mechanism, makes things a bit too complicated." ) def test_using_deprecated_fields(self): # BUG: It's tough to get this to raise a warning, because it's happening # inside the constructor in the dataclasses.py file, so we have to mess with # descriptors etc, which isn't great. # with pytest.raises(DeprecationWarning): # setting = self.Setting(nb_tasks=5, max_steps=123) setting = self.Setting(nb_tasks=5, max_steps=123) assert setting.train_max_steps == 123 with pytest.warns(DeprecationWarning): setting.max_steps = 456 assert setting.train_max_steps == 456 with pytest.warns(DeprecationWarning): setting = self.Setting(nb_tasks=5, test_max_steps=123) assert setting.test_max_steps == 123 with pytest.warns(DeprecationWarning): setting.test_steps = 456 assert setting.test_max_steps == 456 def test_tasks_are_different(self, setting_kwargs: Dict[str, Any], config: Config): """Check that the tasks different from the next.""" config = setting_kwargs.pop("config", config) assert config.seed is not None setting = self.Setting(**setting_kwargs, config=config) # Check that each task is different from the next. assert all_different_from_next(setting.train_task_schedule.values()) assert all_different_from_next(setting.val_task_schedule.values()) assert all_different_from_next(setting.test_task_schedule.values()) def test_settings_attributes_are_the_same_for_given_seed( self, setting_kwargs: Dict[str, Any], config: Config ): """Make sure that the settings' attributes are the same if passed the same seed.""" # Make sure that there is a random seed set, otherwise use the one present in `config`. config: Config = setting_kwargs.pop("config", config) assert config.seed is not None setting_1 = self.Setting(**setting_kwargs, config=config) # Uses the same config and seed, and check that the attributes of the two settings are # identical. setting_2 = self.Setting(**setting_kwargs, config=config) # Check that the settings have the same attributes. assert _equal(dataclasses.asdict(setting_1), dataclasses.asdict(setting_2)) # These next lines are redundant, but just to be clear: assert setting_1.train_task_schedule == setting_2.train_task_schedule assert setting_1.val_task_schedule == setting_2.val_task_schedule assert setting_1.test_task_schedule == setting_2.test_task_schedule def test_tasks_are_different_when_seed_is_different( self, setting_kwargs: Dict[str, Any], config: Config ): # Create another setting with a different seed, and check that at least the generated tasks # are different. config = setting_kwargs.pop("config", config) assert config.seed is not None setting_1 = self.Setting(**setting_kwargs, config=config) assert setting_1.train_task_schedule different_seed = config.seed + 123 setting_3 = self.Setting(**setting_kwargs, config=replace(config, seed=different_seed)) setting_1_dict = dataclasses.asdict(setting_1) setting_3_dict = dataclasses.asdict(setting_3) # Remove the seeds, which are obviously different, and then check that the dicts from the # two settings are still different. assert setting_1_dict["config"].pop("seed") == config.seed assert setting_3_dict["config"].pop("seed") == different_seed if "LPG-FTW" in setting_1.dataset: # NOTE: The rest of the setting's attributes might be identical (they currently are, but # this could change), so skipping these datasets seems like the right thing to do. pytest.skip("LPG-FTW datasets always create the same tasks, no matter the seed.") assert not _equal(setting_1_dict, setting_3_dict) # Additionally, explicitly check that either the train schedule or the train envs are # different, since the check above could have passed due to some other attribute being # different between the two settings. if isinstance(setting_1, IncrementalRLSetting) and setting_1.train_envs: assert isinstance(setting_3, IncrementalRLSetting) # Using custom envs for each task. assert not _equal(setting_1.train_envs, setting_3.train_envs) assert not _equal(setting_1.val_envs, setting_3.val_envs) assert not _equal(setting_1.test_envs, setting_3.test_envs) else: # Using a single env with a task schedule. assert not _equal(setting_1.train_task_schedule, setting_3.train_task_schedule) assert not _equal(setting_1.val_task_schedule, setting_3.val_task_schedule) assert not _equal(setting_1.test_task_schedule, setting_3.test_task_schedule) def test_env_attributes_change(self, setting_kwargs: Dict[str, Any], config: Config): """Check that the values of the given attributes do change at each step during training. """ setting_kwargs.setdefault("nb_tasks", 2) setting_kwargs.setdefault("train_max_steps", 1000) setting_kwargs.setdefault("max_episode_steps", 50) setting_kwargs.setdefault("test_max_steps", 1000) setting = self.Setting(**setting_kwargs) assert setting.train_task_schedule # NOTE: Have to check for `setting.train_envs` because in that case the task schedule won't # be used. from sequoia.settings.rl.incremental.setting import IncrementalRLSetting if isinstance(setting, IncrementalRLSetting) and setting._using_custom_envs_foreach_task: # It would be pretty hard to check for the "task values" in this case, because the # custom envs for each task might not be just the same env type but with different # attributes! pytest.skip("Using custom envs for each task instead of a task schedule.") assert all(setting.train_task_schedule.values()) assert setting.nb_tasks == setting_kwargs["nb_tasks"] assert setting.train_steps_per_task == setting_kwargs["train_max_steps"] // setting.nb_tasks assert setting.train_max_steps == setting_kwargs["train_max_steps"] attributes = set().union(*[task.keys() for task in setting.train_task_schedule.values()]) method = DummyMethod() results = setting.apply(method, config=config) assert results self.validate_results(setting, method, results) # TODO: Need to limit the episodes per step in MonsterKong. # In MonsterKong, we might have 0 reward, since this might not even # constitute a full episode. # assert results.objective for attribute in attributes: train_values: List[float] = [ values[attribute] for values_dict in method.all_train_values for step, values in values_dict.items() ] assert train_values task_schedule_values: List[float] = { step: task[attribute] for step, task in setting.train_task_schedule.items() } self.validate_env_value_changes( setting=setting, attribute=attribute, task_schedule_for_attr=task_schedule_values, train_values=train_values, ) @staticmethod def validate_env_value_changes( setting: ContinualRLSetting, attribute: str, task_schedule_for_attr: Dict[str, float], train_values: List[float], ): """Given an attribute name, and the values of that attribute in the task schedule, check that the actual values for that attribute encountered during training make sense, based on the type of non-stationarity present in this Setting. """ assert len(set(task_schedule_for_attr.values())) == setting.nb_tasks + 1, ( f"Task schedule should have had {setting.nb_tasks + 1} distinct values for " f"attribute {attribute}: {task_schedule_for_attr}" ) if setting.smooth_task_boundaries: # Should have one (unique) value for the attribute at each step during training # This is the truth condition for the ContinualRLSetting. # NOTE: There's an offset by 1 here because of when the env is closed. # NOTE: This test won't really work with integer values, but that doesn't matter # right now because we don't/won't support changing the values of integer # parameters in this "continuous" task setting. assert len(set(train_values)) == setting.train_max_steps, ( f"Should have encountered {setting.train_max_steps} distinct values " f"for attribute {attribute}: during training!" ) else: from ..discrete.setting import DiscreteTaskAgnosticRLSetting setting: DiscreteTaskAgnosticRLSetting train_tasks = setting.nb_tasks unique_attribute_values = set(train_values) assert setting.train_task_schedule.keys() == task_schedule_for_attr.keys() for k, v in task_schedule_for_attr.items(): task_dict = setting.train_task_schedule[k] assert attribute in task_dict assert task_dict[attribute] == v assert len(unique_attribute_values) == train_tasks, ( type(setting), attribute, unique_attribute_values, task_schedule_for_attr, setting.nb_tasks, ) def validate_results( self, setting: ContinualRLSetting, method: DummyMethod, results: ContinualRLSetting.Results, ) -> None: assert results assert results.objective assert method.n_task_switches == 0 assert method.n_fit_calls == 1 assert not method.received_task_ids assert not method.received_while_training @pytest.mark.parametrize( "batch_size", [None, 1, 3], ) @pytest.mark.timeout(60) def test_check_iterate_and_step( self, setting_kwargs: Dict[str, Any], batch_size: Optional[int], ): """Test that the observations are of the right type and shape, regardless of wether we iterate on the env by calling 'step' or by using it as a DataLoader. """ setting_kwargs.setdefault("num_workers", 0) dataset: str = setting_kwargs["dataset"] from gym.envs.registration import registry if dataset in registry.env_specs: with gym.make(dataset) as temp_env: expected_x_space = temp_env.observation_space expected_action_space = temp_env.action_space else: # NOTE: Not ideal: Have to create a setting just to get the observation space temp_setting = self.Setting(**setting_kwargs) # NOTE: Using the test dataloader so the task labels space is a Sparse(Discrete(n)) in # the worst case, and so all observations (None or integers) are valid samples. with temp_setting.test_dataloader() as temp_env: # e = temp_env # while e.unwrapped is not e: # print(f"Wrapper of type {type(e)} has obs space of {e.observation_space}") # e = e.env # print(f"Unwrapped obs space is {e.observation_space}") # assert False, temp_env expected_x_space = temp_env.observation_space.x expected_action_space = temp_env.action_space del temp_setting setting = self.Setting(**setting_kwargs) if batch_size is not None: expected_batched_x_space = batch_space(expected_x_space, batch_size) expected_batched_action_space = batch_space(setting.action_space, batch_size) else: expected_batched_x_space = expected_x_space expected_batched_action_space = expected_action_space assert setting.observation_space.x == expected_x_space assert setting.action_space == expected_action_space # TODO: This is changing: assert setting.train_transforms == [] # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels] def check_env_spaces(env: gym.Env) -> None: if env.batch_size is not None: # TODO: This might not be totally accurate, for example because the # TransformObservation wrapper applied to a VectorEnv doesn't change the # single_observation_space, AFAIR. assert env.single_observation_space.x == expected_x_space assert env.single_action_space == expected_action_space assert isinstance(env.observation_space, TypedDictSpace), ( env, env.observation_space, ) assert env.observation_space.x == expected_batched_x_space assert env.action_space == expected_batched_action_space else: assert env.observation_space.x == expected_x_space assert env.action_space == expected_action_space # FIXME: Move this to an instance method on the test class so that subclasses # can change stuff in it. def check_obs(obs: ContinualRLSetting.Observations) -> None: if isinstance(self.Setting, partial): # NOTE: This Happens when we sneakily switch out the self.Setting # attribute in other tests (for the SettingProxy for example). assert isinstance(obs, self.Setting.args[0].Observations) else: assert isinstance(obs, self.Setting.Observations) assert obs.x in expected_batched_x_space # In this particular case here, the task labels should be None. # FIXME: For InrementalRL, this isn't correct! TestIncrementalRL should # therefore have its own version of this function. if self.Setting is ContinualRLSetting: assert obs.task_labels is None or all( task_label == None for task_label in obs.task_labels ) with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env: assert env.batch_size == batch_size check_env_spaces(env) # BUG: The dataset's observation space has task_labels as a Discrete, but the task # labels are None. setting: ContinualRLSetting if setting.task_labels_at_train_time: if batch_size is not None: assert isinstance(env.observation_space.task_labels, spaces.MultiDiscrete) else: assert isinstance(env.observation_space.task_labels, spaces.Discrete) elif setting.known_task_boundaries_at_train_time: assert isinstance(env.observation_space.task_labels, Sparse) obs = env.reset() # BUG: TODO: The observation space that we use should actually check with # isinstance and over the fields that fit in the space. Here there is a bug # because the env observations also have a `done` field, while the space # doesnt. # assert obs in env.observation_space assert obs.x in env.observation_space.x # this works though. # BUG: This doesn't currently work: (would need a tuple value rather than an # array. # assert obs.task_labels in env.observation_space.task_labels assert obs.task_labels in env.observation_space.task_labels if batch_size: assert obs.x[0] in setting.observation_space.x assert ( obs.task_labels is None or obs.task_labels[0] in setting.observation_space.task_labels ) else: assert obs in setting.observation_space reset_obs = env.reset() check_obs(reset_obs) # BUG: Environment is closed? (batch_size = 3, dataset = 'CartPole-v0') step_obs, *_ = env.step(env.action_space.sample()) check_obs(step_obs) for iter_obs in take(env, 3): check_obs(iter_obs) _ = env.send(env.action_space.sample()) with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env: assert env.batch_size == batch_size check_env_spaces(env) reset_obs = env.reset() check_obs(reset_obs) step_obs, *_ = env.step(env.action_space.sample()) check_obs(step_obs) for iter_obs in take(env, 3): check_obs(iter_obs) _ = env.send(env.action_space.sample()) # NOTE: Limitting the batch size at test time to None (i.e. a single env) # because of how the Monitor class works atm. batch_size = None expected_batched_x_space = expected_x_space expected_batched_action_space = expected_action_space # NOTE: Need to make sure that the 'directory' passed to the Monitor # wrapper is a temp dir. Should be the case, but just checking. assert setting.config.log_dir != Path("results") with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env: assert env.batch_size is None check_env_spaces(env) reset_obs = env.reset() check_obs(reset_obs) step_obs, *_ = env.step(env.action_space.sample()) check_obs(step_obs) # NOTE: Can't do this here, unless the episode is over, because the Monitor # doesn't want us to end an episode early! # for iter_obs in take(env, 3): # check_obs(iter_obs) # _ = env.send(env.action_space.sample()) with setting.test_dataloader(batch_size=batch_size) as env: assert not env.is_closed() # NOTE: Can't do this here, unless the episode is over, because the Monitor # doesn't want us to end an episode early! for iter_obs in take(env, 3): check_obs(iter_obs) _ = env.send(env.action_space.sample()) @pytest.mark.no_xvfb @pytest.mark.timeout(20) @pytest.mark.skipif( (not Path("temp").exists()), reason="Need temp dir for saving the figure this test creates.", ) @mujoco_required def test_show_distributions(self, config: Config): setting = self.Setting( dataset="half_cheetah", max_steps=1_000, max_episode_steps=100, config=config, ) fig, axes = plt.subplots(2, 3) name_to_env_fn = { "train": setting.train_dataloader, "valid": setting.val_dataloader, "test": setting.test_dataloader, } for i, (name, env_fn) in enumerate(name_to_env_fn.items()): env = env_fn(batch_size=None, num_workers=None) gravities: List[float] = [] task_labels: List[Optional[int]] = [] total_steps = 0 while not env.is_closed(): obs = env.reset() done = False steps_in_episode = 0 while not done: t = obs.task_labels obs, reward, done, info = env.step(env.action_space.sample()) total_steps += 1 steps_in_episode += 1 y = reward.y gravities.append(env.gravity) print(total_steps, env.gravity) if total_steps > 100: assert env.gravity != -9.81 task_labels.append(t) x = np.arange(len(gravities)) axes[0, i].plot(x, gravities, label="gravities") axes[0, i].legend() axes[0, i].set_title(f"{name} gravities") axes[0, i].set_xlabel("Step index") axes[0, i].set_ylabel("Value") # for task_id in task_ids: # y = [t_counter.get(task_id) for t_counter in t_counters] # axes[1, i].plot(x, y, label=f"task_id={task_id}") # axes[1, i].legend() # axes[1, i].set_title(f"{name} task_id") # axes[1, i].set_xlabel("Batch index") # axes[1, i].set_ylabel("Count in batch") plt.legend() Path("temp").mkdir(exist_ok=True) fig.set_size_inches((6, 4), forward=False) plt.savefig(f"temp/{self.Setting.__name__}.png") # plt.waitforbuttonpress(10) # plt.show() # @pytest.mark.xfail(reason="TODO: pl_bolts DQN only accepts string environment names..") # def test_dqn_on_env(tmp_path: Path): # """ TODO: Would be nice if we could have the models work directly on the # gym envs.. # """ # from pl_bolts.models.rl import DQN # from pytorch_lightning import Trainer # setting = ContinualRLSetting() # env = setting.train_dataloader(batch_size=None) # model = DQN(env) # trainer = Trainer(fast_dev_run=True, default_root_dir=tmp_path) # success = trainer.fit(model) # assert success == 1 def test_passing_task_schedule_sets_other_attributes_correctly(): # TODO: Figure out a way to test that the tasks are switching over time. setting = ContinualRLSetting( dataset="CartPole-v0", train_task_schedule={ 0: {"gravity": 5.0}, 100: {"gravity": 10.0}, 200: {"gravity": 20.0}, }, test_max_steps=10_000, ) assert setting.phases == 1 assert setting.nb_tasks == 2 # assert setting.steps_per_task == 100 assert setting.test_task_schedule == { 0: {"gravity": 5.0}, 5_000: {"gravity": 10.0}, 10_000: {"gravity": 20.0}, } assert setting.test_max_steps == 10_000 # assert setting.test_steps_per_task == 5_000 setting = ContinualRLSetting( dataset="CartPole-v0", train_task_schedule={ 0: {"gravity": 5.0}, 100: {"gravity": 10.0}, 200: {"gravity": 20.0}, }, test_max_steps=2000, # test_steps_per_task=100, ) assert setting.phases == 1 # assert setting.nb_tasks == 2 # assert setting.steps_per_task == 100 assert setting.test_task_schedule == { 0: {"gravity": 5.0}, 1000: {"gravity": 10.0}, 2000: {"gravity": 20.0}, } assert setting.test_max_steps == 2000 # assert setting.test_steps_per_task == 100 def test_fit_and_on_task_switch_calls(): setting = ContinualRLSetting( dataset="CartPole-v0", # nb_tasks=5, # train_steps_per_task=100, train_max_steps=500, test_max_steps=500, # test_steps_per_task=100, train_transforms=[], test_transforms=[], val_transforms=[], ) method = _DummyMethod() _ = setting.apply(method) # == 30 task switches in total. if MUJOCO_INSTALLED: from sequoia.settings.rl.envs.mujoco import ( ContinualHalfCheetahEnv, ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, ContinualHopperEnv, ContinualHopperV2Env, ContinualHopperV3Env, ContinualWalker2dV2Env, ContinualWalker2dV3Env, ) @mujoco_required @pytest.mark.parametrize( "dataset, expected_env_type", [ ("half_cheetah", ContinualHalfCheetahEnv), ("halfcheetah", ContinualHalfCheetahEnv), ("HalfCheetah-v2", ContinualHalfCheetahV2Env), ("HalfCheetah-v3", ContinualHalfCheetahV3Env), ("ContinualHalfCheetah-v2", ContinualHalfCheetahV2Env), ("ContinualHalfCheetah-v3", ContinualHalfCheetahV3Env), ("ContinualHopper-v2", ContinualHopperEnv), ("hopper", ContinualHopperEnv), ("Hopper-v2", ContinualHopperV2Env), ("Hopper-v3", ContinualHopperV3Env), ("walker2d", ContinualWalker2dV3Env), ("Walker2d-v2", ContinualWalker2dV2Env), ("Walker2d-v3", ContinualWalker2dV3Env), ("ContinualWalker2d-v2", ContinualWalker2dV2Env), ("ContinualWalker2d-v3", ContinualWalker2dV3Env), ], ) def test_mujoco_env_name_maps_to_continual_variant( dataset: str, expected_env_type: Type[gym.Env] ): setting = ContinualRLSetting(dataset=dataset, train_max_steps=10_000, test_max_steps=10_000) train_env = setting.train_dataloader() assert isinstance(train_env.unwrapped, expected_env_type) ================================================ FILE: sequoia/settings/rl/continual/tasks.py ================================================ """ Handlers for creating tasks in different environments. TODO: Add more envs: - [ ] PyBullet! - [ ] Box2d! - [ ] ProcGen! - [ ] dm_control! from gym.envs.box2d import BipedalWalker, BipedalWalkerHardcore """ import difflib import inspect import warnings from functools import partial, singledispatch from typing import Any, Callable, Dict, List, Type, TypeVar, Union import gym import numpy as np from gym.envs.classic_control import ( AcrobotEnv, CartPoleEnv, Continuous_MountainCarEnv, MountainCarEnv, PendulumEnv, ) from gym.envs.registration import EnvRegistry, EnvSpec, load, registry from sequoia.common.gym_wrappers.multi_task_environment import make_env_attributes_task from sequoia.settings.rl.envs import MUJOCO_INSTALLED, sequoia_registry from sequoia.utils.utils import camel_case # Idea: Create a true 'Task' class? Task = Any ContinuousTask = Dict[str, float] TaskType = TypeVar("TaskType", bound=ContinuousTask) # TODO: Create a fancier class for the TaskSchedule, as described in the test file. # IDEA: Have the Task Schedule be a 'list' of Task objects, each of which has a # 'duration' parameter, which are accumulated to create the 'keys' of the task schedule! # TaskSchedule = Dict[int, TaskType] class TaskSchedule(Dict[int, TaskType]): pass class EnvironmentNotSupportedError(gym.error.UnregisteredEnv): """Error raised when we don't know how to create a task for the given environment.""" def names_match(name_a: str, name_b: str) -> bool: a_variants = (name_a, name_a.lower(), camel_case(name_a)) b_variants = (name_b, name_b.lower(), camel_case(name_b)) # TODO: Not sure about this 'endswith' stuff, e.g. with MountainCarContinuous vs MountainCar? return ( name_a in b_variants or name_b in a_variants ) # or name_a.endswith(b_variants) or name_b.endswith(a_variants) def _is_supported( env_id: str, _make_task_function: Callable[..., ContinuousTask], env_registry: EnvRegistry = registry, ) -> bool: """Returns wether Sequoia is able to create (continuous) tasks for the given environment. WIP: It is better not to use this directly, and instead use the equivalent `is_supported` function which is created dynamically below. """ def _has_handler(some_env_type: Type[gym.Env]) -> bool: """Returns wether the "make task" function has a registered handler for the given envs. """ return some_env_type in _make_task_function.registry or ( not inspect.isfunction(some_env_type) and _make_task_function.dispatch(some_env_type) is not _make_task_function.dispatch(object) ) if isinstance(env_id, str): env_spec = env_registry.spec(env_id) elif isinstance(env_id, EnvSpec): env_spec = env_id env_id = env_spec.id elif inspect.isclass(env_id) and issubclass(env_id, gym.Env): env_type = env_id env_spec = None if _has_handler(env_type): return True env_id = env_type.__name__ class_name = env_type.__name__ else: raise NotImplementedError(env_id, type(env_id)) assert isinstance(env_id, str) if env_spec: assert isinstance(env_spec, EnvSpec) if callable(env_spec.entry_point): if _has_handler(env_spec.entry_point): return True class_name = env_spec.entry_point.__name__ else: assert isinstance(env_spec.entry_point, str) _module, _, class_name = env_spec.entry_point.partition(":") registered_class_names = tuple(c.__name__ for c in _make_task_function.registry) if class_name in registered_class_names: return True elif class_name.startswith(registered_class_names): return True close_matches = difflib.get_close_matches(class_name, registered_class_names) if not close_matches: return False return False def task_sampling_function( env_registry: EnvRegistry = registry, based_on: Callable[[gym.Env], TaskType] = None ) -> Callable[[gym.Env], TaskType]: """Decorator for a "make_task" function (e.g. `make_continuous_task`, `make_discrete_task`, etc.) that does the following: 1. Creates a singledispatch callable from the given function, if necessary; 2. Registers three useful handlers, for strings, environment types, and wrappers to the new function. 3. Adds a 'is_supported' function on that function (see NOTE below); 4. Adds all the registered handlers from the `based_on` function, if passed; NOTE (@lebrice): not sure about this is_supported being created and set on the function itself. It would probably be cleaner to create a class like TaskCreator or something that has the same methods as the underlying singledispatch callable. NOTE: A task sampling function should give back the same task when given the same seed, step and change_steps. """ def _wrapper(make_task_fn: Callable[[gym.Env], TaskType]) -> Callable[[gym.Env], TaskType]: if not hasattr(make_task_fn, "registry"): make_task_fn = singledispatch(make_task_fn) @make_task_fn.register(type) def make_discrete_task_from_type(env_type: Type[gym.Env], **kwargs) -> ContinuousTask: try: # Try to create a task without actually instantiating the env, by passing the # type of env as the 'env' argument, rather than an env instance. env_handler_function = make_task_fn.dispatch(env_type) return env_handler_function(env_type, **kwargs) except Exception as exc: raise RuntimeError( f"Unable to create a task based only on the env type {env_type}: {exc}\n" ) from exc @make_task_fn.register(str) def make_discrete_task_by_id( env: str, **kwargs, ) -> Union[Dict[str, Any], Any]: # Load the entry-point class, and use it to determine what handler to use. # TODO: Actually instantiate the env here? or just dispatch based on the env class? if env not in env_registry.env_specs: raise RuntimeError( f"Can't create a task for env id {env}, since it isn't a registered env id." ) env_spec: EnvSpec = env_registry.env_specs[env] env_entry_point: Callable[..., gym.Env] = load(env_spec.entry_point) # import inspect try: task: ContinuousTask = make_discrete_task_from_type(env_entry_point, **kwargs) return task except RuntimeError as exc: warnings.warn( RuntimeWarning( f"A temporary environment will have to be created in order to make a task: {exc}" ) ) with gym.make(env) as temp_env: # IDEA: Could avoid re-creating the env between calls to this function, for # instance by saving a single temp env in a global variable and overwriting # it if `env` is of a different type. return make_task_fn(temp_env, **kwargs) @make_task_fn.register def make_discrete_for_wrapped_env( env: gym.Wrapper, step: int, change_steps: List[int] = None, **kwargs, ) -> Union[Dict[str, Any], Any]: # NOTE: Not sure if this is totally a good idea... # If someone registers a handler for some kind of Wrapper, than all envs wrapped # with that wrapper will use that handler, instead of their base environment type. return make_task_fn(env.env, step=step, change_steps=change_steps, **kwargs) if based_on is not None: for registered_type, registered_handler in based_on.registry.items(): # NOTE: Skipping these types since we register new handlers above. Not # sure if it's necessary, since it might just overwrite an old handler # to register a new one for the same type? if registered_type not in [object, str, type, gym.Wrapper]: make_task_fn.register(registered_type, registered_handler) make_task_fn.is_supported = partial(_is_supported, _make_task_fn=make_task_fn) return make_task_fn return _wrapper @singledispatch def make_continuous_task( env: gym.Env, step: int, change_steps: List[int], seed: int = None, **kwargs, ) -> ContinuousTask: """Generic function used by Sequoia's RL settings to create a "task" that will be applied to an environment like `env`. To add support for a new type of environment, simply register a handler function: ``` @make_continuous_task.register(SomeGymEnvClass) def make_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,): return {"my_attribute": random.random()} ``` NOTE: In order to create tasks for an environment through its string 'id', and to avoid having to actually instantiate an environment, `env` could perhaps be a type of environment rather than an actual environment instance. If your function can't handle this (raises an exception somehow), then a temporary environment will be created, and a warning will be raised. TODO: remove / rename this 'change_steps' to 'max_steps' instead. """ raise NotImplementedError(f"Don't currently know how to create tasks for env {env}") make_continuous_task = task_sampling_function(env_registry=sequoia_registry)(make_continuous_task) is_supported = partial(_is_supported, _make_task_function=make_continuous_task) # from functools import _SingleDispatchCallable # Dictionary mapping from environment type to a dict of environment values which can be # modified with multiplicative gaussian noise. _ENV_TASK_ATTRIBUTES: Dict[Union[Type[gym.Env]], Dict[str, float]] = { CartPoleEnv: { "gravity": 9.8, "masscart": 1.0, "masspole": 0.1, "length": 0.5, "force_mag": 10.0, "tau": 0.02, }, PendulumEnv: { "max_speed": 8.0, "max_torque": 2.0, # "dt" = .05 "g": 10.0, "m": 1.0, "l": 1.0, }, MountainCarEnv: { "gravity": 0.0025, "goal_position": 0.45, # was 0.5 in gym, 0.45 in Arnaud de Broissia's version # BUG: Since we use multiplicative noise, this won't change over time. # "goal_velocity": 0, }, Continuous_MountainCarEnv: { "goal_position": 0.45, # was 0.5 in gym, 0.45 in Arnaud de Broissia's version # BUG: Since we use multiplicative noise, this won't change over time. # "goal_velocity": 0, }, # TODO: Test AcrobotEnv AcrobotEnv: { "LINK_LENGTH_1": 1.0, # [m] "LINK_LENGTH_2": 1.0, # [m] "LINK_MASS_1": 1.0, #: [kg] mass of link 1 "LINK_MASS_2": 1.0, #: [kg] mass of link 2 "LINK_COM_POS_1": 0.5, #: [m] position of the center of mass of link 1 "LINK_COM_POS_2": 0.5, #: [m] position of the center of mass of link 2 "LINK_MOI": 1.0, #: moments of inertia for both links }, # TODO: Add more of the classic control envs here. # TODO: Need to get the attributes to modify in each environment type and # add them here. # AtariEnv: [ # # TODO: Maybe have something like the difficulty as the CL 'task' ? # # difficulties = temp_env.ale.getAvailableDifficulties() # # "game_difficulty", # ], } @make_continuous_task.register(CartPoleEnv) @make_continuous_task.register(PendulumEnv) @make_continuous_task.register(MountainCarEnv) @make_continuous_task.register(Continuous_MountainCarEnv) @make_continuous_task.register(AcrobotEnv) def make_task_for_classic_control_env( env: gym.Env, step: int, change_steps: List[int] = None, task_params: Union[List[str], Dict[str, Any]] = None, seed: int = None, noise_std: float = 0.2, ): # NOTE: `step` doesn't matter here, all tasks are independant. task_params = task_params or _ENV_TASK_ATTRIBUTES[type(env.unwrapped)] if step == 0: # Use the 'default' task as the first task. return task_params.copy() # Make this more reproducible: When given the same seed and same step, return the # same task. if seed is not None: rng = np.random.default_rng(seed + step) else: rng = None # Default back to the 'env attributes' task, which multiplies the default values # with normally distributed scaling coefficients. # TODO: Need to refactor the whole MultiTaskEnv/SmoothTransition wrappers / tasks # etc. return make_env_attributes_task( env, task_params=task_params, rng=rng, noise_std=noise_std, ) # IDEA: Could probably not have these big ugly IF statements since we have the stubs for # the different mujoco env classes anyway. if MUJOCO_INSTALLED: from sequoia.settings.rl.envs.mujoco import ( ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, ContinualHopperV2Env, ContinualHopperV3Env, ContinualWalker2dV2Env, ContinualWalker2dV3Env, ModifiedGravityEnv, ) default_mujoco_gravity = -9.81 @make_continuous_task.register(ContinualHopperV2Env) @make_continuous_task.register(ContinualHopperV3Env) @make_continuous_task.register(ContinualWalker2dV2Env) @make_continuous_task.register(ContinualWalker2dV3Env) @make_continuous_task.register(ContinualHalfCheetahV2Env) @make_continuous_task.register(ContinualHalfCheetahV3Env) def make_task_for_modified_gravity_env( env: ModifiedGravityEnv, step: int, change_steps: List[int], seed: int = None, **kwargs, ) -> Union[Dict[str, Any], Any]: step_seed = seed * step if seed is not None else None # NOTE: np.random.default_rng(None) will NOT give the same result every first # time it is called, so this won't cause any issues with the same gravity being # sampled for all tasks if `seed` is None. rng = np.random.default_rng(step_seed) if step == 0: coefficient = 1 else: coefficient = rng.uniform() + 0.5 # TODO: Do we want to start with normal gravity? gravity = coefficient * default_mujoco_gravity return {"gravity": gravity} ================================================ FILE: sequoia/settings/rl/continual/tasks_test.py ================================================ from typing import Type import pytest from sequoia.conftest import mujoco_required from sequoia.settings.rl.envs import ( ContinualHalfCheetahEnv, ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, ContinualHopperEnv, ContinualWalker2dEnv, MujocoEnv, ) from .tasks import is_supported, make_continuous_task @mujoco_required @pytest.mark.parametrize( "env_type", [ ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, ContinualHopperEnv, ContinualWalker2dEnv, ContinualHalfCheetahEnv, ], ) def test_mujoco_tasks(env_type: Type[MujocoEnv]): assert is_supported("HalfCheetah-v2") from gym.envs.mujoco import HalfCheetahEnv # We shouldn't mark the *original* envs as supported, rather, we should only mark # our variants as supported. assert not is_supported(HalfCheetahEnv) assert is_supported(env_type) task = make_continuous_task(env_type, step=0, change_steps=[0, 100, 200]) assert task == {"gravity": -9.81} task_a = make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=123) task_b = make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=123) task_c = make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=456) # NOTE: Not sure that this will always give exactly the same result, since idk how # seeding is dependant on the machine running the code. # assert task == {'gravity': -10.134188877055529} assert task_a == task_b assert task_a != task_c ================================================ FILE: sequoia/settings/rl/continual/test_environment.py ================================================ import itertools import math from typing import Dict from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings.assumptions.continual import ContinualResults, TestEnvironment # TODO: Refactor those so they are based on the MeasureRLPerformanceWrapper, which works # with vectorized envs. class ContinualRLTestEnvironment(TestEnvironment): def __init__(self, *args, task_schedule: Dict, **kwargs): super().__init__(*args, **kwargs) self.task_schedule = task_schedule self.boundary_steps = [step // (self.batch_size or 1) for step in self.task_schedule.keys()] def __len__(self): return math.ceil(self.step_limit / (getattr(self.env, "batch_size", 1) or 1)) def get_results(self) -> ContinualResults[EpisodeMetrics]: # TODO: Place the metrics in the right 'bin' at the end of each episode during # testing depending on the task at that time, rather than what's happening here, # where we're getting all the rewards and episode lengths at the end and then # sort it out into the bins based on the task schedule. ALSO: this would make it # easier to support monitoring batched RL environments, since these `Monitor` # methods (get_episode_rewards, get_episode_lengths, etc) assume the environment # isn't batched. rewards = self.get_episode_rewards() lengths = self.get_episode_lengths() task_schedule: Dict[int, Dict] = self.task_schedule task_steps = sorted(task_schedule.keys()) assert 0 in task_steps test_results = ContinualResults() for step, episode_reward, episode_length in zip( itertools.accumulate(lengths), rewards, lengths ): # Given the step, find the task id. episode_metric = EpisodeMetrics( n_samples=1, mean_episode_reward=episode_reward, mean_episode_length=episode_length, ) test_results.metrics.append(episode_metric) return test_results def render(self, mode="human", **kwargs): # TODO: This might not be setup right. Need to check. image_batch = super().render(mode=mode, **kwargs) if mode == "rgb_array" and self.batch_size: return tile_images(image_batch) return image_batch def _after_reset(self, observation): # Is this going to work fine when the observations are batched though? return super()._after_reset(observation) ================================================ FILE: sequoia/settings/rl/discrete/__init__.py ================================================ from .setting import DiscreteTaskAgnosticRLSetting from .tasks import make_discrete_task ================================================ FILE: sequoia/settings/rl/discrete/multienv_wrappers.py ================================================ """ Wrappers that around multiple environments. These wrappers can be used to get different kinds of multi-task environments, or even to concatenate environments. """ from abc import ABC, abstractmethod from typing import Any, Callable, List, Optional, Sequence, Union import gym import numpy as np from gym import spaces from sequoia.common.gym_wrappers import IterableWrapper from sequoia.common.gym_wrappers.multi_task_environment import add_task_labels from sequoia.common.gym_wrappers.utils import MayCloseEarly from sequoia.utils.generic_functions import concatenate from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) def instantiate_env(env: Union[str, gym.Env, Callable[[], gym.Env]]) -> gym.Env: if isinstance(env, gym.Env): return env if isinstance(env, str): return gym.make(env) assert callable(env) return env() class MultiEnvWrapper(IterableWrapper, ABC): """TODO: Wrapper like that iterates over the envs. Could look a little bit like this: https://github.com/rlworkgroup/garage/blob/master/src/garage/envs/multi_env_wrapper.py """ def __init__(self, envs: List[gym.Env], add_task_ids: bool = False): self._envs = envs.copy() self._current_task_id = 0 self.nb_tasks = len(envs) self._envs_is_closed: Sequence[bool] = np.zeros([self.nb_tasks], dtype=bool) self._add_task_labels = add_task_ids self.rng: np.random.Generator = np.random.default_rng() self._instantiate_env(self._current_task_id) super().__init__(env=self._envs[self._current_task_id]) self.task_label_space = spaces.Discrete(self.nb_tasks) if self._add_task_labels: self.observation_space = add_task_labels( self.env.observation_space, self.task_label_space ) def _instantiate_env(self, index: int) -> None: self._envs[index] = instantiate_env(self._envs[index]) def set_task(self, task_id: int) -> None: if self.is_closed(env_index=None): raise gym.error.ClosedEnvironmentError( f"Can't call set_task on the env, since it's already closed." ) self._current_task_id = task_id # Use super().__init__() to reset the `self.env` attribute in gym.Wrapper. # TODO: This also resets the '_is_closed' on self. # TODO: This resets the 'observation_' and 'action_' etc objects that are saved # in the constructor of the 'IterableWrapper' self._instantiate_env(self._current_task_id) gym.Wrapper.__init__(self, env=self._envs[self._current_task_id]) if self._add_task_labels: self.observation_space = add_task_labels( self.env.observation_space, self.task_label_space ) @abstractmethod def next_task(self) -> int: pass def reset(self): if all(self._envs_is_closed): self.close() elif isinstance(self.env, MayCloseEarly) and self.env.is_closed(): self._envs_is_closed[self._current_task_id] = True self.set_task(self.next_task()) obs = super().reset() return self.observation(obs) def step(self, action): obs, rewards, done, info = super().step(action) obs = self.observation(obs) return obs, rewards, done, info def is_closed(self, env_index: int = None): """returns `True` if the environment at index `env_index` is closed, otherwise if `env_index` is None, returns `True` if `close()` was called on the wrapper. (todo: or if all envs are closed.) """ if env_index is None: # Return wether this wrapper itself was closed manually (from outside). # TODO: Should we also check if all envs are closed? If so, should we close # this env manually? if self._is_closed: return True elif all(self.is_closed(env_id) for env_id in range(self.nb_tasks)): self.close(env_index=None) return True return False assert isinstance(env_index, int) # Return wether the env at that index is closed. if isinstance(self._envs[env_index], MayCloseEarly): env_is_closed = self._envs[env_index].is_closed() # NOTE: These shouls always be the same, but just in case: self._envs_is_closed[env_index] = env_is_closed return self._envs_is_closed[env_index] def close(self, env_index: int = None) -> None: """Close the environment for the given index, or of all envs if `env_index` is `None`. """ if env_index is None: logger.info(f"Closing all envs") for env_index, (env_is_closed, env) in enumerate(zip(self._envs_is_closed, self._envs)): if not env_is_closed: self._envs_is_closed[env_index] = True env.close() # BUG: Not sure why this is actually causing a recursion error.. The idea # was to call `MayCloseEarly.close()`. # super().close() self._is_closed = True else: if self._envs_is_closed[env_index]: raise RuntimeError(f"Env at index {env_index} is already closed...") self._envs_is_closed[env_index] = True self._envs[env_index].close() def seed(self, seed: Optional[int] = None) -> List[int]: """Sets the seed for this env's random number generator(s). Note: Some environments use multiple pseudorandom number generators. We want to capture all such seeds used in order to ensure that there aren't accidental correlations between multiple generators. Returns: list: Returns the list of seeds used in this env's random number generators. The first value in the list should be the "main" seed, or the value which a reproducer should pass to 'seed'. Often, the main seed equals the provided 'seed', but this won't be true if seed=None, for example. """ self.rng = np.random.default_rng(seed) env_seeds = self.rng.integers(0, 1e8, size=len(self._envs)).tolist() seeds = env_seeds.copy() for index, env_seed in enumerate(env_seeds): # NOTE: Would be nice to be able to NOT instantiate all the envs and just # seed them when they get created, but then we wouldn't be able to return # the seeds from all envs here (which I'm not 100% sure its thaaat useful..) self._instantiate_env(index) env = self._envs[index] env_seeds: Optional[List[int]] = env.seed(env_seed) seeds.extend(env_seeds or []) return seeds def observation(self, observation): if self._add_task_labels: return add_task_labels(observation, task_labels=self._current_task_id) return observation class ConcatEnvsWrapper(MultiEnvWrapper): """Wrapper that exhausts the current environment before moving onto the next.""" def __init__( self, envs: List[gym.Env], add_task_ids: bool = False, on_task_switch_callback: Callable[[Optional[int]], Any] = None, ): super().__init__(envs, add_task_ids=add_task_ids) self.on_task_switch_callback = on_task_switch_callback def set_task(self, task_id: int) -> None: # NOTE: If any wrappers try to store things onto the unwrapped env, then those # would need to be transfered over to the new env here. super().set_task(task_id) def reset(self): old_task = self._current_task_id observation = super().reset() new_task = self._current_task_id if self.on_task_switch_callback and old_task != new_task: self.on_task_switch_callback(new_task if self._add_task_labels else None) return observation def next_task(self) -> int: assert not all(self._envs_is_closed) if not self._envs_is_closed[self._current_task_id]: return self._current_task_id # TODO: Close the env when we reach the end? or leave that up to the wrapper? return (self._current_task_id + 1) % self.nb_tasks def __iter__(self): return super().__iter__() def send(self, action): return super().send(action) # Register this as a 'concat' handler for gym environments! @concatenate.register(gym.Env) def _concatenate_gym_envs(first_env: gym.Env, *other_envs: gym.Env) -> ConcatEnvsWrapper: return ConcatEnvsWrapper([first_env, *other_envs]) class RoundRobinWrapper(MultiEnvWrapper): """MultiEnvWrapper that alternates between the non-closed environments in a round-robin fashion. """ def __init__(self, envs, add_task_ids=False): super().__init__(envs, add_task_ids=add_task_ids) self._current_task_id = -1 def next_task(self) -> int: assert not all(self._envs_is_closed) next_task = (self._current_task_id + 1) % self.nb_tasks while self._envs_is_closed[next_task]: next_task += 1 next_task %= self.nb_tasks return next_task class RandomMultiEnvWrapper(MultiEnvWrapper): def next_task(self) -> int: assert not all(self._envs_is_closed) available_ids = np.arange(self.nb_tasks)[~self._envs_is_closed].tolist() return self.rng.choice(available_ids) class CustomMultiEnvWrapper(MultiEnvWrapper): """MultiEnvWrapper that uses a custom callable to determine which env to use next.""" def __init__( self, envs: List[gym.Env], add_task_ids: bool = False, custom_new_task_fn: Callable[[MultiEnvWrapper], int] = None, ): super().__init__(envs, add_task_ids=add_task_ids) assert custom_new_task_fn, "Must pass a custom function to this wrapper." self._custom_new_task_fn = custom_new_task_fn def next_task(self): return self._custom_new_task_fn return super().next_task() ================================================ FILE: sequoia/settings/rl/discrete/multienv_wrappers_test.py ================================================ from collections import Counter from functools import partial from typing import List, Optional import gym import pytest from gym import spaces from gym.wrappers import TimeLimit from sequoia.common.gym_wrappers.env_dataset import EnvDataset from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit from sequoia.common.spaces import TypedDictSpace from sequoia.settings.rl.continual.make_env import wrap from sequoia.utils.utils import unique_consecutive_with_index from .multienv_wrappers import ConcatEnvsWrapper, RandomMultiEnvWrapper, RoundRobinWrapper class TestMultiEnvWrappers: @pytest.fixture() def iterable_env(self) -> gym.Env: return EnvDataset(gym.make("CartPole-v0")) @pytest.mark.parametrize("add_task_ids", [False, True]) @pytest.mark.parametrize("nb_tasks", [5, 1]) @pytest.mark.parametrize("pass_fn_instead_of_env", [False, True]) def test_concat(self, add_task_ids: bool, nb_tasks: int, pass_fn_instead_of_env: bool): def set_attributes(env: gym.Env, **attributes) -> gym.Env: for k, v in attributes.items(): setattr(env.unwrapped, k, v) return env max_episodes_per_task = 5 envs = [ partial( EpisodeLimit, TimeLimit( set_attributes(gym.make("CartPole-v0"), length=0.1 + 0.2 * i), max_episode_steps=10, ), max_episodes=max_episodes_per_task, ) for i in range(nb_tasks) ] if not pass_fn_instead_of_env: envs = [env_fn() for env_fn in envs] env = ConcatEnvsWrapper(envs, add_task_ids=add_task_ids) assert env.nb_tasks == nb_tasks if add_task_ids: assert env.observation_space["task_labels"] == spaces.Discrete(env.nb_tasks) lengths = [] for episode in range(nb_tasks * max_episodes_per_task): print(f"Episode: {episode}, length: {round(env.unwrapped.length, 5)}") obs = env.reset() lengths.append(env.unwrapped.length) env_id = episode // max_episodes_per_task assert env._current_task_id == env_id, episode if add_task_ids: assert obs["task_labels"] == env_id step = 0 done = False while not done: obs, rewards, done, info = env.step(env.action_space.sample()) step += 1 if step == 10: assert done assert step <= 10 # NOTE: It's pretty cool that we actually recover something like the task # schedule here! :D episode_task_schedule = dict(unique_consecutive_with_index(lengths)) assert episode_task_schedule == { i * max_episodes_per_task: 0.1 + 0.2 * i for i in range(nb_tasks) } assert env.is_closed() # TODO: This does the same with an additional StepLimit (ActionLimit) wrapper, # and isn't stable because it depends on each episode being 10 long, and # CartPole ends earlier sometimes. # envs = [ # ActionLimit(TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10), max_steps=50) # for i in range(5) # ] # env = ConcatEnvsWrapper(envs) # assert env.nb_tasks == 5 # for episode in range(25): # print(f"Episode: {episode}") # print(env.max_steps, env.step_count()) # obs = env.reset() # env_id = episode // 5 # assert env._current_task_id == env_id, episode # step = 0 # done = False # while not done: # print(step) # obs, rewards, done, info = env.step(env.action_space.sample()) # step += 1 # if step == 10: # assert done # assert step <= 10 # assert env.is_closed() @pytest.mark.parametrize("add_task_ids", [False, True]) @pytest.mark.parametrize("nb_tasks", [5, 1]) def test_roundrobin(self, add_task_ids: bool, nb_tasks: int): max_episodes_per_task = 5 max_episode_steps = 10 envs = [ EpisodeLimit( TimeLimit(gym.make("CartPole-v0"), max_episode_steps=max_episode_steps), max_episodes=max_episodes_per_task, ) for i in range(nb_tasks) ] env = RoundRobinWrapper(envs, add_task_ids=add_task_ids) assert env.nb_tasks == nb_tasks if add_task_ids: assert env.observation_space["task_labels"] == spaces.Discrete(env.nb_tasks) else: assert env.observation_space == env._envs[0].observation_space for episode in range(nb_tasks * max_episodes_per_task): print(f"Episode: {episode}") obs = env.reset() env_id = episode % nb_tasks assert env._current_task_id == env_id, episode step = 0 done = False while not done: print(step) obs, rewards, done, info = env.step(env.action_space.sample()) step += 1 if step == max_episode_steps: assert done assert step <= max_episode_steps assert env.is_closed() def test_random(self): episodes_per_task = 5 max_episode_steps = 10 nb_tasks = 5 envs = [ EpisodeLimit( TimeLimit(gym.make("CartPole-v0"), max_episode_steps=max_episode_steps), max_episodes=episodes_per_task, ) for i in range(nb_tasks) ] env = RandomMultiEnvWrapper(envs) env.seed(123) assert env.nb_tasks == nb_tasks task_ids: List[int] = [] for episode in range(nb_tasks * episodes_per_task): print(f"Episode: {episode}") obs = env.reset() env_id = episode // nb_tasks task_ids.append(env._current_task_id) step = 0 done = False print(env._envs_is_closed) while not done: print(step) obs, rewards, done, info = env.step(env.action_space.sample()) step += 1 if step == max_episode_steps: assert done assert step <= max_episode_steps assert env.is_closed() from collections import Counter # Assert that the task ids are 'random': import torch assert len(torch.unique_consecutive(torch.as_tensor(task_ids))) > nb_tasks assert Counter(task_ids) == {i: episodes_per_task for i in range(nb_tasks)} def test_iteration(self, iterable_env: gym.Env): """TODO: Interesting bug! Might be because when switching between envs, we're setting the 'cached' attributes onto the unwrapped env, and so when we move to another env, we all of a sudden don't have those attributes! """ max_episode_steps = 10 episodes_per_task = 5 add_task_ids = True nb_tasks = 5 def set_attributes(env: gym.Env, **attributes) -> gym.Env: for k, v in attributes.items(): setattr(env.unwrapped, k, v) return env from functools import partial envs = [ wrap( gym.make("CartPole-v0"), [ partial(TimeLimit, max_episode_steps=max_episode_steps), partial(set_attributes, length=0.1 + 0.2 * i), partial(EpisodeLimit, max_episodes=episodes_per_task), ], ) for i in range(nb_tasks) ] on_task_switch_received_task_ids: List[Optional[int]] = [] def on_task_switch(task_id: Optional[int]) -> None: print(f"On task switch: {task_id}.") on_task_switch_received_task_ids.append(task_id) env = ConcatEnvsWrapper( envs, add_task_ids=add_task_ids, on_task_switch_callback=on_task_switch ) env = EnvDataset(env) env.seed(123) assert env.nb_tasks == nb_tasks if add_task_ids: assert env.observation_space == TypedDictSpace( x=env.env._envs[0].observation_space, task_labels=spaces.Discrete(nb_tasks), ) else: assert env.observation_space == env.env._envs[0].observation_space assert env.observation_space.sample() in env.observation_space task_ids: List[int] = [] lengths_at_each_step = [] lengths_at_each_episode = [] for episode in range(nb_tasks * episodes_per_task): env_id = episode // episodes_per_task episode_task_ids: List[int] = [] for step, obs in enumerate(env): assert obs in env.observation_space print(f"Episode {episode}, Step {step}: obs: {obs}, length: {env.unwrapped.length}") if step == 0: lengths_at_each_episode.append(env.unwrapped.length) lengths_at_each_step.append(env.unwrapped.length) if add_task_ids: assert list(obs.keys()) == ["x", "task_labels"] obs_task_id = obs["task_labels"] episode_task_ids.append(obs_task_id) print(f"obs Task id: {obs_task_id}") rewards = env.send(env.action_space.sample()) if step > max_episode_steps: assert False, "huh?" if add_task_ids: assert ( len(set(episode_task_ids)) == 1 ), f"all observations within an episode should have the same task id.: {episode_task_ids}" # Add the unique task id from this episode to the list of all task ids. task_ids.extend(set(episode_task_ids)) actual_task_schedule = dict(unique_consecutive_with_index(lengths_at_each_step)) assert len(actual_task_schedule) == nb_tasks assert env.is_closed() if add_task_ids: assert task_ids == sum([[i] * episodes_per_task for i in range(nb_tasks)], []) # should have received one per boundary assert on_task_switch_received_task_ids == list(range(1, nb_tasks)) assert Counter(task_ids) == {i: episodes_per_task for i in range(nb_tasks)} else: assert on_task_switch_received_task_ids == [None] * (nb_tasks - 1) def test_adding_envs(self): from sequoia.common.gym_wrappers.env_dataset import EnvDataset env_1 = EnvDataset( EpisodeLimit(TimeLimit(gym.make("CartPole-v1"), max_episode_steps=10), max_episodes=5) ) env_2 = EnvDataset( EpisodeLimit(TimeLimit(gym.make("CartPole-v1"), max_episode_steps=10), max_episodes=5) ) chained_env = env_1 + env_2 assert chained_env._envs[0] is env_1 assert chained_env._envs[1] is env_2 # TODO: Do we add a 'len' attribute? # assert False, len(chained_env) # assert def test_batched_envs(): """TODO: Not sure how this will work with batched envs, but if it did, we could allow batch_size > 1 in Discrete, or batched custom envs in Incremental. """ ================================================ FILE: sequoia/settings/rl/discrete/results.py ================================================ from typing import ClassVar, TypeVar from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings.assumptions.discrete_results import TaskSequenceResults MetricType = TypeVar("MetricsType", bound=EpisodeMetrics) class DiscreteTaskAgnosticRLResults(TaskSequenceResults[MetricType]): """Results for a sequence of tasks in an RL Setting This can be seen as one row of a transfer matrix. NOTE: This is not the entire transfer matrix because in the Discrete settings we don't evaluate after learning each task. """ # Higher mean reward / episode => better lower_is_better: ClassVar[bool] = False objective_name: ClassVar[str] = "Mean reward per episode" # Minimum runtime considered (in hours). # (No extra points are obtained for going faster than this.) min_runtime_hours: ClassVar[float] = 1.5 # Maximum runtime allowed (in hours). max_runtime_hours: ClassVar[float] = 12.0 ================================================ FILE: sequoia/settings/rl/discrete/setting.py ================================================ from dataclasses import dataclass from typing import Any, Callable, ClassVar, Dict, Optional, Type, Union from gym.envs.registration import EnvSpec, registry from simple_parsing import field from simple_parsing.helpers import choice from sequoia.common.gym_wrappers.utils import is_monsterkong_env from sequoia.settings.assumptions.context_discreteness import DiscreteContextAssumption from sequoia.settings.rl.continual.tasks import TaskSchedule, registry from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import dict_union from ..continual.setting import ContinualRLSetting from ..continual.setting import supported_envs as _parent_supported_envs from .tasks import DiscreteTask, is_supported, make_discrete_task from .test_environment import DiscreteTaskAgnosticRLTestEnvironment logger = get_logger(__name__) supported_envs: Dict[str, EnvSpec] = dict_union( _parent_supported_envs, { spec.id: spec for env_id, spec in registry.env_specs.items() if spec.id not in _parent_supported_envs and is_supported(env_id) }, ) available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs} from .results import DiscreteTaskAgnosticRLResults @dataclass class DiscreteTaskAgnosticRLSetting(DiscreteContextAssumption, ContinualRLSetting): """Continual Reinforcement Learning Setting where there are clear task boundaries, but where the task information isn't available. """ # TODO: Update the type or results that we get for this Setting. Results: ClassVar[Type[Results]] = DiscreteTaskAgnosticRLResults # The type wrapper used to wrap the test environment, and which produces the # results. TestEnvironment: ClassVar[Type[TestEnvironment]] = DiscreteTaskAgnosticRLTestEnvironment # The function used to create the tasks for the chosen env. _task_sampling_function: ClassVar[Callable[..., DiscreteTask]] = make_discrete_task # Class variable that holds the dict of available environments. available_datasets: ClassVar[Dict[str, Union[str, Any]]] = available_datasets # Which environment (a.k.a. "dataset") to learn on. # The dataset could be either a string (env id or a key from the # available_datasets dict), a gym.Env, or a callable that returns a # single environment. dataset: str = choice(available_datasets, default="CartPole-v0") # The number of "tasks" that will be created for the training, valid and test # environments. When left unset, will use a default value that makes sense # (something like 5). nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"]) # Maximum number of training steps per task. train_steps_per_task: Optional[int] = None # Number of test steps per task. test_steps_per_task: Optional[int] = None # # Maximum number of episodes in total. # train_max_episodes: Optional[int] = None # # TODO: Add tests for this 'max episodes' and 'episodes_per_task'. # train_max_episodes_per_task: Optional[int] = None # # Total number of steps in the test loop. (Also acts as the "length" of the testing # # environment.) # test_max_steps_per_task: int = 10_000 # test_max_episodes_per_task: Optional[int] = None # # Max number of steps per training task. When left unset and when `train_max_steps` # # is set, takes the value of `train_max_steps` divided by `nb_tasks`. # train_max_steps_per_task: Optional[int] = None # # (WIP): Maximum number of episodes per training task. When left unset and when # # `train_max_episodes` is set, takes the value of `train_max_episodes` divided by # # `nb_tasks`. # train_max_episodes_per_task: Optional[int] = None # # Maximum number of steps per task in the test loop. When left unset and when # # `test_max_steps` is set, takes the value of `test_max_steps` divided by `nb_tasks`. # test_max_steps_per_task: Optional[int] = None # # (WIP): Maximum number of episodes per test task. When left unset and when # # `test_max_episodes` is set, takes the value of `test_max_episodes` divided by # # `nb_tasks`. # test_max_episodes_per_task: Optional[int] = None # def warn(self, warning: Warning): # logger.warning(warning) # warnings.warn(warning) def __post_init__(self): # TODO: Rework all the messy fields from before by just considering these as eg. # the maximum number of steps per task, rather than the fixed number of steps # per task. assert not self.smooth_task_boundaries super().__post_init__() if self.max_episode_steps is None: if is_monsterkong_env(self.dataset): self.max_episode_steps = 500 def create_train_task_schedule(self) -> TaskSchedule[DiscreteTask]: # IDEA: Could convert max_episodes into max_steps if max_steps_per_episode is # set. return super().create_train_task_schedule() def create_val_task_schedule(self) -> TaskSchedule[DiscreteTask]: # Always the same as train task schedule for now. return super().create_val_task_schedule() def create_test_task_schedule(self) -> TaskSchedule[DiscreteTask]: return super().create_test_task_schedule() ================================================ FILE: sequoia/settings/rl/discrete/setting_test.py ================================================ from dataclasses import fields from typing import Any, ClassVar, Dict, Optional, Type import gym import pytest from sequoia.common.config import Config from sequoia.conftest import monsterkong_required, param_requires_monsterkong from sequoia.methods import Method from sequoia.settings.assumptions.incremental_test import DummyMethod as _DummyMethod from sequoia.settings.rl.envs import MetaMonsterKongEnv from ..continual.setting_test import TestContinualRLSetting as ContinualRLSettingTests from .setting import DiscreteTaskAgnosticRLSetting class TestDiscreteTaskAgnosticRLSetting(ContinualRLSettingTests): Setting: ClassVar[Type[Setting]] = DiscreteTaskAgnosticRLSetting dataset: pytest.fixture @pytest.fixture(params=[1, 3]) def nb_tasks(self, request): n = request.param return n @pytest.fixture() def setting_kwargs(self, dataset: str, nb_tasks: int, config: Config): """Fixture used to pass keyword arguments when creating a Setting.""" return {"dataset": dataset, "nb_tasks": nb_tasks, "config": config} @pytest.mark.parametrize( "dataset, expected_resulting_name", [ param_requires_monsterkong("monsterkong", "MetaMonsterKong-v0"), param_requires_monsterkong("monsterkong-v0", "MetaMonsterKong-v0"), param_requires_monsterkong("meta_monsterkong", "MetaMonsterKong-v0"), ("cartpole", "CartPole-v1"), ], ) def test_passing_name_variant_works(self, dataset: str, expected_resulting_name: str): assert self.Setting(dataset=dataset).dataset == expected_resulting_name def validate_results( self, setting: DiscreteTaskAgnosticRLSetting, method: Method, results: DiscreteTaskAgnosticRLSetting.Results, ) -> None: assert results assert results.objective assert len(results.task_results) == setting.nb_tasks assert [ sum(task_result.metrics) == task_result.average_metrics for task_result in results.task_results ] assert ( sum(task_result.average_metrics for task_result in results.task_results) == results.average_metrics ) @pytest.mark.parametrize("give_nb_tasks", [True, False]) @pytest.mark.parametrize("give_train_max_steps", [True, False]) @pytest.mark.parametrize( "give_train_task_schedule, ids_instead_of_steps", [(True, False), (True, True), (False, False)], ) @pytest.mark.parametrize( "nb_tasks, train_max_steps, train_task_schedule", [ (1, 10_000, {0: {"gravity": 5.0}, 10_000: {"gravity": 10}}), ( 4, 100_000, { 0: {"gravity": 5.0}, 25_000: {"gravity": 10}, 50_000: {"gravity": 10}, 75_000: {"gravity": 10}, 100_000: {"gravity": 20}, }, ), ], ) def test_fields_are_consistent( self, nb_tasks: Optional[int], train_max_steps: Optional[int], train_task_schedule: Optional[Dict[str, Any]], give_nb_tasks: bool, give_train_max_steps: bool, give_train_task_schedule: bool, ids_instead_of_steps: bool, ): # give_nb_tasks = True # give_max_steps = True # give_task_schedule = True defaults = {f.name: f.default for f in fields(self.Setting)} default_max_train_steps = defaults["train_max_steps"] default_nb_tasks = defaults["nb_tasks"] # TODO: Same test for test_max_steps? full_kwargs = dict( nb_tasks=nb_tasks, train_max_steps=train_max_steps, train_task_schedule=train_task_schedule, ) # TODO: Should also pass nothing, and expect an error to be raised? kwargs = full_kwargs.copy() if not give_nb_tasks: kwargs.pop("nb_tasks") if not give_train_max_steps: kwargs.pop("train_max_steps") if not give_train_task_schedule: kwargs.pop("train_task_schedule") elif ids_instead_of_steps: kwargs["train_task_schedule"] = { i: task for i, (step, task) in enumerate(train_task_schedule.items()) } setting = self.Setting(**kwargs) assert ( setting.nb_tasks == nb_tasks if give_nb_tasks else len(train_task_schedule) if give_train_task_schedule else default_nb_tasks ) assert ( setting.train_max_steps == train_max_steps if give_train_max_steps else max(train_task_schedule) if give_train_task_schedule else default_max_train_steps ) assert list(setting.train_task_schedule.keys()) == [ i * (setting.train_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1) ] assert list(setting.val_task_schedule.keys()) == [ i * (setting.train_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1) ] assert list(setting.test_task_schedule.keys()) == [ i * (setting.test_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1) ] # When giving only the number of tasks: from typing import Any, Dict, Optional def test_fit_and_on_task_switch_calls(config: Config): setting = DiscreteTaskAgnosticRLSetting( dataset="CartPole-v0", # nb_tasks=5, # train_steps_per_task=100, train_max_steps=500, test_max_steps=500, # test_steps_per_task=100, train_transforms=[], test_transforms=[], val_transforms=[], config=config, ) method = _DummyMethod() _ = setting.apply(method) # == 30 task switches in total. assert method.n_task_switches == 0 assert method.n_fit_calls == 1 assert not method.received_task_ids assert not method.received_while_training @monsterkong_required @pytest.mark.parametrize( "dataset, expected_env_type", [ ("MetaMonsterKong-v0", MetaMonsterKongEnv), ("monsterkong", MetaMonsterKongEnv), ("PixelMetaMonsterKong-v0", MetaMonsterKongEnv), ("monster_kong", MetaMonsterKongEnv), ("monster_kong", MetaMonsterKongEnv), # ("halfcheetah", ContinualHalfCheetahEnv), # ("HalfCheetah-v2", ContinualHalfCheetahV2Env), # ("HalfCheetah-v3", ContinualHalfCheetahV3Env), # ("ContinualHalfCheetah-v2", ContinualHalfCheetahV2Env), # ("ContinualHalfCheetah-v3", ContinualHalfCheetahV3Env), # ("ContinualHopper-v2", ContinualHopperEnv), # ("hopper", ContinualHopperEnv), # ("Hopper-v2", ContinualHopperEnv), # ("walker2d", ContinualWalker2dV3Env), # ("Walker2d-v2", ContinualWalker2dV2Env), # ("Walker2d-v3", ContinualWalker2dV3Env), # ("ContinualWalker2d-v2", ContinualWalker2dV2Env), # ("ContinualWalker2d-v3", ContinualWalker2dV3Env), ], ) def test_monsterkong_env_name_maps_to_continual_variant( dataset: str, expected_env_type: Type[gym.Env] ): setting = DiscreteTaskAgnosticRLSetting( dataset=dataset, train_max_steps=10_000, test_max_steps=10_000 ) train_env = setting.train_dataloader() assert isinstance(train_env.unwrapped, expected_env_type) ================================================ FILE: sequoia/settings/rl/discrete/tasks.py ================================================ """ Functions that create 'discrete' tasks for an environment. TODO: Once we have a wrapper that can seamlessly switch from one env to the next, then move the "incremental" tasks from `incremental/tasks.py` to this level. """ import warnings from functools import partial, singledispatch from typing import Any, Callable, Dict, List, Optional, Union import gym import numpy as np from sequoia.settings.rl.envs import MONSTERKONG_INSTALLED, MetaMonsterKongEnv, sequoia_registry from ..continual.tasks import ( ContinuousTask, _is_supported, make_continuous_task, task_sampling_function, ) DiscreteTask = Union[ContinuousTask, Callable[[gym.Env], Any]] @task_sampling_function(env_registry=sequoia_registry, based_on=make_continuous_task) @singledispatch def make_discrete_task( env: gym.Env, *, step: int, change_steps: List[int], seed: int = None, **kwargs, ) -> DiscreteTask: """Generic function used by Sequoia's `DiscreteTaskAgnosticRLSetting` (and its descendants) to create a "task" that will be applied to an environment like `env`. To add support for a new type of environment, simply register a handler function: ``` @make_discrete_task.register(SomeGymEnvClass) def make_discrete_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,): return {"my_attribute": random.random()} ``` """ raise NotImplementedError(f"Don't currently know how to create a discrete task for env {env}") # return make_continuous_task( # env, step=step, change_steps=change_steps, seed=seed, **kwargs # ) is_supported = partial(_is_supported, _make_task_function=make_discrete_task) if MONSTERKONG_INSTALLED: # In MonsterKong the tasks can be changed on-the-fly, whereas they can't in the # size-based MUJOCO envs. @make_discrete_task.register def make_task_for_monsterkong_env( env: MetaMonsterKongEnv, step: int, change_steps: List[int] = None, seed: int = None, **kwargs, ) -> Union[Dict[str, Any], Any]: """Samples a task for the MonsterKong environment. TODO: When given a seed, sample the task randomly (but deterministicly) using the seed. """ assert change_steps is not None, "Need task boundaries to construct the task schedule." if step not in change_steps: raise RuntimeError( f"Monsterkong's has discrete tasks, {step} should be in {change_steps}!" ) task_index = change_steps.index(step) # TODO: double-check with @mattriemer on this: n_supported_levels = 30 # IDEA: Could also have a list of supported levels levels = list(range(n_supported_levels)) nb_tasks = len(change_steps) rng: Optional[np.random.Generator] = None if seed is not None: # perform a deterministic shuffling of the 'task ids' rng = np.random.default_rng(seed) rng.shuffle(levels) level: int if task_index >= n_supported_levels: warnings.warn( RuntimeWarning( f"The given task id ({task_index}) is greater than the number of " f"levels currently available in MonsterKong " f"({n_supported_levels})!\n" f"Multiple tasks may therefore use the same level!" ) ) # Option 1: Loop back around, using the same task as the first task? # (Probably not a good idea, since then we might get to train on the first # tasks right before testing begins! (which isnt great as a CL evaluation) # task_index %= n_supported_levels # Option 2 (better): Sample levels at random after all other levels have been # exhausted. # NOTE: Other calls to this should not get the same value! rng = rng or np.random.default_rng(seed) random_extra_levels = rng.integers( 0, n_supported_levels, size=nb_tasks - n_supported_levels ) level = int(random_extra_levels[task_index - n_supported_levels]) else: level = levels[task_index] return {"level": level} ================================================ FILE: sequoia/settings/rl/discrete/tasks_test.py ================================================ import pytest from sequoia.conftest import monsterkong_required from sequoia.settings.rl.envs import MetaMonsterKongEnv from .tasks import make_discrete_task @monsterkong_required def test_monsterkong_tasks(): # assert make_discrete_task.is_supported(MetaMonsterKongEnv) task = make_discrete_task(MetaMonsterKongEnv, step=0, change_steps=[0, 100, 200]) assert task == {"level": 0} task = make_discrete_task(MetaMonsterKongEnv, step=100, change_steps=[0, 100, 200]) assert task == {"level": 1} with pytest.raises(RuntimeError): _ = make_discrete_task(MetaMonsterKongEnv, step=123, change_steps=[0, 100, 200]) ================================================ FILE: sequoia/settings/rl/discrete/test_environment.py ================================================ import itertools import math from typing import Dict from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings.assumptions.discrete_results import TaskSequenceResults from sequoia.settings.assumptions.iid_results import TaskResults from ..continual.test_environment import ContinualRLTestEnvironment class DiscreteTaskAgnosticRLTestEnvironment(ContinualRLTestEnvironment): def __init__(self, *args, task_schedule: Dict, **kwargs): super().__init__(*args, task_schedule=task_schedule, **kwargs) self.task_schedule = task_schedule self.boundary_steps = [step // (self.batch_size or 1) for step in self.task_schedule.keys()] # TODO: Removing the last entry since it's the terminal state. self.boundary_steps.pop(-1) def __len__(self): return math.ceil(self.step_limit / (getattr(self.env, "batch_size", 1) or 1)) def get_results(self) -> TaskSequenceResults[EpisodeMetrics]: # TODO: Place the metrics in the right 'bin' at the end of each episode during # testing depending on the task at that time, rather than what's happening here, # where we're getting all the rewards and episode lengths at the end and then # sort it out into the bins based on the task schedule. ALSO: this would make it # easier to support monitoring batched RL environments, since these `Monitor` # methods (get_episode_rewards, get_episode_lengths, etc) assume the environment # isn't batched. rewards = self.get_episode_rewards() lengths = self.get_episode_lengths() task_schedule: Dict[int, Dict] = self.task_schedule task_steps = sorted(task_schedule.keys()) # TODO: Removing the last entry since it's the terminal state. task_steps.pop(-1) assert 0 in task_steps import bisect nb_tasks = len(task_steps) assert nb_tasks >= 1 test_results = TaskSequenceResults([TaskResults() for _ in range(nb_tasks)]) # TODO: Fix this, since the task id might not be related to the steps! for step, episode_reward, episode_length in zip( itertools.accumulate(lengths), rewards, lengths ): # Given the step, find the task id. task_id = bisect.bisect_right(task_steps, step) - 1 episode_metric = EpisodeMetrics( n_samples=1, mean_episode_reward=episode_reward, mean_episode_length=episode_length, ) test_results.task_results[task_id].metrics.append(episode_metric) return test_results def render(self, mode="human", **kwargs): # TODO: This might not be setup right. Need to check. image_batch = super().render(mode=mode, **kwargs) if mode == "rgb_array" and self.batch_size: return tile_images(image_batch) return image_batch def _after_reset(self, observation): # Is this going to work fine when the observations are batched though? return super()._after_reset(observation) ================================================ FILE: sequoia/settings/rl/environment.py ================================================ from typing import * from torch.utils.data import DataLoader, Dataset, IterableDataset from sequoia.settings.base.environment import ActionType, Environment, ObservationType, RewardType from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) from typing_extensions import Final from .objects import ActionType, ObservationType, RewardType # TODO: Instead of using a 'y' field for both the supervised learning labels/target and # for the reward in RL, instead use a 'reward' field in RL, and a 'y' field in SL, where # in SL the reward could actually be wether the chosen action was correct or not, and # 'y' could contain the correct prediction for each action. class RLEnvironment(DataLoader, Environment[ObservationType, ActionType, RewardType]): """Environment in an RL Setting. Extends DataLoader to support sending back actions to the 'dataset'. This could be useful for modeling RL or Active Learning, for instance, where the predictions (actions) have an impact on the data generation process. TODO: Not really used at the moment besides as the base class for the GymDataLoader. TODO: Maybe add a custom `map` class for generators? Iterating through an RL Environment is different than when iterating on an SL environment: - Batches only contain the observations, rather than (observations, rewards) - The rewards are given back after an action is sent to the environment using `send`. TODO: maybe change this class into something like a `FakeActiveEnvironment`. """ actions_influence_future_observations: Final[bool] = True def __init__(self, dataset: Union[Dataset, IterableDataset], **dataloader_kwargs): super().__init__(dataset, **dataloader_kwargs) self.observation: ObservationType = None self.action: ActionType = None self.reward: RewardType = None # def __next__(self) -> ObservationType: # return self.observation def send(self, action: ActionType) -> RewardType: """Sends an action to the 'dataset'/'Environment'. Does nothing when the environment is a simple Dataset (when it isn't an instance of EnvironmentBase). TODO: Figure out the interactions with num_workers and send, if any. """ self.action = action if hasattr(self.dataset, "send"): self.reward = self.dataset.send(self.action) # TODO: Clean this up, this is taken care of in the GymDataLoader class. # if hasattr(self.dataset, "step"): # self.observation, self.reward, self.done, self.info = self.dataset.step(self.action) else: assert ( False ), "TODO: ActiveDataloader dataset should always have a `send` attribute for now." return self.reward # Deprecated names for the same thing: ActiveDataLoader = RLEnvironment ActiveEnvironment = RLEnvironment ================================================ FILE: sequoia/settings/rl/environment_test.py ================================================ from typing import Generator from torch import Tensor from torchvision.datasets import MNIST from sequoia.utils.logging_utils import log_calls from .environment import ActiveEnvironment class ActiveMnistEnvironment(ActiveEnvironment[Tensor, Tensor, Tensor]): """An Mnist environment which will keep showing the same class until a correct prediction is made, and then switch to another class. Which will keep giving the same class until the right prediction is made. """ def __init__(self, start_class: int = 0, **kwargs): self.current_class: int = 0 dataset = MNIST("data") super().__init__(dataset, batch_size=None, **kwargs) self.observation: Tensor = None self.reward: Tensor = None self.action: Tensor = None @log_calls def __next__(self) -> Tensor: for x, y in self.dataset: # keep iterating while the example isn't of the right type. if y == self.current_class: self.observation = x self.reward = y break print(f"next obs: {self.observation}, next reward = {self.reward}") return self.observation @log_calls def __iter__(self) -> Generator[Tensor, Tensor, None]: while True: action = yield next(self) if action is not None: logger.debug(f"Received an action of {action} while iterating..") self.reward = self.send(action) @log_calls def send(self, action: Tensor) -> Tensor: print(f"received action {action}, returning current label {self.reward}") self.action = action if action == self.current_class: print("Switching classes since the prediction was right!") self.current_class += 1 self.current_class %= 10 else: print("Prediction was wrong, staying on the same class.") return self.reward def test_active_mnist_environment(): """Test the active mnist env, which will keep giving the same class until the right prediction is made.""" env = ActiveMnistEnvironment() # So in this test, the env will only give samples of class 0, until a correct # prediction is made, then it will switch to giving samples of class 1, etc. # what the current class is (just for testing) _current_class = 0 # first loop, where we always predict the right label. for i, x in enumerate(env): print(f"x: {x}") y_pred = i % 10 print(f"Sending prediction of {y_pred}") y_true = env.send(y_pred) print(f"Received back {y_true}") assert y_pred == y_true if i == 9: break # current class should be 0 as last prediction was 9 and correct. _current_class = 0 # Second loop, where we always predict the wrong label. for i, x in enumerate(env): print(f"x: {x}") y_pred = 1 y_true = env.send(y_pred) assert y_true == 0 if i > 2: break x = next(env) y_pred = 0 y_true = env.send(y_pred) assert y_true == 0 x = next(env) y_true = env.send(1) assert y_true == 1 ================================================ FILE: sequoia/settings/rl/envs/__init__.py ================================================ import copy import json from abc import ABC from contextlib import redirect_stdout from io import StringIO from pathlib import Path from typing import Dict, List, Type, Union import gym from gym.envs.registration import EnvSpec, registry from sequoia.utils import get_logger logger = get_logger(__name__) # IDEA: Modify a copy of the gym registry? # sequoia_registry = copy.deepcopy(registry) sequoia_registry = registry from .classic_control import PixelObservationWrapper, register_classic_control_variants from .variant_spec import EnvVariantSpec register_classic_control_variants(sequoia_registry) ATARI_PY_INSTALLED = False try: from ale_py.gym.environment import ALGymEnv AtariEnv = ALGymEnv ATARI_PY_INSTALLED = True except (gym.error.DependencyNotInstalled, ImportError): class AtariEnv(gym.Env): pass MONSTERKONG_INSTALLED = False try: # Redirecting stdout because this import prints stuff. from .monsterkong import MetaMonsterKongEnv, register_monsterkong_variants register_monsterkong_variants(sequoia_registry) MONSTERKONG_INSTALLED = True except ImportError: class MetaMonsterKongEnv(gym.Env): pass MTENV_INSTALLED = False mtenv_envs = [] try: from mtenv import MTEnv from mtenv.envs.registration import mtenv_registry mtenv_envs = [env_spec.id for env_spec in mtenv_registry.all()] MTENV_INSTALLED = True except ImportError: # Create a 'dummy' class so we can safely use MTEnv in the type hints below. # Additionally, isinstance(some_env, MTEnv) will always fail when mtenv isn't # installed, which is good. class MTEnv(gym.Env): pass MUJOCO_INSTALLED = False try: import mujoco_py mj_path, _ = mujoco_py.utils.discover_mujoco() from gym.envs.mujoco import MujocoEnv from .mujoco import ( ContinualHalfCheetahEnv, ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, ContinualHopperEnv, ContinualHopperV2Env, ContinualHopperV3Env, ContinualWalker2dEnv, ContinualWalker2dV2Env, ContinualWalker2dV3Env, register_mujoco_variants, ) register_mujoco_variants(env_registry=sequoia_registry) MUJOCO_INSTALLED = True except ( ImportError, AttributeError, ValueError, gym.error.DependencyNotInstalled, ) as exc: logger.debug(f"Couldn't import mujoco: ({exc})") # Create a 'dummy' class so we can safely use type hints everywhere. # Additionally, `isinstance(some_env, )`` will always fail when the # dependency isn't installed, which is good. class MujocoEnv(gym.Env): pass class ContinualHalfCheetahEnv(MujocoEnv): pass class ContinualHalfCheetahV2Env(MujocoEnv): pass class ContinualHalfCheetahV3Env(MujocoEnv): pass class ContinualHopperEnv(MujocoEnv): pass class ContinualHopperV2Env(MujocoEnv): pass class ContinualHopperV3Env(MujocoEnv): pass class ContinualWalker2dEnv(MujocoEnv): pass class ContinualWalker2dV2Env(MujocoEnv): pass class ContinualWalker2dV3Env(MujocoEnv): pass METAWORLD_INSTALLED = False metaworld_envs: List[Type[gym.Env]] = [] try: if not MUJOCO_INSTALLED: # Skip the stuff below, since metaworld requires mujoco anyway. raise ImportError import metaworld from metaworld import MetaWorldEnv # TODO: Use mujoco from metaworld? or from mujoco_py? from metaworld.envs.mujoco.mujoco_env import MujocoEnv as MetaWorldMujocoEnv from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import SawyerXYZEnv # from metaworld.envs.mujoco.mujoco_env import MujocoEnv METAWORLD_INSTALLED = True # metaworld_dir = getsourcefile(metaworld) # mujoco_dir = Path("~/.mujoco").expanduser() # TODO: Cache the names of the metaworld envs to a file, just so we don't take about # 10 seconds to import metaworld every time? # TODO: Make sure this also works on a cluster. # TODO: When updating metaworld, need to remove this file. envs_cache_file = Path("temp/metaworld_envs.json") envs_cache_file.parent.mkdir(exist_ok=True) all_metaworld_envs: Dict[str, List[str]] = {} if envs_cache_file.exists(): with open(envs_cache_file, "r") as f: all_metaworld_envs = json.load(f) else: print( "Loading up the list of available envs from metaworld for the first time, " "this might take a while (usually ~10 seconds)." ) if "ML10" not in all_metaworld_envs: ML10_envs = list(metaworld.ML10().train_classes.keys()) all_metaworld_envs["ML10"] = ML10_envs with open(envs_cache_file, "w") as f: json.dump(all_metaworld_envs, f) metaworld_envs = sum([list(envs) for envs in all_metaworld_envs.values()], []) except (ImportError, AttributeError, gym.error.DependencyNotInstalled) as e: logger.debug(f"Unable to import metaworld: {e}") # raise e if not METAWORLD_INSTALLED: # Create a 'dummy' class so we can safely use MetaWorldEnv in the type hints below. # Additionally, isinstance(some_env, MetaWorldEnv) will always fail when metaworld # isn't installed, which is good. class MetaWorldEnv(gym.Env, ABC): pass class MetaWorldMujocoEnv(gym.Env, ABC): pass class SawyerXYZEnv(gym.Env, ABC): pass ================================================ FILE: sequoia/settings/rl/envs/classic_control.py ================================================ """ Registers variants of the classic-control envs that are used by sequoia. """ # TODO: Add Pixel???-v? variants for the classic-control envs. from typing import Dict from gym.envs.registration import EnvRegistry, EnvSpec, registry from sequoia.common.gym_wrappers.pixel_observation import PixelObservationWrapper from .variant_spec import EnvVariantSpec def register_classic_control_variants(env_registry: EnvRegistry = registry) -> None: """Adds pixel variants for the classic-control envs to the given registry in-place.""" classic_control_env_specs: Dict[str, EnvSpec] = { spec.id: spec for env_id, spec in env_registry.env_specs.items() if isinstance(spec.entry_point, str) and spec.entry_point.startswith("gym.envs.classic_control") } for env_id, env_spec in classic_control_env_specs.items(): new_id = "Pixel" + env_id if new_id not in env_registry.env_specs: new_spec = EnvVariantSpec.of( env_spec, new_id=new_id, wrappers=[PixelObservationWrapper] ) env_registry.env_specs[new_id] = new_spec ================================================ FILE: sequoia/settings/rl/envs/monsterkong.py ================================================ from contextlib import redirect_stdout from io import StringIO import numpy as np from gym import spaces from gym.envs.registration import EnvRegistry, EnvSpec, registry # Avoid print statements from pygame package. with redirect_stdout(StringIO()): from meta_monsterkong.make_env import MetaMonsterKongEnv from .variant_spec import EnvVariantSpec def observe_state(env: MetaMonsterKongEnv) -> MetaMonsterKongEnv: if not env.observe_state: env.unwrapped.observe_state = True env.unwrapped.observation_space = spaces.Box( 0, 292, [ 402, ], np.int16, ) return env def register_monsterkong_variants(env_registry: EnvRegistry = registry) -> None: for env_id in ["MetaMonsterKong-v0", "MetaMonsterKong-v1"]: spec: EnvSpec = env_registry.spec(env_id) # Add an explicit 'State' variant of the envs. new_env_id = "State" + env_id new_spec = EnvVariantSpec.of( spec, new_id=new_env_id, new_max_episode_steps=500, new_kwargs={"observe_state": True}, ) if new_env_id not in env_registry.env_specs: env_registry.env_specs[new_env_id] = new_spec # Add an explicit 'Pixel' variant of the envs (even though by default we currently # always observe the state). new_env_id = "Pixel" + env_id new_spec = EnvVariantSpec.of( spec, new_id=new_env_id, new_max_episode_steps=500, new_kwargs={"observe_state": False}, ) if new_env_id not in env_registry.env_specs: env_registry.env_specs[new_env_id] = new_spec ================================================ FILE: sequoia/settings/rl/envs/mujoco/__init__.py ================================================ """ CL environments based on the mujoco envs. NOTE: This is based on https://github.com/Breakend/gym-extensions """ # from sequoia.conftest import mujoco_required # pytestmark = mujoco_required import os from pathlib import Path from typing import Callable, Dict, List, Type, Union import gym from gym.envs import register from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv from gym.envs.registration import EnvRegistry, EnvSpec, load, registry from sequoia.utils.logging_utils import get_logger from ..variant_spec import EnvVariantSpec from .half_cheetah import ( ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, HalfCheetahV2Env, HalfCheetahV3Env, ) from .hopper import ContinualHopperV2Env, ContinualHopperV3Env, HopperV2Env, HopperV3Env from .modified_gravity import ModifiedGravityEnv from .modified_size import ModifiedSizeEnv from .walker2d import ContinualWalker2dV2Env, ContinualWalker2dV3Env, Walker2dV2Env, Walker2dV3Env logger = get_logger(__name__) # NOTE: Prefer the 'V3' variants # HalfCheetahEnv = HalfCheetahV3Env # Walker2dEnv = Walker2dV3Env ContinualHalfCheetahEnv = ContinualHalfCheetahV3Env ContinualHopperEnv = ContinualHopperV3Env ContinualWalker2dEnv = ContinualWalker2dV3Env SOURCE_DIR = Path(os.path.dirname(os.path.abspath(__file__))) __all__ = [ "ContinualHalfCheetahEnv", "ContinualHalfCheetahV2Env", "ContinualHalfCheetahV3Env", "ContinualHopperV2Env", "ContinualHopperV3Env", "ContinualWalker2dEnv", "ContinualWalker2dV2Env", "ContinualWalker2dV3Env", "ModifiedGravityEnv", "ModifiedSizeEnv", "MujocoEnv", ] def get_entry_point(Env: Type[gym.Env]) -> str: # TODO: Make sure this also works when Sequoia is installed in non-editable mode. return f"{Env.__module__}:{Env.__name__}" # The list of mujoco envs which we explicitly have support for. # TODO: Should probably use a Wrapper rather than a new base class (at least for the # GravityEnv and the modifications that can be made to an already-instantiated env. # NOTE: Using the same version tag as the CURRENTLY_SUPPORTED_MUJOCO_ENVS: Dict[str, Type[MujocoEnv]] = { "HalfCheetah-v2": ContinualHalfCheetahV2Env, "HalfCheetah-v3": ContinualHalfCheetahV3Env, "Hopper-v2": ContinualHopperV2Env, "Hopper-v3": ContinualHopperV3Env, "Walker2d-v2": ContinualWalker2dV2Env, "Walker2d-v3": ContinualWalker2dV3Env, } # TODO: Register the 'continual' variants automatically by finding the entries in the # registry that can be wrapped, and wrapping them. # IDEA: Actually swap out the entries for these envs, rather than overwrite them? def register_mujoco_variants(env_registry: EnvRegistry = registry) -> None: """Adds pixel variants for the classic-control envs to the given registry in-place.""" # Dict from the env id to the original spec original_mujoco_env_specs: Dict[str, EnvSpec] = { original_env_id: env_registry.spec(original_env_id) for original_env_id in CURRENTLY_SUPPORTED_MUJOCO_ENVS } # Dict from the # TODO: Add broader support for mujoco envs new_entry_points = CURRENTLY_SUPPORTED_MUJOCO_ENVS # NOTE: Currently we do two things: Register a new spec with a different name, like # `ContinualWalker2d-v2`, as well as 'overwrite' the entry-point of the original # spec ("Walker2d-v2") to point to our custom subclass (ContinualWalker2dV2Env) prefixes = ["Continual", ""] # NOTE: It could actually make more sense to only register our variants, and # then have the Setting map one to the other intelligently, but it causes a bit more # trouble # prefixes = ["Continual"] for prefix in prefixes: for env_id, original_env_spec in original_mujoco_env_specs.items(): # TODO: Use the same ID, or a different one? new_id = prefix + env_id if (new_id not in env_registry.env_specs or new_id == env_id) and not isinstance( original_env_spec, EnvVariantSpec ): new_spec = EnvVariantSpec.of( original=original_env_spec, new_id=new_id, new_entry_point=new_entry_points[env_id], ) env_registry.env_specs[new_id] = new_spec if new_id != env_id: logger.debug( f"Registering MuJoCO Environment variant of {env_id} at id {new_id}." ) else: logger.debug(f"Overwriting the existing EnvSpec at id {env_id}") # Replace the entry-point for these mujoco envs. # IMPORTANT: This doesn't change anything about the envs, apart from making it possible # to explicitly change the gravity or mass etc if you want. # TODO: Should probably still only modify a custom/copied registry, so that importing # Sequoia doesn't modify the gym registry when Sequoia isn't being used explicitly. # registry.env_specs["HalfCheetah-v2"].entry_point = ContinualHalfCheetahV2Env # registry.env_specs["HalfCheetah-v3"].entry_point = ContinualHalfCheetahV3Env # registry.env_specs["Hopper-v2"].entry_point = ContinualHopperEnv # registry.env_specs["Walker2d-v2"].entry_point = ContinualWalker2dEnv # EnvSpec( # "HalfCheetah-v2", # entry_point=get_entry_point(Continu), # reward_threshold=None, # nondeterministic=False, # max_episode_steps=None, # kwargs=None, # ) # gym.envs.register( # id="ContinualHalfCheetah-v2", # entry_point=get_entry_point(ContinualHalfCheetahV2Env), # max_episode_steps=1000, # reward_threshold=4800.0, # ) # gym.envs.register( # id="ContinualHalfCheetah-v3", # entry_point=get_entry_point(ContinualHalfCheetahV3Env), # max_episode_steps=1000, # reward_threshold=4800.0, # ) # gym.envs.register( # id="ContinualHopper-v2", # entry_point=get_entry_point(ContinualHopperEnv), # max_episode_steps=1000, # reward_threshold=4800.0, # ) # gym.envs.register( # id="ContinualWalker2d-v3", # entry_point=get_entry_point(ContinualWalker2dEnv), # max_episode_steps=1000, # reward_threshold=4800.0, # ) ================================================ FILE: sequoia/settings/rl/envs/mujoco/half_cheetah.py ================================================ from typing import ClassVar, Dict, List import numpy as np from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco.half_cheetah import HalfCheetahEnv as _HalfCheetahV2Env # TODO: Use HalfCheetah-v3 instead, which allows explicitly to change the model file! from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv as _HalfCheetahV3Env from .modified_gravity import ModifiedGravityEnv from .modified_mass import ModifiedMassEnv from .modified_size import ModifiedSizeEnv class HalfCheetahV2Env(_HalfCheetahV2Env): """ Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym """ BODY_NAMES: ClassVar[List[str]] = [ "torso", "bthigh", "bshin", "bfoot", "fthigh", "fshin", "ffoot", ] def __init__(self, model_path: str = "half_cheetah.xml", frame_skip: int = 5): MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip) # Q: Why isn't HalfCheetahV3 based on HalfCheetahV2 in gym ?! class HalfCheetahV3Env(_HalfCheetahV3Env): BODY_NAMES: ClassVar[List[str]] = [ "torso", "bthigh", "bshin", "bfoot", "fthigh", "fshin", "ffoot", ] def __init__( self, model_path="half_cheetah.xml", forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 0.1, reset_noise_scale: float = 0.1, exclude_current_positions_from_observation: bool = True, xml_file: str = None, frame_skip: int = 5, ): if frame_skip != 5: raise NotImplementedError("todo: Add a frame_skip arg to the gym class.") super().__init__( xml_file=xml_file or model_path, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, ) # class HalfCheetahGravityEnv(ModifiedGravityEnv, HalfCheetahEnv): # # NOTE: This environment could be used in ContinualRL! # def __init__( # self, # model_path: str = "half_cheetah.xml", # frame_skip: int = 5, # gravity: float = -9.81, # ): # super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity) class HalfCheetahWithSensorEnv(HalfCheetahV2Env): """NOTE: unused for now. Adds empty sensor readouts, this is to be used when transfering to WallEnvs where we get sensor readouts with distances to the wall """ def __init__(self, model_path: str, frame_skip: int = 5, n_bins: int = 10): super().__init__(model_path=model_path, frame_skip=frame_skip) self.n_bins = n_bins def _get_obs(self): obs = np.concatenate( [ super()._get_obs(), np.zeros(self.n_bins), # NOTE: @lebrice HUH? what's the point of doing this? # goal_readings ] ) return obs # TODO: Rename these base classes to 'ModifyGravityMixin', 'ModifySizeMixin', etc. class ContinualHalfCheetahV2Env( ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HalfCheetahV2Env ): def __init__( self, model_path: str = "half_cheetah.xml", frame_skip: int = 5, gravity=-9.81, body_name_to_size_scale: Dict[str, float] = None, body_name_to_mass_scale: Dict[str, float] = None, ): super().__init__( model_path=model_path, frame_skip=frame_skip, gravity=gravity, body_name_to_size_scale=body_name_to_size_scale, body_name_to_mass_scale=body_name_to_mass_scale, ) class ContinualHalfCheetahV3Env( ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HalfCheetahV3Env ): def __init__( self, model_path: str = "half_cheetah.xml", frame_skip: int = 5, forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 0.1, reset_noise_scale: float = 0.1, exclude_current_positions_from_observation: bool = True, gravity=-9.81, body_name_to_size_scale: Dict[str, float] = None, body_name_to_mass_scale: Dict[str, float] = None, xml_file: str = None, ): super().__init__( model_path=xml_file or model_path, frame_skip=frame_skip, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, gravity=gravity, body_name_to_size_scale=body_name_to_size_scale, body_name_to_mass_scale=body_name_to_mass_scale, ) ================================================ FILE: sequoia/settings/rl/envs/mujoco/half_cheetah_test.py ================================================ from typing import ClassVar, Type from sequoia.conftest import mujoco_required pytestmark = mujoco_required from .half_cheetah import ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env from .modified_gravity_test import ModifiedGravityEnvTests from .modified_mass_test import ModifiedMassEnvTests from .modified_size_test import ModifiedSizeEnvTests @mujoco_required class TestHalfCheetahV2(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests): Environment: ClassVar[Type[ContinualHalfCheetahV2Env]] = ContinualHalfCheetahV2Env @mujoco_required class TestHalfCheetahV3(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests): Environment: ClassVar[Type[ContinualHalfCheetahV3Env]] = ContinualHalfCheetahV3Env ================================================ FILE: sequoia/settings/rl/envs/mujoco/hopper.py ================================================ # TODO: Should we use HopperV3 instead? from typing import ClassVar, Dict, List, Tuple from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco.hopper import HopperEnv as _HopperV2Env # TODO: Use HalfCheetah-v3 instead, which allows explicitly to change the model file! from gym.envs.mujoco.hopper_v3 import HopperEnv as _HopperV3Env from .modified_gravity import ModifiedGravityEnv from .modified_mass import ModifiedMassEnv from .modified_size import ModifiedSizeEnv # NOTE: Removed the `utils.EzPickle` base class (since it wasn't being passed any kwargs # (and therefore wasn't saving any of the 'state') anyway. class HopperV2Env(_HopperV2Env): """ Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym """ BODY_NAMES: ClassVar[List[str]] = ["torso", "thigh", "leg", "foot"] def __init__(self, model_path: str = "hopper.xml", frame_skip: int = 4): MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip) # utils.EzPickle.__init__(self) class HopperV3Env(_HopperV3Env): BODY_NAMES: ClassVar[List[str]] = ["torso", "thigh", "leg", "foot"] def __init__( self, model_path="hopper.xml", forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-3, healthy_reward: float = 1.0, terminate_when_unhealthy: bool = True, healthy_state_range: Tuple[float, float] = (-100.0, 100.0), healthy_z_range: Tuple[float, float] = (0.7, float("inf")), healthy_angle_range: Tuple[float, float] = (-0.2, 0.2), reset_noise_scale: float = 5e-3, exclude_current_positions_from_observation: bool = True, xml_file: str = None, frame_skip: int = 4, ): if frame_skip != 4: raise NotImplementedError("todo: Add a frame_skip arg to the gym class.") super().__init__( xml_file=xml_file or model_path, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, healthy_reward=healthy_reward, terminate_when_unhealthy=terminate_when_unhealthy, healthy_state_range=healthy_state_range, healthy_z_range=healthy_z_range, healthy_angle_range=healthy_angle_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, ) class HopperV2GravityEnv(ModifiedGravityEnv, HopperV2Env): # NOTE: This environment could be used in ContinualRL! def __init__( self, model_path: str = "hopper.xml", frame_skip: int = 4, gravity: float = -9.81, ): super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity) class ContinualHopperV2Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HopperV2Env): def __init__( self, model_path: str = "hopper.xml", frame_skip: int = 4, gravity=-9.81, body_name_to_size_scale: Dict[str, float] = None, body_name_to_mass_scale: Dict[str, float] = None, ): super().__init__( model_path=model_path, frame_skip=frame_skip, gravity=gravity, body_name_to_size_scale=body_name_to_size_scale, body_name_to_mass_scale=body_name_to_mass_scale, ) class ContinualHopperV3Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HopperV3Env): def __init__( self, model_path="hopper.xml", forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-3, healthy_reward: float = 1.0, terminate_when_unhealthy: bool = True, healthy_state_range: Tuple[float, float] = (-100.0, 100.0), healthy_z_range: Tuple[float, float] = (0.7, float("inf")), healthy_angle_range: Tuple[float, float] = (-0.2, 0.2), reset_noise_scale: float = 5e-3, exclude_current_positions_from_observation: bool = True, # xml_file: str = None, frame_skip: int = 4, gravity=-9.81, body_name_to_size_scale: Dict[str, float] = None, body_name_to_mass_scale: Dict[str, float] = None, ): super().__init__( model_path=model_path, frame_skip=frame_skip, # xml_file=xml_file or model_path, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, healthy_reward=healthy_reward, terminate_when_unhealthy=terminate_when_unhealthy, healthy_state_range=healthy_state_range, healthy_z_range=healthy_z_range, healthy_angle_range=healthy_angle_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, gravity=gravity, body_name_to_size_scale=body_name_to_size_scale, body_name_to_mass_scale=body_name_to_mass_scale, ) # ------------- NOTE (@lebrice) ------------------------------- # Everything below this is unused. # The idea was to do some kind of inverse-kinematics-ish math to fix the placement of the joints # when the size of one of the parts of the model is changed. # # from typing import Dict # def get_parent(tree: ElementTree, node: Element) -> Element: # parent_map: Dict[Element, Element] = {c: p for p in tree.iter() for c in p} # return parent_map[node] # def update_world( # tree: ElementTree, # world_body: Element, # new_torso_max: Pos, # size_scaling_factor: float = 1.0, # **kwargs, # ) -> None: # """propagate the changes from the body to the world, if need be.""" # # TODO: Maybe move the camera etc? # def update_torso( # tree: ElementTree = None, # torso_body: Element = None, # new_torso_min: Pos = None, # size_scaling_factor: float = 1.0, # geom_suffix="torso_geom", # **kwargs, # ) -> None: # """'move' the torso body and its endpoints, after another bodypart has been # scaled. # This moves all relevant geoms and # joints and bodies, # Normally, this can update the # (through possibly recursive calls to one of `update_torso`, # `update_thigh`, `update_leg`, `update_foot`.) # """ # assert size_scaling_factor != 0.0 # body_name = "torso" # # Get the elements to be modified. # if torso_body is None: # assert tree is not None, "need the tree if torso_body is not given!" # if isinstance(tree, Element) and tree.tag == "body" and tree.get("name") == body_name: # torso_body = tree # tree = None # else: # torso_body = tree.find(f".//body[@name='{body_name}']") # assert torso_body is not None, "can't find the torso body!" # torso_geom = torso_body.find(f"./geom[@name='{body_name}']") # if torso_geom is None: # torso_geom = torso_body.find(f"./geom[@name='{body_name}_geom']") # if torso_geom is None: # raise RuntimeError(f"Can't find the geom for body part '{body_name}'!") # rooty_joint = torso_body.find("./joint[@name='rooty']") # rootz_joint = torso_body.find("./joint[@name='rootz']") # torso_body_pos = Pos.of_element(torso_body) # torso_geom_size = float(torso_geom.get("size")) # torso_geom_fromto = FromTo.of_element(torso_geom) # rootz_joint_ref = float(rootz_joint.get("ref")) # rooty_joint_pos = Pos.of_element(rooty_joint) # torso_max = torso_geom_fromto.start # torso_min = torso_geom_fromto.end # torso_length = torso_max - torso_min # assert torso_body_pos == torso_geom_fromto.center # # This happens to coincide with torso's pos. # assert rootz_joint_ref == torso_body_pos.z # assert rooty_joint_pos == torso_body_pos # if new_torso_min is None: # # Assume that the location of the base of the torso doesn't change, i.e. that # # this was called in order to JUST scale the torso and nothing else. # new_torso_min = torso_min # # new_torso_min is already given, calculate the other two: # new_torso_length = torso_length * (1 if size_scaling_factor is None else size_scaling_factor) # new_torso_max = new_torso_min + new_torso_length # # NOTE: fromto is from top to bottom here (maybe also everywhere else, not sure). # new_torso_geom_size = torso_geom_size * size_scaling_factor # new_torso_geom_fromto = FromTo(start=new_torso_max, end=new_torso_min) # new_torso_pos = (new_torso_max + new_torso_min) / 2 # new_rootz_joint_ref = new_torso_pos.z # new_rooty_joint_pos = new_torso_pos # # Update the fields of the different elements. # torso_body.set("pos", new_torso_pos.to_str()) # torso_geom.set("fromto", new_torso_geom_fromto.to_str()) # torso_geom.set("size", new_torso_geom_size) # # TODO: Not sure if this makes sense: The rooty joint has a Pos that coincides # # with the torso pos. # new_torso_pos.set_in_element(rooty_joint) # # TODO: rootz has a 'ref' which also coincides with the torso pos. # rootz_joint.set("ref", str(new_rootz_joint_ref)) # rooty_joint.set("pos", new_rooty_joint_pos) # new_torso_pos = new_torso_geom_fromto.center # # TODO: Also move the camera? # world_body: Optional[Element] = None # if tree is not None: # assert tree is not None, "need the tree if torso_body is not given!" # world_body = get_parent(tree, torso_body) # # Don't change the scaling of the parent, if this body part was scaled! # parent_scale_factor = 1 if size_scaling_factor != 1 else size_scaling_factor # update_world( # tree=tree, # world_body=world_body, # new_torso_min=new_torso_min, # new_torso_max=new_torso_max, # size_scaling_factor=parent_scale_factor, # **kwargs, # ) # def update_thigh( # tree: ElementTree = None, # thigh_body: Element = None, # new_thigh_min: Pos = None, # new_thigh_max: Pos = None, # size_scaling_factor: float = None, # **kwargs, # ) -> None: # """'move' the thigh and its endpoints. This moves all relevant geoms and # joints and then moves the torso by calling `update_torso`. # """ # # TODO: # new_torso_min = new_thigh_max # new_torso_max = todo # torso_body = get_parent(tree, thigh_body) # update_torso( # torso_body, # new_torso_min=new_torso_min, # new_torso_max=new_torso_max, # size_scaling_factor=size_scaling_factor, # new_thigh_min=new_thigh_min, # new_thigh_max=new_thigh_max, # **kwargs, # ) # def update_thigh( # tree: ElementTree = None, # thigh_body: Element = None, # new_thigh_min: Pos = None, # new_thigh_max: Pos = None, # size_scaling_factor: float = None, # **kwargs, # ) -> None: # """'move' the thigh and its endpoints. This moves all relevant geoms and # joints and then moves the torso by calling `update_torso`. # """ # new_torso_min = NotImplemented # new_thigh_max = NotImplemented # torso_body = get_parent(tree, thigh_body) # update_torso( # torso_body, # new_torso_min=new_torso_min, # size_scaling_factor=size_scaling_factor, # new_thigh_min=new_thigh_min, # new_thigh_max=new_thigh_max, # Pass it in case the above components need it. # **kwargs, # ) # def scale_size(tree: ElementTree, body_name: str, scale: float) -> str: # tree = copy.deepcopy(tree) # target_body: Element = tree.find(f".//body[@name='{body_name}']") # parent_map: Dict[Element, Element] = {c: p for p in tree.iter() for c in p} # if body_name == "torso": # update_torso(tree, torso_body=target_body, size_scaling_factor=scale) # raise NotImplementedError(f"WIP") ================================================ FILE: sequoia/settings/rl/envs/mujoco/hopper_test.py ================================================ from sequoia.conftest import mujoco_required pytestmark = mujoco_required import inspect import itertools import os from pathlib import Path from typing import ClassVar, Type from xml.etree.ElementTree import ElementTree, fromstring import pytest from gym.envs.mujoco import MujocoEnv from sequoia.conftest import mujoco_required from .hopper import ContinualHopperV2Env, ContinualHopperV3Env from .modified_gravity_test import ModifiedGravityEnvTests from .modified_mass_test import ModifiedMassEnvTests from .modified_size_test import ModifiedSizeEnvTests # # TODO: There is a bug in the way the hopper XML is generated, where the sticks / joints don't seem to follow. # bob = ContinualHopperEnv(body_name_to_size_scale={"thigh": 2}) # assert False, bob @mujoco_required class TestContinualHopperV2Env(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests): Environment: ClassVar[Type[ContinualHopperV2Env]] = ContinualHopperV2Env @mujoco_required class TestContinualHopperV3Env(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests): Environment: ClassVar[Type[ContinualHopperV3Env]] = ContinualHopperV3Env def load_tree(model_path: Path) -> ElementTree: # model_path = "hopper.xml" if model_path.startswith("/"): full_path = model_path else: full_path = os.path.join( os.path.dirname(inspect.getsourcefile(MujocoEnv)), "assets", model_path ) if not os.path.exists(full_path): raise IOError(f"File {full_path} does not exist") with open(model_path, "r") as f: return f.read() default_hopper_body_xml = f"""\ """ def elements_equal(e1, e2) -> bool: """Taken from https://stackoverflow.com/a/24349916/6388696""" assert e1.tag == e2.tag assert e1.text == e2.text assert e1.tail == e2.tail assert e1.attrib == e2.attrib assert len(e1) == len(e2) assert all(elements_equal(c1, c2) for c1, c2 in zip(e1, e2)) @pytest.mark.xfail(reason="Dropping this for now, XML is really annoying.") @pytest.mark.parametrize( "input_xml_str, scale_factor, output_xml_str", [ ( default_hopper_body_xml, 1.0, default_hopper_body_xml, ), ( default_hopper_body_xml, 2.0, f"""\ """, ), ], ids=(f"param{i}" for i in itertools.count()), ) def test_change_torso(input_xml_str: str, scale_factor: float, output_xml_str: str): # # TODO: Get rid of annoying whitespace issues! pass input_tree = fromstring(input_xml_str) expected = fromstring(output_xml_str) # from io import StringIO # in_file = StringIO(input_xml_str) # out_file = StringIO(output_xml_str) # input_tree = parse(in_file) # expected = parse(out_file) update_torso(tree=input_tree, size_scale_factor=scale_factor) # import textwrap # from xml.dom import minidom # result = minidom.parseString(tostring(input_tree, method="text")).toprettyxml() result = input_tree assert elements_equal(result, expected) # expected = minidom.parseString().toprettyxml() assert result == expected ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_friction.py ================================================ """ TODO: Wrapper that modifies the friction, if possible on-the-fly. """ from typing import ClassVar from gym.envs.mujoco import MujocoEnv class ModifiedFrictionEnv(MujocoEnv): """ Allows the gravity to be changed. Adapted from https://github.com/Breakend/gym-extensions/blob/master/gym_extensions/continuous/mujoco/gravity_envs.py """ # IDEA: Use somethign like this to tell appart modifications which can be applied # on-the-fly on a given env to get multiple tasks, vs those that require creating a # new environment for each task. CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_friction_test.py ================================================ """ TODO: Tests for the 'modified friction' mujoco envs. """ ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_gravity.py ================================================ import warnings from typing import ClassVar from gym.envs.mujoco import MujocoEnv from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class ModifiedGravityEnv(MujocoEnv): """ Allows the gravity to be changed. Adapted from https://github.com/Breakend/gym-extensions/blob/master/gym_extensions/continuous/mujoco/gravity_envs.py """ # IDEA: Use somethign like this to tell appart modifications which can be applied # on-the-fly on a given env to get multiple tasks, vs those that require creating a # new environment for each task. CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True def __init__(self, model_path: str, frame_skip: int, gravity: float = -9.81, **kwargs): super().__init__(model_path=model_path, frame_skip=frame_skip, **kwargs) # self.model.opt.gravity = (mujoco_py.mjtypes.c_double * 3)(*[0., 0., gravity]) if gravity != -9.81: self.model.opt.gravity[2] = gravity # self.model._compute_subtree() # self.model.forward() self.sim.forward() # self.sim: MjSim logger.debug(f"Setting initial gravity to {self.gravity}") @property def gravity(self) -> float: return self.model.opt.gravity[2] @gravity.setter def gravity(self, value: float) -> None: # TODO: Seems to be bad practice to modify memory in-place for some reason? self.model.opt.gravity[2] = value # self.model.opt.gravity[2] = - abs(value) def set_gravity(self, value: float) -> None: if value >= 0: warnings.warn( RuntimeWarning( "Not a good idea to use a positive value! (things will start to float)" ) ) # IDEA: always convert to negative value in the setter? self.gravity = value ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_gravity_test.py ================================================ """ TODO: Tests for the 'modified gravity' mujoco envs. """ from typing import ClassVar, Type, TypeVar from gym.wrappers import TimeLimit from sequoia.conftest import mujoco_required pytestmark = mujoco_required from .modified_gravity import ModifiedGravityEnv EnvType = TypeVar("EnvType", bound=ModifiedGravityEnv) class ModifiedGravityEnvTests: Environment: ClassVar[Type[EnvType]] # @pytest.mark.xfail(reason="The condition doesn't always work.") def test_change_gravity_each_step(self): env: ModifiedGravityEnv = self.Environment() max_episode_steps = 50 n_episodes = 3 # NOTE: Interestingly, the renderer will show # `env.frame_skip * max_episode_steps` frames per episode, even when # "Ren[d]er every frame" is set to False. env = TimeLimit(env, max_episode_steps=max_episode_steps) total_steps = 0 for episode in range(n_episodes): initial_state = env.reset() done = False episode_steps = 0 start_y = initial_state[1] moved_up = 0 previous_state = initial_state state = initial_state while not done: previous_state = state state, reward, done, info = env.step(env.action_space.sample()) env.render("human") episode_steps += 1 total_steps += 1 # decrease the gravity continually over time. # By the end, things should be floating. env.set_gravity(-10 + 5 * total_steps / max_episode_steps) moved_up += state[1] > previous_state[1] # print(f"Moving upward? {obs[1] > state[1]}") if episode_steps != max_episode_steps: print(f"Episode ended early?") print(f"Gravity at end of episode: {env.gravity}") # TODO: Check that the position (in the observation) is obeying gravity? # if env.gravity <= 0: # # Downward force, so should not have any significant preference for # # moving up vs moving down. # assert 0.4 <= (moved_up / max_episode_steps) <= 0.6, env.gravity # # if env.gravity == 0: # # assert 0.5 <= (moved_up / max_episode_steps) <= 1.0 # if env.gravity > 0: # assert 0.5 <= (moved_up / max_episode_steps) <= 1.0, env.gravity assert total_steps <= n_episodes * max_episode_steps initial_z = env.init_qpos[1] final_z = env.sim.data.qpos[1] if env.gravity > 0: assert final_z > initial_z # TODO: These checks aren't deterministic, and only really "work" with # half-cheetah. # assert initial_z == 0 # Check that the robot is high up in the sky! :D # assert final_z > 3 # assert False, (env.init_qpos, env.sim.data.qpos) def test_task_schedule(self): # TODO: Reuse this test (and perhaps others from multi_task_environment_test.py) # but with this continual_half_cheetah instead of cartpole. original = self.Environment() starting_gravity = original.gravity task_schedule = { 10: dict(gravity=starting_gravity), 20: dict(gravity=-12.0), 30: dict(gravity=0.9), } from sequoia.common.gym_wrappers import MultiTaskEnvironment env = MultiTaskEnvironment(original, task_schedule=task_schedule) env.seed(123) env.reset() for step in range(100): _, _, done, _ = env.step(env.action_space.sample()) # env.render() if done: env.reset() if 0 <= step < 10: assert env.gravity == starting_gravity elif 10 <= step < 20: assert env.gravity == starting_gravity elif 20 <= step < 30: assert env.gravity == -12.0 elif step >= 30: assert env.gravity == 0.9 env.close() ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_mass.py ================================================ from functools import partial from typing import ClassVar, Dict, List, TypeVar, Union import numpy as np from gym.envs.mujoco import MujocoEnv V = TypeVar("V") class ModifiedMassEnv(MujocoEnv): """ Allows the mass of body parts to be changed. NOTE: Haven't yet checked how this affects the physics simulation! Might not be 100% working. """ # IDEA: Use somethign like this to tell appart modifications which can be applied # on-the-fly on a given env to get multiple tasks, vs those that require creating a # new environment for each task. CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True BODY_NAMES: ClassVar[List[str]] def __init__( self, model_path: str, frame_skip: int, body_name_to_mass_scale: Dict[str, float] = None, **kwargs, ): super().__init__( model_path=model_path, frame_skip=frame_skip, **kwargs, ) self.body_name_to_mass_scale = body_name_to_mass_scale or {} self.default_masses_dict: Dict[str, float] = { body_name: self.model.body_mass[i] for i, body_name in enumerate(self.model.body_names) } self.default_masses: np.ndarray = np.copy(self.model.body_mass) # dict(zip(body_parts, mass_scales)) self.scale_masses(**self.body_name_to_mass_scale) # self.model.body_mass = self.get_and_modify_bodymass(body_part, mass_scale) # self.model._compute_subtree() # self.model.forward() def __init_subclass__(cls): super().__init_subclass__() # Add auto-generated properties for getting and setting the mass of the bodyparts. for body_part in cls.BODY_NAMES: property_name = f"{body_part}_mass" mass_property = property( fget=partial(cls.get_mass, body_part=body_part), fset=partial(cls._mass_setter, body_part), ) setattr(cls, property_name, mass_property) def _update(self) -> None: """'Update' the model, if necessary, after a change has occured to the mass. TODO: Not sure if this is entirely correct """ # self.model._compute_subtree() # self.model.forward() def reset_masses(self) -> None: """Resets the masses to their default values.""" # NOTE: Use [:] to modify in-place, just in case there are any # pointer-shenanigans going on on the C side. self.model.body_mass[:] = self.default_masses # self.model._compute_subtree() #TODO: Not sure about this call # self.model.forward() def get_masses_dict(self) -> Dict[str, float]: return { body_name: self.model.body_masses[i] for i, body_name in enumerate(self.model.body_names) } def set_mass(self, **body_name_to_mass: Dict[str, Union[int, float]]) -> None: # Will raise an IndexError if the body part isnt found. # _set_mass(self, body_part=body_part, mass=mass) for body_part, mass in body_name_to_mass.items(): idx = self.model.body_names.index(body_part) self.model.body_mass[idx] = mass def get_mass(self, body_part: str) -> float: # Will raise an IndexError if the body part isnt found. if body_part not in self.model.body_names: raise ValueError( f"No body named {body_part} in this mujoco model! (body names: " f"{self.model.body_names})." ) idx = self.model.body_names.index(body_part) return self.model.body_mass[idx] def scale_masses( self, body_parts: List[str] = None, mass_scales: List[float] = None, **body_name_to_mass_scale, ) -> Dict[str, float]: """Scale the (original) mass of body parts of the Mujoco model. Returns a dictionary with the new masses. """ new_masses: Dict[str, float] = {} body_parts = body_parts or [] mass_scales = mass_scales or [] body_name_to_mass_scale = body_name_to_mass_scale or {} self.reset_masses() body_name_to_mass_scale.update(zip(body_parts, mass_scales)) for body_name, mass_scale in body_name_to_mass_scale.items(): current_mass = self.get_mass(body_name) new_mass = mass_scale * current_mass self.set_mass(**{body_name: new_mass}) new_masses[body_name] = new_mass # Not sure if we need to do this? self._update() return new_masses def get_and_modify_bodymass(self, body_name: str, scale: float): idx = self.model.body_names.index(body_name) temp = np.copy(self.model.body_mass) temp[idx] *= scale return temp @staticmethod def _mass_setter(body_part: str, env: MujocoEnv, mass: float) -> None: """Function used to set the mass of a body part. This is used as the setter of the generated `_mass` properties. """ # Will raise an IndexError if the body part isnt found. idx = env.model.body_names.index(body_part) env.model.body_mass[idx] = mass # def _get_mass(env: MujocoEnv, /, body_part: str) -> float: # # Will raise an IndexError if the body part isnt found. # idx = env.model.body_names.index(body_part) # return env.model.body_mass[idx] ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_mass_test.py ================================================ """ TODO: Tests for the 'modified gravity' mujoco envs. """ import operator from typing import ClassVar, List, Type from gym.wrappers import TimeLimit from sequoia.conftest import mujoco_required pytestmark = mujoco_required from .modified_mass import ModifiedMassEnv class ModifiedMassEnvTests: Environment: ClassVar[Type[ModifiedMassEnv]] # names of the parts of the model which can be changed. body_names: ClassVar[List[str]] def test_generated_properties_change_the_actual_mass(self): env = self.Environment() for body_name in self.Environment.BODY_NAMES: # Get the value directly from the mujoco model. model_value = env.model.body_mass[env.model.body_names.index(body_name)] assert getattr(env, f"{body_name}_mass") == model_value new_value = model_value * 2 setattr(env, f"{body_name}_mass", new_value) model_value = env.model.body_mass[env.model.body_names.index(body_name)] assert model_value == new_value def test_change_mass_each_step(self): env: ModifiedMassEnv = self.Environment() max_episode_steps = 200 n_episodes = 3 # NOTE: Interestingly, the renderer will show # `env.frame_skip * max_episode_steps` frames per episode, even when # "Ren[d]er every frame" is set to False. env = TimeLimit(env, max_episode_steps=max_episode_steps) env: ModifiedMassEnv total_steps = 0 for episode in range(n_episodes): initial_state = env.reset() done = False episode_steps = 0 start_y = initial_state[1] moved_up = 0 previous_state = initial_state state = initial_state body_part = self.Environment.BODY_NAMES[0] start_mass = env.get_mass(body_part) while not done: previous_state = state state, reward, done, info = env.step(env.action_space.sample()) env.render("human") episode_steps += 1 total_steps += 1 env.set_mass(**{body_part: start_mass + 5 * total_steps / max_episode_steps}) moved_up += state[1] > previous_state[1] print(f"Moving upward? {moved_up}") initial_z = env.init_qpos[1] final_z = env.sim.data.qpos[1] # TODO: Check that the change in mass had an impact def test_set_mass_with_task_schedule(self): body_part = "torso" original = self.Environment() starting_mass = original.get_mass("torso") task_schedule = { 10: dict(), 20: operator.methodcaller("set_mass", torso=starting_mass * 2), 30: operator.methodcaller("set_mass", torso=starting_mass * 4), } from sequoia.common.gym_wrappers import MultiTaskEnvironment env = MultiTaskEnvironment(original, task_schedule=task_schedule) env.seed(123) env.reset() for step in range(100): _, _, done, _ = env.step(env.action_space.sample()) # env.render() if done: env.reset() if 0 <= step < 10: assert env.get_mass(body_part) == starting_mass, step elif 10 <= step < 20: assert env.get_mass(body_part) == starting_mass, step elif 20 <= step < 30: assert env.get_mass(body_part) == starting_mass * 2, step elif step >= 30: assert env.get_mass(body_part) == starting_mass * 4, step env.close() ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_size.py ================================================ import hashlib import inspect import os import tempfile import xml.etree.ElementTree as ET from copy import deepcopy from logging import getLogger as get_logger from pathlib import Path from typing import ClassVar, Dict, List from gym.envs.mujoco import MujocoEnv logger = get_logger(__name__) def change_size_in_xml( tree: ET.ElementTree, **body_name_to_size_scale: Dict[str, float] ) -> ET.ElementTree: tree = deepcopy(tree) for body_name, size_scale in body_name_to_size_scale.items(): body = tree.find(f".//body[@name='{body_name}']") geom = tree.find(f".//geom[@name='{body_name}']") if geom is None: geom = tree.find(f".//geom[@name='{body_name}_geom']") assert geom is not None assert "size" in geom.attrib # print(body_name) # print("Old size: ", geom.attrib["size"]) sizes: List[float] = [float(s) for s in geom.attrib["size"].split(" ")] new_sizes = [size * size_scale for size in sizes] geom.attrib["size"] = " ".join(map(str, new_sizes)) # print("New size: ", geom.attrib['size']) return tree def get_geom_sizes(tree: ET.ElementTree, body_name: str) -> List[float]: # body = tree.find(f".//body[@name='{body_name}']") geom = tree.find(f".//geom[@name='{body_name}']") if geom is None: geom = tree.find(f".//geom[@name='{body_name}_geom']") assert geom is not None assert "size" in geom.attrib # print(body_name) # print("Old size: ", geom.attrib["size"]) sizes: List[float] = [float(s) for s in geom.attrib["size"].split(" ")] return sizes class ModifiedSizeEnv(MujocoEnv): """ Allows changing the size of the body parts. TODO: This currently can modify the geometry in-place (at least visually) with the `self.model.geom_size` ndarray, but the joints don't follow the change in length. """ BODY_NAMES: ClassVar[List[str]] # IDEA: Use somethign like this to tell appart modifications which can be applied # on-the-fly on a given env to get multiple tasks, vs those that require creating a # new environment for each task. CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = False def __init__( self, model_path: str, frame_skip: int, # TODO: IF using one or more of these `Modified` buffers, then we need to # get each one a distinct argument name, which isn't ideal! body_parts: List[str] = None, # Has to be the name of a geom, not of a body! size_scales: List[float] = None, body_name_to_size_scale: Dict[str, float] = None, **kwargs, ): body_parts = body_parts or [] size_scales = size_scales or [] body_name_to_size_scale = body_name_to_size_scale or {} body_name_to_size_scale.update(zip(body_parts, size_scales)) if model_path.startswith("/"): full_path = model_path else: full_path = os.path.join( os.path.dirname(inspect.getsourcefile(MujocoEnv)), "assets", model_path ) if not os.path.exists(full_path): raise IOError(f"File {full_path} does not exist") # find the body_part we want if any(scale_factor == 0 for scale_factor in size_scales): raise RuntimeError("Can't use a scale_factor of 0!") logger.debug(f"Default XML path: {full_path}") self.default_tree = ET.parse(full_path) self.tree = self.default_tree if body_name_to_size_scale: logger.debug(f"Changing parts: {body_name_to_size_scale}") self.tree = change_size_in_xml(self.default_tree, **body_name_to_size_scale) # create new xml # IDEA: Create an XML file with a unique name somewhere, and then write the hash_str = hashlib.md5((str(self) + str(body_name_to_size_scale)).encode()).hexdigest() temp_dir = Path(tempfile.gettempdir()) new_xml_path = temp_dir / f"{hash_str}.xml" if not new_xml_path.parent.exists(): new_xml_path.parent.mkdir(exist_ok=False, parents=True) self.tree.write(str(new_xml_path)) logger.debug(f"Generated XML path: {new_xml_path}") # Update the value to be passed to the constructor: full_path = str(new_xml_path) self.body_name_to_size_scale = body_name_to_size_scale # load the modified xml super().__init__(model_path=full_path, frame_skip=frame_skip, **kwargs) ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_size_test.py ================================================ """ TODO: Tests for the 'modified size' mujoco envs. """ from typing import ClassVar, List, Type import numpy as np from gym.wrappers import TimeLimit from sequoia.conftest import mujoco_required pytestmark = mujoco_required from .modified_size import ModifiedSizeEnv, get_geom_sizes class ModifiedSizeEnvTests: Environment: ClassVar[Type[ModifiedSizeEnv]] def test_change_size_per_task(self): body_part = self.Environment.BODY_NAMES[0] nb_tasks = 2 max_episode_steps = 200 n_episodes = 2 scale_factors: List[float] = [ (0.5 + 2 * (task_id / nb_tasks)) for task_id in range(nb_tasks) ] default_tree = self.Environment().default_tree default_sizes: List[str] = get_geom_sizes(default_tree, body_part) task_envs: List[EnvType] = [ # RenderEnvWrapper( TimeLimit( self.Environment(body_name_to_size_scale={body_part: scale_factor}), max_episode_steps=max_episode_steps, ) # ) for task_id, scale_factor in enumerate(scale_factors) ] for task_id, task_env in enumerate(task_envs): task_scale_factor = scale_factors[task_id] for episode in range(n_episodes): size = get_geom_sizes(task_env.tree, body_part) expected_size = [default_size * task_scale_factor for default_size in default_sizes] print( f"default sizes: {default_sizes}, Size: {size}, " f"task_scale_factor: {task_scale_factor}" ) assert np.allclose(size, expected_size) state = task_env.reset() done = False steps = 0 while not done: obs, reward, done, info = task_env.step(task_env.action_space.sample()) steps += 1 # NOTE: Uncomment to visually inspect. task_env.render("human") task_env.close() ================================================ FILE: sequoia/settings/rl/envs/mujoco/modified_wall.py ================================================ """ TODO: DO the same for the WallEnv from gym-extensions. """ # HalfCheetahWallEnv = lambda *args, **kwargs: WallEnvFactory(ModifiedHalfCheetahEnv)( # model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml", # ori_ind=-1, # *args, # **kwargs # ) ================================================ FILE: sequoia/settings/rl/envs/mujoco/mujoco_model_utils.py ================================================ from dataclasses import dataclass from typing import Any, NamedTuple, Sequence, Tuple, Union from xml.etree.ElementTree import Element import numpy as np def pos_to_str(pos: Tuple[float, ...]) -> str: return " ".join("0" if v == 0 else str(round(v, 5)) for v in pos) def str_to_pos(pos_str: str) -> "Pos": return Pos(*[float(v) for v in pos_str.split()]) class Pos(NamedTuple): x: float y: float z: float def to_str(self) -> str: """Return the 'str' version of `self` to be placed in a 'pos' field in the XML.""" return pos_to_str(self) @classmethod def from_str(cls, pos_str: str) -> "Pos": return cls(*[float(v) for v in pos_str.split()]) def __mul__(self, value: Union[int, float, np.ndarray]) -> "Pos": if isinstance(value, (int, float)): value = [value for _ in range(len(self))] if not isinstance(value, (list, tuple, np.ndarray)): return NotImplemented assert len(value) == len(self) return type(self)(*[v * axis_scaling_coef for v, axis_scaling_coef in zip(self, value)]) def __eq__(self, other: Union[Tuple[float, ...], np.ndarray]): if not isinstance(other, (list, tuple, np.ndarray)): return NotImplemented return np.isclose(np.asfarray(self), np.asfarray(other)).all() def __rmul__(self, value: Any): return self * value def __truediv__(self, other: Union[int, float, Sequence[float]]): if isinstance(other, (int, float)): other = [other for _ in range(len(self))] if not isinstance(other, (list, tuple, np.ndarray)): return NotImplemented assert len(other) == len(self) return type(self)(*[v / v_other for v, v_other in zip(self, other)]) def __add__(self, other: Union[int, float, np.ndarray]) -> "Pos": if isinstance(other, (int, float)): other = [other for _ in range(len(self))] if not isinstance(other, (list, tuple, np.ndarray)): return NotImplemented assert len(other) == len(self) return type(self)(*[v + v_other for v, v_other in zip(self, other)]) def __radd__(self, other: Any) -> "Pos": return self + other def __neg__(self) -> "Pos": return type(self)(*[-v for v in self]) def __sub__(self, other: Union[int, float, np.ndarray]) -> "Pos": if isinstance(other, (int, float)): other = [other for _ in range(len(self))] if not isinstance(other, (list, tuple, np.ndarray)): return NotImplemented assert len(other) == len(self) return self + (-other) # return type(self)(*[v + v_other for v, v_other in zip(self, other)]) def __rsub__(self, other: Any) -> "Pos": return (-self) + other @classmethod def of_element(cls, element: Element, field: str = "pos") -> "Pos": if field not in element.attrib: raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.") return cls.from_str(element.attrib[field]) def set_in_element(self, element: Element, field: str = "pos") -> None: if field not in element.attrib: # NOTE: Refusing to set a new field for now. raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.") element.set(field, self.to_str()) class FromTo(NamedTuple): start: Pos end: Pos def to_str(self) -> str: """Return the 'str' version of `self` to be placed in a 'pos' field in the XML.""" return self.start.to_str() + " " + self.end.to_str() @classmethod def from_str(cls, fromto: str) -> "FromTo": values = [float(v) for v in fromto.split()] assert len(values) == 6 return cls(Pos(*values[:3]), Pos(*values[3:])) @classmethod def of_element(cls, element: Element, field: str = "fromto") -> "FromTo": if field not in element.attrib: raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.") return cls.from_str(element.attrib.get(field)) def set_in_element(self, element: Element, field: str = "fromto") -> None: if field not in element.attrib: # NOTE: Refusing to set a new field for now. raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.") element.set(field, self.to_str()) @property def center(self) -> Pos: return (self.start + self.end) / 2 import textwrap @dataclass class FromTo: from_x: float from_y: float from_z: float to_x: float to_y: float to_z: float def __str__(self): return " ".join([self.from_x, self.from_y, self.from_z, self.to_x, self.to_y, self.to_z]) from dataclasses import dataclass @dataclass class TorsoGeom: friction: float = 0.9 fromto = FromTo(0, 0, 1.45, 0, 0, 1.05) name: str = "torso_geom" size: float = 0.05 type: str = "capsule" def render_xml(self) -> str: return f"""""" @dataclass class HoperV3Model: torso_geom: TorsoGeom def render_xml(self) -> str: return textwrap.dedent( """\ """ ) ================================================ FILE: sequoia/settings/rl/envs/mujoco/walker2d.py ================================================ from typing import ClassVar, Dict, List, Tuple from gym.envs.mujoco import MujocoEnv from gym.envs.mujoco.walker2d import Walker2dEnv as _Walker2dV2Env from gym.envs.mujoco.walker2d_v3 import Walker2dEnv as _Walker2dV3Env from .modified_gravity import ModifiedGravityEnv from .modified_mass import ModifiedMassEnv from .modified_size import ModifiedSizeEnv class Walker2dV2Env(_Walker2dV2Env): """ Simply allows changing of XML file, probably not necessary if we pull request the xml name as a kwarg in openai gym """ BODY_NAMES: ClassVar[List[str]] = [ "torso", "thigh", "leg", "foot", "thigh_left", "leg_left", "foot_left", ] def __init__(self, model_path: str = "walker2d.xml", frame_skip: int = 4): MujocoEnv.__init__(self, model_path=model_path, frame_skip=frame_skip) class Walker2dV3Env(_Walker2dV3Env): BODY_NAMES: ClassVar[List[str]] = [ "torso", "thigh", "leg", "foot", "thigh_left", "leg_left", "foot_left", ] def __init__( self, model_path: str = "walker2d.xml", forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-3, healthy_reward: float = 1.0, terminate_when_unhealthy: bool = True, healthy_z_range: Tuple[float, float] = (0.8, 2.0), healthy_angle_range: Tuple[float, float] = (-1.0, 1.0), reset_noise_scale: float = 5e-3, exclude_current_positions_from_observation: bool = True, xml_file: str = None, frame_skip: int = 4, ): if frame_skip != 4: raise NotImplementedError("todo: Add a frame_skip arg to the gym class.") super().__init__( xml_file=xml_file or model_path, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, healthy_reward=healthy_reward, terminate_when_unhealthy=terminate_when_unhealthy, healthy_z_range=healthy_z_range, healthy_angle_range=healthy_angle_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, ) class Walker2dGravityEnv(ModifiedGravityEnv, Walker2dV2Env): # NOTE: This environment could be used in ContinualRL! def __init__( self, model_path: str = "walker2d.xml", frame_skip: int = 4, gravity: float = -9.81, ): super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity) class ContinualWalker2dV2Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, Walker2dV2Env): def __init__( self, model_path: str = "walker2d.xml", frame_skip: int = 4, gravity=-9.81, body_name_to_size_scale: Dict[str, float] = None, body_name_to_mass_scale: Dict[str, float] = None, ): super().__init__( model_path=model_path, frame_skip=frame_skip, gravity=gravity, # body_parts=body_parts, # size_scales=size_scales, body_name_to_size_scale=body_name_to_size_scale, body_name_to_mass_scale=body_name_to_mass_scale, ) class ContinualWalker2dV3Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, Walker2dV3Env): # def __init__(self, model_path, frame_skip, gravity=-9.81, **kwargs): # super().__init__(model_path, frame_skip, gravity=gravity, **kwargs) def __init__( self, model_path: str = "walker2d.xml", forward_reward_weight: float = 1.0, ctrl_cost_weight: float = 1e-3, healthy_reward: float = 1.0, terminate_when_unhealthy: bool = True, healthy_z_range: Tuple[float, float] = (0.8, 2.0), healthy_angle_range: Tuple[float, float] = (-1.0, 1.0), reset_noise_scale: float = 5e-3, exclude_current_positions_from_observation: bool = True, gravity=-9.81, body_name_to_size_scale: Dict[str, float] = None, body_name_to_mass_scale: Dict[str, float] = None, xml_file: str = None, frame_skip: int = 4, ): if frame_skip != 4: raise NotImplementedError("todo: Add a frame_skip arg to the gym class.") super().__init__( model_path=model_path, frame_skip=frame_skip, xml_file=xml_file or model_path, forward_reward_weight=forward_reward_weight, ctrl_cost_weight=ctrl_cost_weight, healthy_reward=healthy_reward, terminate_when_unhealthy=terminate_when_unhealthy, healthy_z_range=healthy_z_range, healthy_angle_range=healthy_angle_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation, body_name_to_size_scale=body_name_to_size_scale, body_name_to_mass_scale=body_name_to_mass_scale, gravity=gravity, ) ================================================ FILE: sequoia/settings/rl/envs/mujoco/walker2d_test.py ================================================ from typing import ClassVar, Type from sequoia.conftest import mujoco_required from .modified_gravity_test import ModifiedGravityEnvTests from .modified_mass_test import ModifiedMassEnvTests from .modified_size_test import ModifiedSizeEnvTests from .walker2d import ContinualWalker2dV2Env, ContinualWalker2dV3Env pytestmark = mujoco_required class TestContinualWalker2dV2Env( ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests ): Environment: ClassVar[Type[ContinualWalker2dV2Env]] = ContinualWalker2dV2Env class TestContinualWalker2dV3Env( ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests ): Environment: ClassVar[Type[ContinualWalker2dV3Env]] = ContinualWalker2dV3Env ================================================ FILE: sequoia/settings/rl/envs/variant_spec.py ================================================ from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union import gym from gym.envs.registration import EnvSpec, load EnvType = TypeVar("EnvType", bound=gym.Env) _EntryPoint = Union[str, Callable[..., gym.Env]] class EnvVariantSpec(EnvSpec, Generic[EnvType]): def __init__( self, id: str, base_spec: EnvSpec, entry_point: Union[str, Callable[..., EnvType]] = None, reward_threshold: int = None, nondeterministic: bool = False, max_episode_steps=None, kwargs=None, ): super().__init__( id_requested=id, entry_point=entry_point, reward_threshold=reward_threshold, nondeterministic=nondeterministic, max_episode_steps=max_episode_steps, kwargs=kwargs, ) self.base_spec = base_spec def make(self, **kwargs) -> EnvType: return super().make(**kwargs) @classmethod def of( cls, original: EnvSpec, *, new_id: str, new_reward_threshold: Optional[float] = None, new_nondeterministic: Optional[bool] = None, new_max_episode_steps: Optional[int] = None, new_kwargs: Dict[str, Any] = None, new_entry_point: Union[str, Callable[..., gym.Env]] = None, wrappers: Optional[List[Callable[[gym.Env], gym.Env]]] = None, ) -> "EnvVariantSpec": """Returns a new env spec which uses additional wrappers. NOTE: The `new_kwargs` update the current kwargs, rather than replacing them. """ new_spec_kwargs = original.kwargs new_spec_kwargs.update(new_kwargs or {}) # Replace the entry-point if desired: new_spec_entry_point: Union[str, Callable[..., EnvType]] = ( new_entry_point or original.entry_point ) new_reward_threshold = ( new_reward_threshold if new_reward_threshold is not None else original.reward_threshold ) new_nondeterministic = ( new_nondeterministic if new_nondeterministic is not None else original.nondeterministic ) new_max_episode_steps = ( new_max_episode_steps if new_max_episode_steps is not None else original.max_episode_steps ) # Add wrappers if desired. if wrappers: # Get the callable that creates the env. if callable(original.entry_point): env_fn = original.entry_point else: env_fn = load(original.entry_point) # @lebrice Not sure if there is a cleaner way to do this, maybe using # functools.reduce or functools.partial? def _new_entry_point(**kwargs) -> gym.Env: env = env_fn(**kwargs) for wrapper in wrappers: env = wrapper(env) return env new_spec_entry_point = _new_entry_point return cls( new_id, base_spec=original, entry_point=new_spec_entry_point, reward_threshold=new_reward_threshold, nondeterministic=new_nondeterministic, max_episode_steps=new_max_episode_steps, kwargs=new_spec_kwargs, ) ================================================ FILE: sequoia/settings/rl/incremental/__init__.py ================================================ from .setting import IncrementalRLSetting from .tasks import make_incremental_task ================================================ FILE: sequoia/settings/rl/incremental/objects.py ================================================ from dataclasses import dataclass from typing import Optional, Sequence, TypeVar, Union from torch import Tensor from sequoia.settings.assumptions.incremental import IncrementalAssumption from ..discrete import DiscreteTaskAgnosticRLSetting # IncrementalAssumption, DiscreteTaskAgnosticRLSetting @dataclass(frozen=True) class Observations(DiscreteTaskAgnosticRLSetting.Observations, IncrementalAssumption.Observations): """Observations from a Continual Reinforcement Learning environment.""" x: Tensor task_labels: Optional[Tensor] = None # The 'done' that is normally returned by the 'step' method. # We add this here in case a method were to iterate on the environments in the # dataloader-style so they also have access to those (i.e. for the BaseMethod). done: Optional[Union[bool, Sequence[bool]]] = None @dataclass(frozen=True) class Actions(DiscreteTaskAgnosticRLSetting.Actions, IncrementalAssumption.Actions): """Actions to be sent to a Continual Reinforcement Learning environment.""" y_pred: Tensor @dataclass(frozen=True) class Rewards(DiscreteTaskAgnosticRLSetting.Rewards, IncrementalAssumption.Rewards): """Rewards obtained from a Continual Reinforcement Learning environment.""" y: Tensor ObservationType = TypeVar("ObservationType", bound=Observations) ActionType = TypeVar("ActionType", bound=Actions) RewardType = TypeVar("RewardType", bound=Rewards) ================================================ FILE: sequoia/settings/rl/incremental/results.py ================================================ from dataclasses import dataclass from typing import ClassVar, TypeVar from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings.assumptions.incremental_results import IncrementalResults MetricType = TypeVar("MetricsType", bound=EpisodeMetrics) @dataclass class IncrementalRLResults(IncrementalResults[MetricType]): # Higher mean reward / episode => better lower_is_better: ClassVar[bool] = False objective_name: ClassVar[str] = "Mean reward per episode" # Minimum runtime considered (in hours). # (No extra points are obtained for going faster than this.) min_runtime_hours: ClassVar[float] = 1.5 # Maximum runtime allowed (in hours). max_runtime_hours: ClassVar[float] = 12.0 ================================================ FILE: sequoia/settings/rl/incremental/setting.py ================================================ import itertools import operator import sys import warnings from dataclasses import dataclass, fields from functools import partial from itertools import islice from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Type, Union import gym import numpy as np from gym import spaces from gym.envs.registration import EnvSpec from gym.utils import colorize from gym.vector.utils import batch_space from simple_parsing import list_field from simple_parsing.helpers import choice from typing_extensions import Final from sequoia.common.gym_wrappers import MultiTaskEnvironment, TransformObservation from sequoia.common.gym_wrappers.utils import is_monsterkong_env from sequoia.common.metrics import EpisodeMetrics from sequoia.common.spaces import Sparse from sequoia.common.spaces.typed_dict import TypedDictSpace from sequoia.common.transforms import Transforms from sequoia.settings.assumptions.iid_results import TaskResults from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.settings.base import Method from sequoia.settings.rl.continual import ContinualRLSetting from sequoia.settings.rl.envs import ( METAWORLD_INSTALLED, MTENV_INSTALLED, MUJOCO_INSTALLED, MetaWorldEnv, MTEnv, metaworld_envs, mtenv_envs, ) from sequoia.settings.rl.wrappers.task_labels import FixedTaskLabelWrapper from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import constant, dict_union, pairwise from ..discrete.setting import DiscreteTaskAgnosticRLSetting from ..discrete.setting import supported_envs as _parent_supported_envs from .objects import Actions, Observations, Rewards # type: ignore from .results import IncrementalRLResults from .tasks import IncrementalTask, is_supported, make_incremental_task, sequoia_registry logger = get_logger(__name__) # A callable that returns an env. EnvFactory = Callable[[], gym.Env] # TODO: Move this 'passing custom env for each task' feature up into DiscreteTaskAgnosticRL. # TODO: Design a better mechanism for extending this task creation. Currently, this dictionary lists # out the 'supported envs' (envs for which we have an explicit way of creating tasks). However when # the dataset is set to "MT10" for example, then that does something different: It hard-sets some # of the values of the fields on the setting! supported_envs: Dict[str, Union[str, EnvSpec]] = dict_union( _parent_supported_envs, { spec.id: spec for env_id, spec in sequoia_registry.env_specs.items() if spec.id not in _parent_supported_envs and is_supported(env_id) }, ) if METAWORLD_INSTALLED: supported_envs["MT10"] = "MT10" supported_envs["MT50"] = "MT50" supported_envs["CW10"] = "CW10" supported_envs["CW20"] = "CW20" if MUJOCO_INSTALLED: for env_name, modification, version in itertools.product( ["HalfCheetah", "Hopper", "Walker2d"], ["bodyparts", "gravity"], ["v2", "v3"] ): env_id = f"LPG-FTW-{modification}-{env_name}-{version}" supported_envs[env_id] = env_id available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs} @dataclass class IncrementalRLSetting(IncrementalAssumption, DiscreteTaskAgnosticRLSetting): """Continual RL setting in which: - Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL) - Task boundary information (and task labels) are given at training time - Task boundary information is given at test time, but task identity is not. """ Observations: ClassVar[Type[Observations]] = Observations Actions: ClassVar[Type[Actions]] = Actions Rewards: ClassVar[Type[Rewards]] = Rewards # The function used to create the tasks for the chosen env. _task_sampling_function: ClassVar[Callable[..., IncrementalTask]] = make_incremental_task Results: ClassVar[Type[Results]] = IncrementalRLResults # Class variable that holds the dict of available environments. available_datasets: ClassVar[Dict[str, str]] = available_datasets # Which dataset/environment to use for training, validation and testing. dataset: str = choice(available_datasets, default="CartPole-v0") # # The number of tasks. By default 0, which means that it will be set # # depending on other fields in __post_init__, or eventually be just 1. # nb_tasks: int = field(0, alias=["n_tasks", "num_tasks"]) # (Copied from the assumption, just for clarity:) # TODO: Shouldn't these kinds of properties be on the class, rather than on the # instance? # Wether the task boundaries are smooth or sudden. smooth_task_boundaries: Final[bool] = constant(False) # Wether to give access to the task labels at train time. task_labels_at_train_time: Final[bool] = constant(True) # Wether to give access to the task labels at test time. task_labels_at_test_time: bool = False # NOTE: Specifying the `type` to use for the argparse argument, because of a bug in # simple-parsing that makes this not work correctly atm. train_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str) val_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str) test_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str) def __post_init__(self): defaults = {f.name: f.default for f in fields(self)} # NOTE: These benchmark functions don't just create the datasets, they actually set most of # the fields too! if isinstance(self.dataset, str) and self.dataset.startswith("LPG-FTW"): self.train_envs, self.val_envs, self.test_envs = make_lpg_ftw_datasets(self.dataset) # Use fewer tasks, if a custom number was passed. (NOTE: This is not ideal, same as # everywhere else that has to check against the default value) if self.nb_tasks not in {None, defaults["nb_tasks"]}: logger.info( f"Using a custom number of tasks ({self.nb_tasks}) instead of the default " f"({len(self.train_envs)})." ) self.train_envs = self.train_envs[: self.nb_tasks] self.val_envs = self.val_envs[: self.nb_tasks] self.test_envs = self.test_envs[: self.nb_tasks] self.nb_tasks = len(self.train_envs) self.max_episode_steps = self.max_episode_steps or 1_000 self.train_steps_per_task = 100_000 self.train_max_steps = self.nb_tasks * self.train_steps_per_task self.test_steps_per_task = 10_000 self.test_max_steps = self.nb_tasks * self.test_steps_per_task task_label_space = spaces.Discrete(self.nb_tasks) train_task_label_space = task_label_space if not self.task_labels_at_train_time: train_task_label_space = Sparse(train_task_label_space, sparsity=1.0) # This should be ok for now. val_task_label_space = train_task_label_space test_task_label_space = task_label_space if not self.task_labels_at_test_time: test_task_label_space = Sparse(test_task_label_space, sparsity=1.0) train_seed: Optional[int] = None valid_seed: Optional[int] = None test_seed: Optional[int] = None if self.config and self.config.seed is not None: train_seed = self.config.seed valid_seed = train_seed + 123 test_seed = train_seed + 456 self.train_envs = [ partial( create_env, env_fn=env_fn, wrappers=[ partial( FixedTaskLabelWrapper, task_label=(i if self.task_labels_at_train_time else None), task_label_space=train_task_label_space, ) ], seed=train_seed, ) for i, env_fn in enumerate(self.train_envs) ] self.val_envs = [ partial( create_env, env_fn=env_fn, wrappers=[ partial( FixedTaskLabelWrapper, task_label=(i if self.task_labels_at_train_time else None), task_label_space=val_task_label_space, ) ], seed=valid_seed, ) for i, env_fn in enumerate(self.train_envs) ] self.test_envs = [ partial( create_env, env_fn=env_fn, wrappers=[ partial( FixedTaskLabelWrapper, task_label=(i if self.task_labels_at_test_time else None), task_label_space=test_task_label_space, ) ], seed=test_seed, ) for i, env_fn in enumerate(self.train_envs) ] # Meta-World datasets: if self.dataset in ["MT10", "MT50", "CW10", "CW20"]: from metaworld import MT10, MT50, MetaWorldEnv, Task benchmarks = { "MT10": MT10, "MT50": MT50, "CW10": MT50, "CW20": MT50, } benchmark_class = benchmarks[self.dataset] logger.info( f"Creating metaworld benchmark {benchmark_class}, this might take a " f"while (~15 seconds)." ) # NOTE: Saving this attribute on `self` for the time being so that it can be inspected # by the tests if needed. However it would be best to move this benchmark stuff into a # function, same as with LPG-FTW. benchmark = benchmark_class(seed=self.config.seed if self.config else None) self._benchmark = benchmark envs: Dict[str, Type[MetaWorldEnv]] = benchmark.train_classes env_tasks: Dict[str, List[Task]] = { env_name: [task for task in benchmark.train_tasks if task.env_name == env_name] for env_name, env_class in benchmark.train_classes.items() } train_env_tasks: Dict[str, List[Task]] = {} val_env_tasks: Dict[str, List[Task]] = {} test_env_tasks: Dict[str, List[Task]] = {} test_fraction = 0.1 val_fraction = 0.1 for env_name, env_tasks in env_tasks.items(): n_tasks = len(env_tasks) n_val_tasks = int(max(1, n_tasks * val_fraction)) n_test_tasks = int(max(1, n_tasks * test_fraction)) n_train_tasks = len(env_tasks) - n_val_tasks - n_test_tasks if n_train_tasks <= 1: # Can't create train, val and test tasks. raise RuntimeError(f"There aren't enough tasks for env {env_name} ({n_tasks}) ") tasks_iterator = iter(env_tasks) train_env_tasks[env_name] = list(islice(tasks_iterator, n_train_tasks)) val_env_tasks[env_name] = list(islice(tasks_iterator, n_val_tasks)) test_env_tasks[env_name] = list(islice(tasks_iterator, n_test_tasks)) assert train_env_tasks[env_name] assert val_env_tasks[env_name] assert test_env_tasks[env_name] max_train_steps_per_task = 1_000_000 if self.dataset in ["CW10", "CW20"]: # TODO: Raise a warning if the number of tasks is non-default and set to # something different than in the benchmark # Re-create the [ContinualWorld benchmark](@TODO: Add citation here) version = 2 env_names = [ f"hammer-v{version}", f"push-wall-v{version}", f"faucet-close-v{version}", f"push-back-v{version}", f"stick-pull-v{version}", f"handle-press-side-v{version}", f"push-v{version}", f"shelf-place-v{version}", f"window-close-v{version}", f"peg-unplug-side-v{version}", ] if ( self.train_steps_per_task not in [defaults["train_steps_per_task"], None] and self.train_steps_per_task > max_train_steps_per_task ): raise RuntimeError( f"Can't use more than {max_train_steps_per_task} steps per " f"task in the {self.dataset} benchmark!" ) # TODO: Decide the number of test steps. # NOTE: Should we allow using fewer steps? # NOTE: The default value for this field is 10_000 currently, so this # check doesn't do anything. if self.dataset == "CW20": # CW20 does tasks [0 -> 10] and then [0 -> 10] again. env_names = env_names * 2 train_env_names = env_names val_env_names = env_names test_env_names = env_names else: train_env_names = list(train_env_tasks.keys()) val_env_names = list(val_env_tasks.keys()) test_env_names = list(test_env_tasks.keys()) self.nb_tasks = len(train_env_names) if self.train_max_steps not in [defaults["train_max_steps"], None]: self.train_steps_per_task = self.train_max_steps // self.nb_tasks elif self.train_steps_per_task is None: self.train_steps_per_task = max_train_steps_per_task self.train_max_steps = self.nb_tasks * self.train_steps_per_task if self.test_max_steps in [defaults["test_max_steps"], None]: if self.test_steps_per_task is None: self.test_steps_per_task = 10_000 self.test_max_steps = self.test_steps_per_task * self.nb_tasks # TODO: Double-check that the train/val/test wrappers are added to each env. self.train_envs = [ partial( make_metaworld_env, env_class=envs[env_name], tasks=train_env_tasks[env_name], ) for env_name in train_env_names ] self.val_envs = [ partial( make_metaworld_env, env_class=envs[env_name], tasks=val_env_tasks[env_name], ) for env_name in val_env_names ] self.test_envs = [ partial( make_metaworld_env, env_class=envs[env_name], tasks=test_env_tasks[env_name], ) for env_name in test_env_names ] # if is_monsterkong_env(self.dataset): # if self.force_pixel_observations: # # Add this to the kwargs that will be passed to gym.make, to make sure that # # we observe pixels, and not state. # self.base_env_kwargs["observe_state"] = False # elif self.force_state_observations: # self.base_env_kwargs["observe_state"] = True self._using_custom_envs_foreach_task: bool = False if self.train_envs: self._using_custom_envs_foreach_task = True if self.dataset == defaults["dataset"]: # avoid the `dataset` key keeping the default value of "CartPole-v0" when we pass # envs for each task (and no value for the `dataset` argument). self.dataset = None # TODO: Raise a warning if we're going to overwrite a non-default nb_tasks? self.nb_tasks = len(self.train_envs) assert self.train_steps_per_task or self.train_max_steps if self.train_steps_per_task is None: self.train_steps_per_task = self.train_max_steps // self.nb_tasks # TODO: Should we use the task schedules to tell the length of each task? if self.test_steps_per_task in [defaults["test_steps_per_task"], None]: self.test_steps_per_task = self.test_max_steps // self.nb_tasks assert self.test_steps_per_task assert self.train_steps_per_task == self.train_max_steps // self.nb_tasks, ( self.train_max_steps, self.train_steps_per_task, self.nb_tasks, ) task_schedule_keys = np.linspace( 0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int ).tolist() self.train_task_schedule = self.train_task_schedule or { key: {} for key in task_schedule_keys } self.val_task_schedule = self.train_task_schedule.copy() assert self.test_steps_per_task == self.test_max_steps // self.nb_tasks, ( self.test_max_steps, self.test_steps_per_task, self.nb_tasks, ) test_task_schedule_keys = np.linspace( 0, self.test_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int ).tolist() self.test_task_schedule = self.test_task_schedule or { key: {} for key in test_task_schedule_keys } if not self.val_envs: # TODO: Use a wrapper that sets a different random seed? self.val_envs = self.train_envs.copy() if not self.test_envs: # TODO: Use a wrapper that sets a different random seed? self.test_envs = self.train_envs.copy() if ( any(self.train_task_schedule.values()) or any(self.val_task_schedule.values()) or any(self.test_task_schedule.values()) ): raise RuntimeError( "Can't use a non-empty task schedule when passing the " "train/valid/test envs." ) self.train_dataset: Union[str, Callable[[], gym.Env]] = self.train_envs[0] self.val_dataset: Union[str, Callable[[], gym.Env]] = self.val_envs[0] self.test_dataset: Union[str, Callable[[], gym.Env]] = self.test_envs[0] # TODO: Add wrappers with the fixed task id for each env, if necessary, right? else: if self.val_envs or self.test_envs: raise RuntimeError( "Can't pass `val_envs` or `test_envs` without passing `train_envs`." ) # Call super().__post_init__() (delegates up the chain: IncrementalAssumption->DiscreteRL->ContinualRL) # NOTE: This deep inheritance isn't ideal. Should probably use composition instead somehow. super().__post_init__() if self._using_custom_envs_foreach_task: # TODO: Use 'no-op' task schedules for now. # self.train_task_schedule.clear() # self.val_task_schedule.clear() # self.test_task_schedule.clear() pass # TODO: Check that all the envs have the same observation spaces! # (If possible, find a way to check this without having to instantiate all # the envs.) # TODO: If the dataset has a `max_path_length` attribute, then it's probably # a Mujoco / metaworld / etc env, and so we set a limit on the episode length to # avoid getting an error. max_path_length: Optional[int] = getattr(self._temp_train_env, "max_path_length", None) if self.max_episode_steps is None and max_path_length is not None: assert max_path_length > 0 logger.info( f"Setting the max episode steps to {max_path_length} because a 'max_path_length' " f"attribute is present on the train env." ) self.max_episode_steps = max_path_length # if self.dataset == "MetaMonsterKong-v0": # # TODO: Limit the episode length in monsterkong? # # TODO: Actually end episodes when reaching a task boundary, to force the # # level to change? # self.max_episode_steps = self.max_episode_steps or 500 # FIXME: Really annoying little bugs with these three arguments! # self.nb_tasks = self.max_steps // self.steps_per_task @property def current_task_id(self) -> int: return self._current_task_id @current_task_id.setter def current_task_id(self, value: int) -> None: if value != self._current_task_id: # Set those to False so we re-create the wrappers for each task. self._has_setup_fit = False self._has_setup_validate = False self._has_setup_test = False # TODO: No idea what the difference is between `predict` and test. self._has_setup_predict = False # TODO: There are now also teardown hooks, maybe use them? self._current_task_id = value @property def train_task_lengths(self) -> List[int]: """Gives the length of each training task (in steps for now).""" return [ task_b_step - task_a_step for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys())) ] @property def train_phase_lengths(self) -> List[int]: """Gives the length of each training 'phase', i.e. the maximum number of (steps for now) that can be taken in the training environment, in a single call to .fit """ return [ task_b_step - task_a_step for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys())) ] @property def current_train_task_length(self) -> int: """Deprecated field, gives back the max number of steps per task.""" if self.stationary_context: return sum(self.train_task_lengths) return self.train_task_lengths[self.current_task_id] @property def task_label_space(self) -> gym.Space: # TODO: Explore an alternative design for the task sampling, based more around # gym spaces rather than the generic function approach that's currently used? # IDEA: Might be cleaner to put this in the assumption class task_label_space = spaces.Discrete(self.nb_tasks) if not self.task_labels_at_train_time or not self.task_labels_at_test_time: sparsity = 1 if self.task_labels_at_train_time ^ self.task_labels_at_test_time: # We have task labels "50%" of the time, ish: sparsity = 0.5 task_label_space = Sparse(task_label_space, sparsity=sparsity) return task_label_space def setup(self, stage: str = None) -> None: # Called before the start of each task during training, validation and # testing. super().setup(stage=stage) # What's done in ContinualRLSetting: # if stage in {"fit", None}: # self.train_wrappers = self.create_train_wrappers() # self.valid_wrappers = self.create_valid_wrappers() # elif stage in {"test", None}: # self.test_wrappers = self.create_test_wrappers() if self._using_custom_envs_foreach_task: logger.debug( f"Using custom environments from `self.[train/val/test]_envs` for task " f"{self.current_task_id}." ) if self.stationary_context: from sequoia.settings.rl.discrete.multienv_wrappers import ( ConcatEnvsWrapper, RandomMultiEnvWrapper, RoundRobinWrapper, ) # NOTE: Here is how this supports passing custom envs for each task: We # just switch out the value of these properties, and let the # `train/val/test_dataloader` methods work as usual! wrapper_type = RandomMultiEnvWrapper if self.task_labels_at_train_time or "pytest" in sys.modules: # A RoundRobin wrapper can be used when task labels are available, # because the task labels are available anyway, so it doesn't matter # if the Method figures out the pattern in the task IDs. # A RoundRobinWrapper is also used during testing, because it # makes it easier to check that things are working correctly: for example that # each task is visited equally, even when the number of total steps is small. wrapper_type = RoundRobinWrapper # NOTE: Not instantiating all the train/val/test envs here. Instead, the multienv # wrapper will lazily instantiate the envs as needed. # self.train_envs = instantiate_all_envs_if_needed(self.train_envs) # self.val_envs = instantiate_all_envs_if_needed(self.val_envs) # self.test_envs = instantiate_all_envs_if_needed(self.test_envs) self.train_dataset = wrapper_type( self.train_envs, add_task_ids=self.task_labels_at_train_time ) self.val_dataset = wrapper_type( self.val_envs, add_task_ids=self.task_labels_at_train_time ) self.test_dataset = ConcatEnvsWrapper( self.test_envs, add_task_ids=self.task_labels_at_test_time ) elif self.known_task_boundaries_at_train_time: self.train_dataset = self.train_envs[self.current_task_id] self.val_dataset = self.val_envs[self.current_task_id] # TODO: The test loop goes through all the envs, hence this doesn't really # work. self.test_dataset = self.test_envs[self.current_task_id] else: self.train_dataset = ConcatEnvsWrapper( self.train_envs, add_task_ids=self.task_labels_at_train_time ) self.val_dataset = ConcatEnvsWrapper( self.val_envs, add_task_ids=self.task_labels_at_train_time ) self.test_dataset = ConcatEnvsWrapper( self.test_envs, add_task_ids=self.task_labels_at_test_time ) # Check that the observation/action spaces are all the same for all # the train/valid/test envs self._check_all_envs_have_same_spaces( envs_or_env_functions=self.train_envs, wrappers=self.train_wrappers, ) # TODO: Inconsistent naming between `val_envs` and `valid_wrappers` etc. self._check_all_envs_have_same_spaces( envs_or_env_functions=self.val_envs, wrappers=self.val_wrappers, ) self._check_all_envs_have_same_spaces( envs_or_env_functions=self.test_envs, wrappers=self.test_wrappers, ) else: # TODO: Should we populate the `self.train_envs`, `self.val_envs` and # `self.test_envs` fields here as well, just to be consistent? # base_env = self.dataset # def task_env(task_index: int) -> Callable[[], MultiTaskEnvironment]: # return self._make_env( # base_env=base_env, # wrappers=[], # ) # self.train_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)] # self.val_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)] # self.test_envs = [partial(gym.make, self.dataset) for i in range(self.nb_tasks)] # assert False, self.train_task_schedule pass def test_dataloader(self, batch_size: Optional[int] = None, num_workers: Optional[int] = None): if not self._using_custom_envs_foreach_task: return super().test_dataloader(batch_size=batch_size, num_workers=num_workers) # IDEA: Pretty hacky, but might be cleaner than adding fields for the moment. test_max_steps = self.test_max_steps test_max_episodes = self.test_max_episodes self.test_max_steps = test_max_steps // self.nb_tasks if self.test_max_episodes: self.test_max_episodes = test_max_episodes // self.nb_tasks # self.test_env = self.TestEnvironment(self.test_envs[self.current_task_id]) task_test_env = super().test_dataloader(batch_size=batch_size, num_workers=num_workers) self.test_max_steps = test_max_steps self.test_max_episodes = test_max_episodes return task_test_env def test_loop(self, method: Method["IncrementalRLSetting"]): if not self._using_custom_envs_foreach_task: return super().test_loop(method) # TODO: If we're using custom envs for each task, then the test loop needs to be # re-organized. # raise NotImplementedError( # f"TODO: Need to add a wrapper that can switch between envs, or " # f"re-write the test loop." # ) assert self.nb_tasks == len(self.test_envs), "assuming this for now." test_envs = [] for task_id in range(self.nb_tasks): # TODO: Make sure that self.test_dataloader() uses the right number of steps # per test task (current hard-set to self.test_max_steps). task_test_env = self.test_dataloader() test_envs.append(task_test_env) # TODO: Move these wrappers to sequoia/common/gym_wrappers/multienv_wrappers or something, # and then import them correctly at the top of this file. from ..discrete.multienv_wrappers import ConcatEnvsWrapper task_label_space = spaces.Discrete(self.nb_tasks) if self.batch_size is not None: task_label_space = batch_space(task_label_space, self.batch_size) if not self.task_labels_at_test_time: task_label_space = Sparse(task_label_space, sparsity=1) test_envs_with_task_ids = [ FixedTaskLabelWrapper( env=test_env, task_label=(i if self.task_labels_at_test_time else None), task_label_space=task_label_space, ) for i, test_env in enumerate(test_envs) ] # NOTE: This check is a bit redundant here, since IncrementalRLSetting always has task # boundaries, but this might be useful if moving this to DiscreteTaskIncrementalRL on_task_switch_callback: Optional[Callable[[Optional[int]], None]] if self.known_task_boundaries_at_test_time: on_task_switch_callback = getattr(method, "on_task_switch", None) # NOTE: Not adding a task id here, since we instead add the fixed task id for each test env. # NOTE: Not adding task ids with this, doing it instead with a dedicated wrapper for each env above. joined_test_env = ConcatEnvsWrapper( test_envs_with_task_ids, add_task_ids=False, on_task_switch_callback=on_task_switch_callback, ) # TODO: Use this 'joined' test environment in this test loop somehow. # IDEA: Hacky way to do it: (I don't think this will work as-is though) _test_dataloader_method = self.test_dataloader self.test_dataloader = lambda *args, **kwargs: joined_test_env super().test_loop(method) self.test_dataloader = _test_dataloader_method test_loop_results = DiscreteTaskAgnosticRLSetting.Results() for task_id, test_env in enumerate(test_envs): # TODO: The results are still of the wrong type, because we aren't changing # the type of test environment or the type of Results results_of_wrong_type: IncrementalRLResults = test_env.get_results() # For now this weird setup means that there will be only one 'result' # object in this that actually has metrics: # assert results_of_wrong_type.task_results[task_id].metrics all_metrics: List[EpisodeMetrics] = sum( [result.metrics for result in results_of_wrong_type.task_results], [] ) n_metrics_in_each_result = [ len(result.metrics) for result in results_of_wrong_type.task_results ] # assert all(n_metrics == 0 for i, n_metrics in enumerate(n_metrics_in_each_result) if i != task_id), (n_metrics_in_each_result, task_id) # TODO: Also transfer the other properties like runtime, online performance, # etc? # TODO: Maybe add addition for these? # task_result = sum(results_of_wrong_type.task_results) task_result = TaskResults(metrics=all_metrics) # task_result: TaskResults[EpisodeMetrics] = results_of_wrong_type.task_results[task_id] test_loop_results.task_results.append(task_result) return test_loop_results @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. In this Incremental-RL Setting, fit is called once per task. (Same as ClassIncrementalSetting in SL). """ return self.nb_tasks @staticmethod def _make_env( base_env: Union[str, gym.Env, Callable[[], gym.Env]], wrappers: List[Callable[[gym.Env], gym.Env]] = None, **base_env_kwargs: Dict, ) -> gym.Env: """Helper function to create a single (non-vectorized) environment. This is also used to create the env whenever `self.dataset` is a string that isn't registered in gym. This happens for example when using an environment from meta-world (or mtenv). """ # Check if the env is registed in a known 'third party' gym-like package, and if # needed, create the base env in the way that package requires. if isinstance(base_env, str): env_id = base_env # Check if the id belongs to mtenv if MTENV_INSTALLED and env_id in mtenv_envs: from mtenv import make as mtenv_make # This is super weird. Don't undestand at all # why they are doing this. Makes no sense to me whatsoever. base_env = mtenv_make(env_id, **base_env_kwargs) # Add a wrapper that will remove the task information, because we use # the same MultiTaskEnv wrapper for all the environments. wrappers.insert(0, MTEnvAdapterWrapper) if METAWORLD_INSTALLED and env_id in metaworld_envs: # TODO: Should we use a particular benchmark here? # For now, we find the first benchmark that has an env with this name. import metaworld for benchmark_class in [metaworld.ML10]: benchmark = benchmark_class() if env_id in benchmark.train_classes.keys(): # TODO: We can either let the base_env be an env type, or # actually instantiate it. base_env: Type[MetaWorldEnv] = benchmark.train_classes[env_id] # NOTE: (@lebrice) Here I believe it's better to just have the # constructor, that way we re-create the env for each task. # I think this might be better, as I don't know for sure that # the `set_task` can be called more than once in metaworld. # base_env = base_env_type() break else: raise NotImplementedError( f"Can't find a metaworld benchmark that uses env {env_id}" ) return ContinualRLSetting._make_env( base_env=base_env, wrappers=wrappers, **base_env_kwargs, ) def create_task_schedule( self, temp_env: gym.Env, change_steps: List[int], seed: int = None, ) -> Dict[int, Dict]: task_schedule: Dict[int, Dict] = {} if self._using_custom_envs_foreach_task: # If custom envs were passed to be used for each task, then we don't create # a "task schedule", because the only reason we're using a task schedule is # when we want to change something about the 'base' env in order to get # multiple tasks. # Create a task schedule dict, just to fit in? for i, task_step in enumerate(change_steps): task_schedule[task_step] = {} return task_schedule # TODO: Make it possible to use something other than steps as keys in the task # schedule, something like a NamedTuple[int, DeltaType], e.g. Episodes(10) or # Steps(10), something like that! # IDEA: Even fancier, we could use a TimeDelta to say "do one hour of task 0"!! for step in change_steps: # TODO: Add a `stage` argument (an enum or something with 'train', 'valid' # 'test' as values, and pass it to this function. Tasks should be the same # in train/valid for now, given the same task Id. # TODO: When the Results become able to handle a different ordering of tasks # at train vs test time, allow the test task schedule to have different # ordering than train / valid. task = type(self)._task_sampling_function( temp_env, step=step, change_steps=change_steps, seed=seed, ) task_schedule[step] = task return task_schedule def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Create and return the wrappers to apply to the train environment of the current task.""" wrappers: List[Callable[[gym.Env], gym.Env]] = [] # TODO: Clean this up a bit? if self._using_custom_envs_foreach_task: # TODO: Maybe do something different here, since we don't actually want to # add a CL wrapper at all in this case? assert not any(self.train_task_schedule.values()) base_env = self.train_envs[self.current_task_id] else: base_env = self.train_dataset # assert False, super().create_train_wrappers() if self.stationary_context: task_schedule_slice = self.train_task_schedule.copy() assert len(task_schedule_slice) >= 2 assert self.nb_tasks == len(self.train_task_schedule) - 1 # Need to pop the last task, so that we don't sample it by accident! max_step = max(task_schedule_slice) last_task = task_schedule_slice.pop(max_step) # TODO: Shift the second-to-last task to the last step last_boundary = max(task_schedule_slice) second_to_last_task = task_schedule_slice.pop(last_boundary) task_schedule_slice[max_step] = second_to_last_task if 0 not in task_schedule_slice: assert self.nb_tasks == 1 task_schedule_slice[0] = second_to_last_task # assert False, (max_step, last_boundary, last_task, second_to_last_task) else: current_task = list(self.train_task_schedule.values())[self.current_task_id] task_length = self.train_max_steps // self.nb_tasks task_schedule_slice = { 0: current_task, task_length: current_task, } return self._make_wrappers( base_env=base_env, task_schedule=task_schedule_slice, # TODO: Removing this, but we have to check that it doesn't change when/how # the task boundaries are given to the Method. # sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.transforms + self.train_transforms, starting_step=0, max_steps=max(task_schedule_slice.keys()), new_random_task_on_reset=self.stationary_context, ) def create_valid_wrappers(self): if self._using_custom_envs_foreach_task: # TODO: Maybe do something different here, since we don't actually want to # add a CL wrapper at all in this case? assert not any(self.val_task_schedule.values()) base_env = self.val_envs[self.current_task_id] else: base_env = self.val_dataset # assert False, super().create_train_wrappers() if self.stationary_context: task_schedule_slice = self.val_task_schedule else: current_task = list(self.val_task_schedule.values())[self.current_task_id] task_length = self.train_max_steps // self.nb_tasks task_schedule_slice = { 0: current_task, task_length: current_task, } return self._make_wrappers( base_env=base_env, task_schedule=task_schedule_slice, # TODO: Removing this, but we have to check that it doesn't change when/how # the task boundaries are given to the Method. # sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.transforms + self.val_transforms, starting_step=0, max_steps=max(task_schedule_slice.keys()), new_random_task_on_reset=self.stationary_context, ) def create_test_wrappers(self): if self._using_custom_envs_foreach_task: # TODO: Maybe do something different here, since we don't actually want to # add a CL wrapper at all in this case? assert not any(self.test_task_schedule.values()) base_env = self.test_envs[self.current_task_id] else: base_env = self.test_dataset # assert False, super().create_train_wrappers() task_schedule_slice = self.test_task_schedule # if self.stationary_context: # else: # current_task = list(self.test_task_schedule.values())[self.current_task_id] # task_length = self.test_max_steps // self.nb_tasks # task_schedule_slice = { # 0: current_task, # task_length: current_task, # } return self._make_wrappers( base_env=base_env, task_schedule=task_schedule_slice, # TODO: Removing this, but we have to check that it doesn't change when/how # the task boundaries are given to the Method. # sharp_task_boundaries=self.known_task_boundaries_at_train_time, task_labels_available=self.task_labels_at_train_time, transforms=self.transforms + self.test_transforms, starting_step=0, max_steps=self.test_max_steps, new_random_task_on_reset=self.stationary_context, ) def _check_all_envs_have_same_spaces( self, envs_or_env_functions: List[Union[str, gym.Env, Callable[[], gym.Env]]], wrappers: List[Callable[[gym.Env], gym.Wrapper]], ) -> None: """Checks that all the environments in the list have the same observation/action spaces. """ first_env = self._make_env( base_env=envs_or_env_functions[0], wrappers=wrappers, **self.base_env_kwargs ) if not isinstance(envs_or_env_functions[0], gym.Env): # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting. # first_env.close() pass for task_id, task_env_id_or_function in zip( range(1, len(envs_or_env_functions)), envs_or_env_functions[1:] ): task_env = self._make_env( base_env=task_env_id_or_function, wrappers=wrappers, **self.base_env_kwargs, ) if not isinstance(task_env_id_or_function, gym.Env): # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting. # task_env.close() pass def warn_spaces_are_different( task_id: int, kind: str, first_env: gym.Env, task_env: gym.Env ) -> None: task_space = ( task_env.observation_space if kind == "observation" else task_env.action_space ) first_space = ( first_env.observation_space if kind == "observation" else first_env.action_space ) warnings.warn( RuntimeWarning( colorize( f"Env at task {task_id} doesn't have the same {kind} " f"space as the environment of the first task: \n" f"{task_space} \n" f"!=\n" f"{first_space} \n" f"This isn't fully supported yet. Don't expect this to work.", "yellow", ) ) ) if task_env.observation_space != first_env.observation_space: if ( isinstance(task_env.observation_space, spaces.Box) and isinstance(first_env.observation_space, spaces.Box) and task_env.observation_space.shape == first_env.observation_space.shape ) or ( isinstance(task_env.observation_space, TypedDictSpace) and isinstance(first_env.observation_space, TypedDictSpace) and "x" in task_env.observation_space.spaces and "x" in first_env.observation_space.spaces and task_env.observation_space.x.shape == first_env.observation_space.x.shape ): warnings.warn( RuntimeWarning( f"The shape of the observation space is the same, but the bounds are " f"different between the first env and the env of task {task_id}!" ) ) else: warn_spaces_are_different(task_id, "observation", first_env, task_env) if task_env.action_space != first_env.action_space: warn_spaces_are_different(task_id, "action", first_env, task_env) def _make_wrappers( self, base_env: Union[str, gym.Env, Callable[[], gym.Env]], task_schedule: Dict[int, Dict], # sharp_task_boundaries: bool, task_labels_available: bool, transforms: List[Transforms], starting_step: int, max_steps: int, new_random_task_on_reset: bool, ) -> List[Callable[[gym.Env], gym.Env]]: if self._using_custom_envs_foreach_task: if any(task_schedule.values()): logger.warning( RuntimeWarning( f"Ignoring task schedule {task_schedule}, since custom envs were " f"passed for each task!" ) ) task_schedule = None wrappers = super()._make_wrappers( base_env=base_env, task_schedule=task_schedule, task_labels_available=task_labels_available, transforms=transforms, starting_step=starting_step, max_steps=max_steps, new_random_task_on_reset=new_random_task_on_reset, ) if self._using_custom_envs_foreach_task: # If the user passed a specific env to use for each task, then there won't # be a MultiTaskEnv wrapper in `wrappers`, since the task schedule is # None/empty. # Instead, we will add a Wrapper that always gives the task ID of the # current task. # TODO: There are some 'unused' args above: `starting_step`, `max_steps`, # `new_random_task_on_reset` which are still passed to the super() call, but # just unused. if new_random_task_on_reset: pass # raise NotImplementedError( # "TODO: Add a MultiTaskEnv wrapper of some sort that alternates " # " between the source envs." # ) else: assert not task_schedule task_label = self.current_task_id task_label_space = spaces.Discrete(self.nb_tasks) if not task_labels_available: task_label = None task_label_space = Sparse(task_label_space, sparsity=1.0) wrappers.append( partial( FixedTaskLabelWrapper, task_label=task_label, task_label_space=task_label_space, ) ) if is_monsterkong_env(base_env): # TODO: Need to register a MetaMonsterKong-State-v0 or something like that! # TODO: Maybe add another field for 'force_state_observations' ? # if self.force_pixel_observations: pass return wrappers class MTEnvAdapterWrapper(TransformObservation): # TODO: For now, we remove the task id portion of the space and of the observation # dicts. def __init__(self, env: MTEnv, f: Callable = operator.itemgetter("env_obs")): super().__init__(env=env, f=f) # self.observation_space = self.env.observation_space["env_obs"] # def observation(self, observation): # return observation["env_obs"] def make_metaworld_env(env_class: Type[MetaWorldEnv], tasks: List["Task"]) -> MetaWorldEnv: env = env_class() env.set_task(tasks[0]) # TODO: Could maybe replace this with the 'RoundRobin' or 'Random' wrapper from # `multienv_wrappers.py` by making it appear like it's multiple envs, but actually # share the env instance env = MultiTaskEnvironment( env, task_schedule={i: operator.methodcaller("set_task", task) for i, task in enumerate(tasks)}, new_random_task_on_reset=True, add_task_dict_to_info=False, add_task_id_to_obs=False, ) return env def wrap(env_or_env_fn: Union[gym.Env, EnvFactory], wrappers: List[gym.Wrapper] = None) -> gym.Env: env: gym.Env = env_or_env_fn if isinstance(env_or_env_fn, gym.Env) else env_or_env_fn() wrappers = wrappers or [] for wrapper in wrappers: env = wrapper(env) return env def create_env( env_fn: Union[Type[gym.Env], Callable[[], gym.Env]], kwargs: Dict = None, wrappers: List[Callable[[gym.Env], gym.Env]] = None, seed: int = None, ) -> gym.Env: """ 1. Create an env instance by calling `env_fn`; 2. Wrap it with the wrappers in `wrappers`, if any; 3. seed it with `seed` if it is not None. """ env = env_fn(**(kwargs or {})) wrappers = wrappers or [] for wrapper in wrappers: env = wrapper(env) if seed is not None: env.seed(seed) return env def make_lpg_ftw_datasets( dataset: str, ) -> Tuple[List[EnvFactory], List[EnvFactory], List[EnvFactory]]: # IDEA: "LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}", # TODO: Instead of doing what I'm doing here, we could instead add an argument that gets # passed to the task creation function, for instance to get only a bodysize task, or # only a gravity task, etc. train_envs: List[EnvFactory] = [] valid_envs: List[EnvFactory] = [] test_envs: List[EnvFactory] = [] name_parts = dataset.split("-") if len(name_parts) != 5: raise ValueError( "Expected the name to follow this format: \n" "\t 'LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}' \n" f"but got {dataset}" ) _, _, modification_type, env_name, version = name_parts # NOTE: From the LPG-FTW repo: # > "500 for halfcheetah, 600 for hopper, 700 for walker" task_creation_seeds = {"HalfCheetah": 500, "Hopper": 600, "Walker2d": 700} task_creation_seed = task_creation_seeds[env_name] rng = np.random.default_rng(task_creation_seed) from sequoia.settings.rl.envs.mujoco import ( ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env, ContinualHopperV2Env, ContinualHopperV3Env, ContinualWalker2dV2Env, ContinualWalker2dV3Env, ) env_classes: Dict[str, Dict[str, Type[gym.Env]]] = { "HalfCheetah": { "v2": ContinualHalfCheetahV2Env, "v3": ContinualHalfCheetahV3Env, }, "Hopper": {"v2": ContinualHopperV2Env, "v3": ContinualHopperV3Env}, "Walker2d": {"v2": ContinualWalker2dV2Env, "v3": ContinualWalker2dV3Env}, } env_class = env_classes[env_name][version] # NOTE: Could also get the list of all geoms from the BODY_NAMES property on the classes above, # but the LPG-FTW repo actually uses a subset of those: bodyparts_for_env: Dict[str, List[str]] = { "HalfCheetah": ["torso", "fthigh", "fshin", "ffoot"], "Hopper": ["torso", "thigh", "leg", "foot"], "Walker2d": ["torso", "thigh", "leg", "foot"], } # From the paper: "We created T_max=20 tasks for HalfCheetah and Hopper domains, and # T_max=50 tasks for Walker2d domains." # NOTE: Here if `nb_tasks` is None, we use the default number of tasks from the paper. nb_tasks = 20 if env_name in ["HalfCheetah", "Hopper"] else 50 task_params: List[Dict] = [] values = [] for task_id in range(nb_tasks): # NOTE: Could also support a different type of modification per task, by passing a list of # types of modifications to use! if modification_type == "gravity": # This is a function that will be called for each task, and must produce a set of # (distinct, reproducible) keyword arguments for the given task. original_gravity = -9.81 task_gravity = round(((rng.random() + 0.5) * original_gravity), 4) task_kwargs = {"gravity": task_gravity} values.append(task_gravity) elif modification_type == "bodyparts": body_names = bodyparts_for_env[env_name] scale_factors = (rng.random(len(body_names)) + 0.5).round(4) values.append(scale_factors) body_name_to_size_scale = dict(zip(body_names, scale_factors)) # between 0.5 and 1.5, with 4 digits of precision. # NOTE: Scale the mass by the same factor as the size. task_kwargs = { "body_name_to_size_scale": body_name_to_size_scale, "body_name_to_mass_scale": body_name_to_size_scale.copy(), } else: raise NotImplementedError( f"Unsupported modification type: '{modification_type}'! Supported values are " f"'bodyparts', 'gravity'." ) logger.info(f"Arguments for task {task_id}: {task_kwargs}") task_params.append(task_kwargs) values = np.array(values) logger.debug(values.tolist()) # assert False # logger.info("Task parameters:") # logger.info(json.dumps(task_params, indent="\t")) # NOTE: All envs in LPG-FTW use max_episode_steps of 1000. # max_episode_steps = 1000 # wrappers = [partial(TimeLimit, max_episode_steps=max_episode_steps)] for task_id, task_kwargs in enumerate(task_params): # Function that will create the env with the given task. base_env_fn = partial(env_class, **task_kwargs) train_envs.append(base_env_fn) valid_envs.append(base_env_fn) test_envs.append(base_env_fn) return train_envs, valid_envs, test_envs ================================================ FILE: sequoia/settings/rl/incremental/setting_test.py ================================================ import dataclasses import enum import functools import inspect import math import random from typing import Any, ClassVar, Dict, NamedTuple, Optional, Type import gym import numpy as np import pytest from gym import spaces from gym.envs.classic_control import CartPoleEnv from sequoia.common.config import Config from sequoia.common.gym_wrappers import RenderEnvWrapper from sequoia.common.spaces import Image, Sparse from sequoia.conftest import ( metaworld_required, monsterkong_required, mtenv_required, mujoco_required, slow, xfail_param, ) from sequoia.methods.random_baseline import RandomBaselineMethod from sequoia.settings.assumptions.incremental_test import OtherDummyMethod from sequoia.settings.rl import TaskIncrementalRLSetting from sequoia.settings.rl.continual.setting_test import all_different_from_next from sequoia.settings.rl.setting_test import DummyMethod from ..discrete.setting_test import ( TestDiscreteTaskAgnosticRLSetting as DiscreteTaskAgnosticRLSettingTests, ) from .setting import IncrementalRLSetting class TestIncrementalRLSetting(DiscreteTaskAgnosticRLSettingTests): Setting: ClassVar[Type[Setting]] = IncrementalRLSetting dataset: pytest.fixture @pytest.fixture() def setting_kwargs(self, dataset: str, nb_tasks: int, config: Config): """Fixture used to pass keyword arguments when creating a Setting.""" kwargs = {"dataset": dataset, "nb_tasks": nb_tasks, "max_episode_steps": 100} if dataset.lower().startswith(("walker2d", "hopper", "halfcheetah", "continual")): # kwargs["train_max_steps"] = 5_000 # kwargs["max_episode_steps"] = 100 pass # NOTE: Using 0 workers so I can parallelize the tests without killing my PC. config.num_workers = 0 kwargs["config"] = config return kwargs def test_passing_supported_dataset(self, setting_kwargs: Dict): # Override this test because envs can be passed for each task. setting = self.Setting(**setting_kwargs) assert setting.train_task_schedule if setting.train_envs: # Passing the dataset created custom envs for each task (e.g. MT10, CW10, LPG-FTW-(...). # The task schedule should have keys for the task boundary steps, but values should be # empty dictionaries. assert not any(setting.train_task_schedule.values()) else: # Passing the dataset created a task schedule. assert all(setting.train_task_schedule.values()), "Should have non-empty tasks." def validate_results( self, setting: IncrementalRLSetting, method: DummyMethod, results: IncrementalRLSetting.Results, ) -> None: """Check that the results make sense. The Dummy Method used also keeps useful attributes, which we check here. """ assert results assert results.objective assert len(results.task_sequence_results) == setting.nb_tasks assert results.average_final_performance == sum( results.task_sequence_results[-1].average_metrics_per_task ) t = setting.nb_tasks p = setting.phases assert setting.known_task_boundaries_at_train_time assert setting.known_task_boundaries_at_test_time assert setting.task_labels_at_train_time # assert not setting.task_labels_at_test_time assert not setting.stationary_context if setting.nb_tasks == 1: assert not method.received_task_ids assert not method.received_while_training else: assert method.received_task_ids == sum( [ [t_i] + [t_j if setting.task_labels_at_test_time else None for t_j in range(t)] for t_i in range(t) ], [], ) assert method.received_while_training == sum( [[True] + [False for _ in range(t)] for t_i in range(t)], [] ) def test_tasks_are_different(self, setting_kwargs: Dict[str, Any], config: Config): """Check that the tasks different from the next. NOTE: Overriding this test because task schedules are empty when using custom envs for each task. """ config = setting_kwargs.pop("config", config) assert config.seed is not None setting = self.Setting(**setting_kwargs, config=config) # Check that each task is different from the next. # NOTE: When custom datasets are used for each task then the task schedules' values are # empty, we have to change the test condition a little bit here. if setting.train_envs: # The dataset being used resulted in creating an env per task, rather than just using # one env with a task schedule. # Make sure that the fn for creating the env of each task is unique. assert all_different_from_next(setting.train_envs) assert all_different_from_next(setting.val_envs) assert all_different_from_next(setting.test_envs) else: # Check that each task is different from the next. assert all_different_from_next(setting.train_task_schedule.values()) assert all_different_from_next(setting.val_task_schedule.values()) assert all_different_from_next(setting.test_task_schedule.values()) def test_number_of_tasks(self): setting = self.Setting( dataset="CartPole-v0", monitor_training_performance=True, nb_tasks=10, train_max_steps=10_000, test_max_steps=1000, ) assert setting.nb_tasks == 10 def test_max_number_of_steps_per_task_is_respected(self): setting = self.Setting( dataset="CartPole-v0", monitor_training_performance=True, # train_steps_per_task=500, nb_tasks=2, train_max_steps=1000, test_max_steps=1000, ) for task_id in range(setting.phases): setting.current_task_id = task_id train_env = setting.train_dataloader() total_steps = 0 while total_steps < setting.steps_per_phase: print(total_steps) obs = train_env.reset() done = False while not done: if total_steps == setting.current_train_task_length: assert train_env.is_closed() with pytest.raises(gym.error.ClosedEnvironmentError): obs, reward, done, info = train_env.step( train_env.action_space.sample() ) return else: obs, reward, done, info = train_env.step(train_env.action_space.sample()) total_steps += 1 assert total_steps == setting.steps_per_phase with pytest.raises(gym.error.ClosedEnvironmentError): train_env.reset() @monsterkong_required @pytest.mark.timeout(120) @pytest.mark.parametrize( "state", [False, xfail_param(True, reason="TODO: MonsterkongState doesn't work?")], ) def test_monsterkong(self, state: bool): """Checks that the MonsterKong env works fine with pixel and state input.""" setting = self.Setting( dataset="StateMetaMonsterKong-v0" if state else "PixelMetaMonsterKong-v0", # force_state_observations=state, # force_pixel_observations=(not state), nb_tasks=5, train_max_steps=500, test_max_steps=500, # train_steps_per_task=100, # test_steps_per_task=100, train_transforms=[], test_transforms=[], val_transforms=[], max_episode_steps=10, ) if state: # State-based monsterkong: We observe a flattened version of the game state # (20 x 20 grid + player cell and goal cell, IIRC.) assert setting.observation_space.x == spaces.Box( 0, 292, (402,), np.int16 ), setting._temp_train_env.observation_space else: assert setting.observation_space.x == Image(0, 255, (64, 64, 3), np.uint8) if setting.task_labels_at_test_time: assert setting.observation_space.task_labels == spaces.Discrete(5) else: assert setting.task_labels_at_train_time assert setting.observation_space.task_labels == Sparse( spaces.Discrete(5), sparsity=0.5, # 0.5 since we have task labels at train time. ) assert setting.test_max_steps == 500 with setting.train_dataloader() as env: obs = env.reset() assert obs in setting.observation_space method = DummyMethod() results = setting.apply(method) self.validate_results(setting, method, results) @mujoco_required @pytest.mark.parametrize("seed", [None, 123, 456]) @pytest.mark.parametrize("version", ["v2", "v3"]) @pytest.mark.parametrize("env_name", ["HalfCheetah", "Hopper", "Walker2d"]) @pytest.mark.parametrize("modification", ["bodyparts", "gravity"]) def test_LPG_FTW_datasets( self, env_name: str, modification: str, version: str, config: Config, seed: int, ): """Test using a dataset from the LPG-FTW paper / repo (continual mujoco variants). TODO: Check that: - the task sequence is always the same (uses the same seed), regardless of what seed is passed; - The envs are created correctly; - The number of tasks / train steps / test steps / etc is set to the right values. """ # LPG-FTW-{bodysize|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3} dataset = f"LPG-FTW-{modification}-{env_name}-{version}" # NOTE: Set the seed in the config, preserving the other values: config = dataclasses.replace(config, seed=seed) nb_tasks: Optional[int] = None # Using the default number of tasks for that setting for now setting: TaskIncrementalRLSetting = self.Setting( dataset=dataset, nb_tasks=nb_tasks, config=config, ) if nb_tasks is not None: assert setting.nb_tasks == nb_tasks else: assert setting.nb_tasks == 20 if env_name in ["HalfCheetah", "Hopper"] else 50 assert setting.train_steps_per_task == 100_000 assert setting.train_max_steps == setting.train_steps_per_task * setting.nb_tasks assert setting.test_steps_per_task == 10_000 assert setting.test_max_steps == setting.test_steps_per_task * setting.nb_tasks assert setting.config == config expected_values = { "bodyparts": { "HalfCheetah": np.array( [ [1.0667, 1.354, 1.1454, 0.9112], [0.968, 1.3214, 0.8125, 1.2862], [0.9356, 0.7476, 0.9421, 1.397], [1.057, 1.0286, 0.776, 1.3749], [0.7592, 1.3059, 0.6209, 0.9313], [0.8497, 1.016, 0.869, 0.9722], [0.6936, 0.7496, 0.9946, 0.7713], [0.9878, 1.1394, 1.438, 1.3296], [1.1359, 1.1118, 1.4415, 1.3868], [0.5468, 0.9953, 1.3474, 1.3668], [0.7779, 0.5924, 0.8996, 0.8196], [0.9775, 0.7775, 1.3211, 1.1515], [0.6026, 0.833, 0.9688, 1.4437], [0.6035, 1.161, 1.0771, 0.7065], [1.0629, 1.4446, 0.9937, 0.5573], [1.2337, 0.522, 1.0446, 0.86], [0.7313, 1.35, 1.2919, 0.6101], [1.0026, 0.5937, 0.6216, 1.3764], [0.6369, 0.8332, 1.0068, 1.1956], [1.1337, 0.8872, 1.0393, 1.4391], ] ), "Hopper": np.array( [ [0.7135, 0.5054, 1.3158, 1.3817], [1.2478, 1.4622, 0.8828, 0.7484], [0.5758, 1.4022, 1.0022, 1.2518], [1.4175, 0.5328, 0.8692, 0.6997], [0.6962, 1.3126, 1.2338, 1.4018], [1.4837, 1.0798, 0.7868, 0.8489], [1.3545, 0.7424, 1.2719, 1.0976], [0.6088, 0.516, 0.8584, 1.0396], [1.19, 0.6938, 0.5663, 0.8589], [0.8211, 1.3241, 0.9745, 1.345], [0.6572, 1.0763, 1.3601, 0.659], [0.7739, 0.7299, 0.6518, 1.469], [1.0556, 0.7345, 0.532, 1.0279], [1.2296, 0.6701, 1.4398, 1.0611], [0.6225, 1.0743, 0.827, 0.6753], [0.7325, 0.809, 1.2254, 0.9415], [1.4439, 0.9964, 1.4649, 1.333], [0.5189, 0.9123, 1.1166, 1.3882], [1.0468, 1.4162, 1.4152, 1.4333], [1.1143, 1.2726, 1.0209, 1.0729], ] ), "Walker2d": np.array( [ [0.7567, 0.756, 1.4277, 0.9565], [1.4109, 0.5937, 0.7606, 0.6839], [1.0276, 1.2041, 1.4451, 0.8439], [0.9755, 0.8187, 0.591, 0.583], [1.2181, 0.8519, 0.5878, 0.9935], [0.8885, 1.2908, 1.3013, 1.1454], [1.0147, 0.7442, 1.236, 0.5236], [1.1978, 0.5307, 1.4067, 1.1635], [0.9529, 0.8574, 0.6655, 0.5294], [0.8051, 1.1687, 0.8499, 1.3864], [1.2848, 0.8866, 0.5215, 1.0251], [1.2241, 0.7499, 1.1479, 0.5744], [1.2354, 0.5853, 1.1212, 0.5174], [0.7968, 0.7717, 1.2285, 0.8687], [1.0544, 0.5814, 0.8588, 0.687], [1.0695, 0.6469, 0.8567, 0.6682], [1.2904, 0.8367, 1.228, 0.8606], [1.0343, 0.7646, 0.515, 1.3386], [1.1157, 1.2064, 1.0026, 0.9877], [0.6621, 0.809, 1.0466, 0.5361], [0.9291, 0.6168, 0.9013, 1.4358], [1.048, 0.8483, 0.8586, 1.1867], [1.327, 1.0487, 1.4479, 0.9426], [1.2382, 0.8678, 1.0034, 1.2412], [0.5863, 1.4389, 0.934, 1.3923], [1.1379, 1.154, 0.5595, 0.5955], [1.3881, 1.3309, 0.5342, 1.1085], [0.8394, 1.0508, 0.9655, 0.7755], [0.7494, 0.6891, 0.6979, 1.3249], [1.1108, 1.3998, 0.7783, 0.599], [0.8687, 0.5902, 1.212, 0.6375], [0.5668, 0.981, 0.5026, 1.0739], [0.9416, 1.4424, 1.0721, 0.9112], [1.2981, 1.0119, 1.2722, 0.9808], [1.4171, 1.1066, 0.6053, 1.2302], [1.1096, 1.0246, 1.3117, 0.5727], [0.8082, 0.875, 0.9299, 1.2194], [1.0526, 0.961, 1.0492, 1.2552], [1.46, 0.8331, 0.934, 0.5725], [1.3832, 1.4736, 1.2651, 0.7956], [0.68, 1.2663, 1.4183, 0.9284], [1.2713, 0.6865, 0.8331, 1.0081], [1.4115, 0.5781, 0.9823, 0.8094], [1.4614, 0.5998, 1.2237, 1.3794], [1.2385, 1.2489, 0.7521, 0.818], [1.077, 1.2589, 0.748, 1.1483], [0.7855, 1.1619, 0.5537, 1.2367], [1.4765, 1.1728, 0.9052, 1.3113], [1.1144, 0.9986, 1.3052, 0.9948], [1.1542, 1.3616, 0.7465, 0.8679], ] ), }, "gravity": { "HalfCheetah": np.array( [ -10.4648, -13.2825, -11.236, -8.9384, -9.4964, -12.9626, -7.9709, -12.6178, -9.1777, -7.3343, -9.2424, -13.7041, -10.3694, -10.091, -7.6124, -13.4874, -7.4477, -12.8111, -6.0907, -9.1363, ] ), "Hopper": np.array( [ -6.999, -4.9579, -12.9078, -13.5543, -12.2405, -14.3439, -8.6606, -7.3419, -5.6488, -13.7555, -9.8317, -12.2801, -13.9059, -5.2266, -8.5266, -6.8638, -6.83, -12.8763, -12.104, -13.7512, ] ), "Walker2d": np.array( [ -7.4229, -7.4163, -14.006, -9.3835, -13.8414, -5.8243, -7.461, -6.7093, -10.0807, -11.8119, -14.1762, -8.2791, -9.57, -8.031, -5.7979, -5.7189, -11.9495, -8.3575, -5.7666, -9.7467, -8.7165, -12.6623, -12.7656, -11.2362, -9.9544, -7.3011, -12.1249, -5.1366, -11.7508, -5.2058, -13.8, -11.4139, -9.3481, -8.4107, -6.5289, -5.1934, -7.898, -11.4647, -8.3374, -13.6001, -12.6038, -8.6978, -5.1157, -10.0563, -12.0081, -7.3568, -11.2612, -5.6351, -12.1197, -5.7417, ] ), }, } def _unwrap_partials(env_fn: functools.partial) -> functools.partial: from gym.envs.mujoco import MujocoEnv # 'unwrap' the env fn: while isinstance(env_fn, functools.partial): # We want to recover the 'base' env factory (the function that actually creates # the modified mujoco env.) # NOTE `env_fn` is probably something like: # `partial(create_env, base_env_factory, wrappers=[...]) # or # `partial(foo, env_fn=base_env_factory, wrappers=[...]) print(env_fn) if inspect.isclass(env_fn.func) and issubclass(env_fn.func, MujocoEnv): # Reached the lowest-level partial, the one we're looking for. break if env_fn.args: env_fn = env_fn.args[0] else: env_fn = list(env_fn.keywords.values())[0] return env_fn if modification == "bodyparts": expected_factors_for_env = expected_values["bodyparts"][env_name] def check_env_fn_matches_expected(task_id: int, env_fn: functools.partial): env_fn = _unwrap_partials(env_fn) assert isinstance(env_fn, functools.partial) kwargs = env_fn.keywords for argument_name in ["body_name_to_size_scale", "body_name_to_mass_scale"]: argument_values = np.array(list(kwargs[argument_name].values())) assert (argument_values == expected_factors_for_env[task_id]).all() env_fn: functools.partial # Inspect the env functions and check that the arguments that would be passed to the # constructor make sense. # NOTE: Could also create the envs using the setting and inspect these attributes, # but I think that inspecting the attributes on the multi-env wrappers used by the # Traditional and MultiTask RL settings might not work. This is ok for now. for task_id, env_fn in enumerate(setting.train_envs): check_env_fn_matches_expected(task_id, env_fn) for task_id, env_fn in enumerate(setting.val_envs): check_env_fn_matches_expected(task_id, env_fn) for task_id, env_fn in enumerate(setting.test_envs): check_env_fn_matches_expected(task_id, env_fn) elif modification == "gravity": expected_gravities_for_env = expected_values["gravity"][env_name] def check_env_fn_matches_expected(task_id: int, env_fn: functools.partial): env_fn = _unwrap_partials(env_fn) kwargs = env_fn.keywords gravity_value: float = kwargs["gravity"] assert np.isclose(gravity_value, expected_gravities_for_env[task_id]) for task_id, env_fn in enumerate(setting.train_envs): check_env_fn_matches_expected(task_id, env_fn) for task_id, env_fn in enumerate(setting.val_envs): check_env_fn_matches_expected(task_id, env_fn) for task_id, env_fn in enumerate(setting.test_envs): check_env_fn_matches_expected(task_id, env_fn) # TODO: Not sure if this check will also work with the stationary settings, so skipping it # for now. if setting.stationary_context: return # Check that the max episode length is really respected. with setting.train_dataloader() as temp_env: steps = 0 obs = temp_env.reset() done = False while not done: action = temp_env.action_space.sample() obs, reward, done, info = temp_env.step(action) assert obs in temp_env.observation_space steps += 1 assert steps <= 1000 assert steps <= 1000 # NOTE: Testing the 'live' envs is much slower, since we have to actually isntantiate the # envs. Skipping the rest for now. return def _check_env_attributes_match(task_id: int, env: gym.Env): if modification == "bodyparts": size_scales = env.body_name_to_size_scale mass_scales = env.body_name_to_mass_scale assert size_scales == mass_scales assert list(size_scales.values()) == expected_factors_for_env[task_id].tolist() elif modification == "gravity": gravity = env.gravity assert gravity == expected_gravities_for_env[task_id] setting.prepare_data() for task_id in range(setting.nb_tasks): print(f"Testing the 'live' envs for task {task_id}.") setting.current_task_id = task_id with setting.train_dataloader() as env: _check_env_attributes_match(task_id, env) with setting.val_dataloader() as env: _check_env_attributes_match(task_id, env) with setting.test_dataloader() as env: _check_env_attributes_match(task_id, env) @pytest.mark.timeout(120) def test_action_space_always_matches_obs_batch_size_in_RL(config: Config): """ """ from sequoia.settings import TaskIncrementalRLSetting nb_tasks = 2 batch_size = 1 setting = TaskIncrementalRLSetting( dataset="cartpole", nb_tasks=nb_tasks, batch_size=batch_size, train_max_steps=200, test_max_steps=200, num_workers=0, # monitor_training_performance=True, # This is still a TODO in RL. ) total_samples = len(setting.test_dataloader()) method = OtherDummyMethod() _ = setting.apply(method, config=config) expected_encountered_batch_sizes = {batch_size or 1} last_batch_size = total_samples % (batch_size or 1) if last_batch_size != 0: expected_encountered_batch_sizes.add(last_batch_size) assert set(method.batch_sizes) == expected_encountered_batch_sizes # NOTE: Multiply by nb_tasks because the test loop is ran after each training task. actual_num_batches = len(method.batch_sizes) expected_num_batches = math.ceil(total_samples / (batch_size or 1)) * nb_tasks # MINOR BUG: There's an extra batch for each task. Might make sense, or might not, # not sure. assert actual_num_batches == expected_num_batches + nb_tasks expected_total = total_samples * nb_tasks actual_total_obs = sum(method.batch_sizes) assert actual_total_obs == expected_total + nb_tasks @mtenv_required @pytest.mark.xfail(reason="don't know how to get the max path length through mtenv!") def test_mtenv_meta_world_support(): from mtenv import MTEnv, make env: MTEnv = make("MT-MetaWorld-MT10-v0") env.set_task_state(0) env.seed(123) env.seed_task(123) obs = env.reset() assert isinstance(obs, dict) assert list(obs.keys()) == ["env_obs", "task_obs"] print(obs) done = False # BUG: No idea how to get the max path length, since I'm getting # AttributeError: 'MetaWorldMTWrapper' object has no attribute 'max_path_length' steps = 0 while not done and steps < env.max_path_length: obs, reward, done, info = env.step(env.action_space.sample()) # BUG: Can't render when using metaworld through mtenv, since mtenv *contains* a # straight-up copy-pasted old version of meta-world, which doesn't support it. env.render() steps += 1 env.close() env_obs_space = env.observation_space["env_obs"] task_obs_space = env.observation_space["task_obs"] # TODO: If the task observation space is Discrete(10), then we can't create a # setting with more than 10 tasks! We could add a check for this. # TODO: Figure out the default number of tasks depending on the chosen dataset. setting = IncrementalRLSetting(dataset="MT-MetaWorld-MT10-v0", nb_tasks=3) assert setting.observation_space.x == env_obs_space assert setting.nb_tasks == 3 train_env = setting.train_dataloader() assert train_env.observation_space.x == env_obs_space assert train_env.observation_space.task_labels == spaces.Discrete(3) n_episodes = 1 for episode in range(n_episodes): obs = train_env.reset() done = False steps = 0 while not done and steps < env.max_path_length: obs, reward, done, info = train_env.step(train_env.action_space.sample()) # BUG: Can't render meta-world env when using mtenv. train_env.render() steps += 1 # @pytest.mark.no_xvfb # @pytest.mark.xfail(reason="TODO: Rethink how we want to integrate MetaWorld envs.") @pytest.mark.skip(reason="BUG: timeout handler seems to be bugged, test lasts forever") @metaworld_required @pytest.mark.timeout(60) def test_metaworld_support(config: Config): """Test using metaworld benchmarks as the dataset of an RL Setting. NOTE: Uses either a MetaWorldEnv instance as the `dataset`, or the env id. TODO: Need to rethink this, we should instead use one env class per task (where each task env goes through a subset of the tasks for training) """ # TODO: Add option of passing a benchmark instance? setting = IncrementalRLSetting( dataset="MT10", config=config, max_episode_steps=10, train_max_steps=500, test_max_steps=500, ) assert setting.nb_tasks == len(setting.train_envs) assert setting.nb_tasks == 10 assert setting.train_max_steps == 500 assert setting.test_max_steps == 500 assert setting.train_steps_per_task == 50 assert setting.test_steps_per_task == 50 method = DummyMethod() results = setting.apply(method, config=config) assert results.summary() @slow @metaworld_required @pytest.mark.timeout(180) @pytest.mark.parametrize("dataset", ["CW10", "CW20"]) def test_continual_world_support(dataset: str, config: Config): """Test using CW10 and CW20 benchmarks as the dataset of an RL Setting. TODO: This test is quite long to run, in part because metaworld takes like 20 seconds to load, and there being 20 tasks in CW20 """ # TODO: Add option of passing a benchmark instance? That might make it quicker to # run tests? setting = IncrementalRLSetting( dataset=dataset, config=config, ) assert setting.nb_tasks == 10 if dataset == "CW10" else 20 assert setting.train_steps_per_task == 1_000_000 assert setting.train_max_steps == 1_000_000 * setting.nb_tasks assert setting.test_steps_per_task == 10_000 assert setting.test_max_steps == 10_000 * setting.nb_tasks setting = IncrementalRLSetting( dataset=dataset, config=config, max_episode_steps=10, train_steps_per_task=50, test_steps_per_task=50, ) assert setting.nb_tasks == 10 if dataset == "CW10" else 20 assert setting.train_steps_per_task == 50 assert setting.test_steps_per_task == 50 assert setting.train_max_steps == setting.train_steps_per_task * setting.nb_tasks assert setting.test_steps_per_task == setting.test_steps_per_task assert setting.test_max_steps == setting.test_steps_per_task * setting.nb_tasks assert ( setting.nb_tasks == len(setting.train_envs) == len(setting.val_envs) == len(setting.test_envs) ) method = DummyMethod() results = setting.apply(method, config=config) assert method.train_episodes_per_task == [5 for _ in range(setting.nb_tasks)] assert results.summary() @pytest.mark.xfail(reason="Metaworld integration isn't done yet") @metaworld_required @pytest.mark.timeout(120) @pytest.mark.parametrize("pass_env_id_instead_of_env_instance", [True, False]) def test_metaworld_auto_task_schedule(pass_env_id_instead_of_env_instance: bool): """Test that when passing just an env id from metaworld and a number of tasks, the task schedule is created automatically. """ import metaworld from metaworld import MetaWorldEnv benchmark = metaworld.ML10() # Construct the benchmark, sampling tasks env_name = "reach-v2" env_type: Type[MetaWorldEnv] = benchmark.train_classes[env_name] env = env_type() # TODO: When not passing a nb_tasks, the number of available tasks for that env # is used. # setting = TaskIncrementalRLSetting( # dataset=env_name if pass_env_id_instead_of_env_instance else env, # train_steps_per_task=1000, # ) # assert setting.nb_tasks == 50 # assert setting.steps_per_task == 1000 # assert sorted(setting.train_task_schedule.keys()) == list(range(0, 50_000, 1000)) # Test passing a number of tasks: with pytest.warns(RuntimeWarning): setting = TaskIncrementalRLSetting( dataset=env_name if pass_env_id_instead_of_env_instance else env, train_max_steps=2000, nb_tasks=2, test_max_steps=2000, transforms=[], ) assert setting.nb_tasks == 2 assert setting.steps_per_task == 1000 assert sorted(setting.train_task_schedule.keys()) == list(range(0, 2000, 1000)) from sequoia.common.metrics.rl_metrics import EpisodeMetrics method = DummyMethod() with pytest.warns(RuntimeWarning): results: IncrementalRLSetting.Results[EpisodeMetrics] = setting.apply(method) # TODO: Don't know if these values make sense! Rewards are super high, not sure if # that's normal in Mujoco/metaworld envs: # "Average": { # "Episodes": 66, # "Mean reward per episode": 13622.872306005293, # "Mean reward per step": 90.81914870670195 # } # assert 50 < results.average_final_performance.episodes # assert 10_000 < results.average_final_performance.mean_reward_per_episode # assert 100 < results.average_final_performance.mean_episode_length <= 150 @pytest.mark.xfail(reason="WIP: Adding dm_control support") def test_dm_control_support(): import numpy as np from dm_control import suite # Load one task: env = suite.load(domain_name="cartpole", task_name="swingup") # Iterate over a task set: for domain_name, task_name in suite.BENCHMARKING: task_env = suite.load(domain_name, task_name) # Step through an episode and print out reward, discount and observation. action_spec = env.action_spec() time_step = env.reset() while not time_step.last(): action = np.random.uniform(action_spec.minimum, action_spec.maximum, size=action_spec.shape) time_step = env.step(action) print(time_step.reward, time_step.discount, time_step.observation) # TODO: Use the task schedule as a way to specify how long each task lasts in a # given env? For instance: class PeriodTypeEnum(enum.Enum): STEPS = enum.auto() EPISODES = enum.auto() class Period(NamedTuple): value: int type: PeriodTypeEnum = PeriodTypeEnum.STEPS steps = lambda v: Period(value=v, type=PeriodTypeEnum.STEPS) episodes = lambda v: Period(value=v, type=PeriodTypeEnum.EPISODES) train_task_schedule = { steps(10): "CartPole-v0", episodes(1000): "ALE/Breakout-v5", } from gym.wrappers import TimeLimit def make_random_cartpole_env(gravity_scale: float): env = gym.make("CartPole-v1") env = TimeLimit(env, max_episode_steps=50) env.unwrapped.gravity *= gravity_scale return env class TestPassingEnvsForEachTask: """Tests that have to do with the feature of passing the list of environments to use for each task. """ def test_raises_warning_when_envs_have_different_obs_spaces(self): task_envs = ["CartPole-v0", "Pendulum-v1"] with pytest.warns(RuntimeWarning, match="doesn't have the same observation space"): setting = IncrementalRLSetting(train_envs=task_envs) setting.train_dataloader() def test_passing_env_fns_for_each_task(self): nb_tasks = 3 gravity_scales = [0.5 + random.random() for _ in range(nb_tasks)] # task_envs = ["CartPole-v0", "CartPole-v1"] task_envs = [ functools.partial(make_random_cartpole_env, gravity_scales[i]) for i in range(nb_tasks) ] base_env = make_random_cartpole_env(gravity_scale=1.0) setting = IncrementalRLSetting(train_envs=task_envs) assert setting.nb_tasks == nb_tasks # TODO: Using 'no-op' task schedules, rather than empty ones. # This fixes a bug with the creation of the test environment. assert not any(setting.train_task_schedule.values()) assert not any(setting.val_task_schedule.values()) assert not any(setting.test_task_schedule.values()) # assert not setting.train_task_schedule # assert not setting.val_task_schedule # assert not setting.test_task_schedule # assert len(setting.train_task_schedule.keys()) == 2 setting.current_task_id = 0 train_env = setting.train_dataloader() assert train_env.gravity == base_env.gravity * gravity_scales[0] setting.current_task_id = 1 train_env = setting.train_dataloader() assert train_env.gravity == base_env.gravity * gravity_scales[1] assert isinstance(train_env.unwrapped, CartPoleEnv) # Not sure, do we want to add a 'observation_spaces`, `action_spaces` and # `reward_spaces` properties? assert setting.observation_space.x == train_env.observation_space.x if setting.task_labels_at_train_time: # TODO: Either add a `__getattr__` proxy on the Sparse space, or create # dedicated `SparseDiscrete`, `SparseBox` etc spaces so that we eventually # get to use `space.n` on a Sparse space. assert train_env.observation_space.task_labels == spaces.Discrete(setting.nb_tasks) sparsity = 0.0 if setting.task_labels_at_test_time else 0.5 assert setting.observation_space.task_labels == Sparse( spaces.Discrete(setting.nb_tasks), sparsity=sparsity, ) def test_passing_env_for_each_task(self): nb_tasks = 3 gravity_scales = [0.5 + random.random() for _ in range(nb_tasks)] # task_envs = ["CartPole-v0", "CartPole-v1"] task_envs = [make_random_cartpole_env(gravity_scales[i]) for i in range(nb_tasks)] base_env = make_random_cartpole_env(1.0) setting = IncrementalRLSetting(train_envs=task_envs) assert setting.nb_tasks == nb_tasks # TODO: Using 'no-op' task schedules, rather than empty ones. # This fixes a bug with the creation of the test environment. assert not any(setting.train_task_schedule.values()) assert not any(setting.val_task_schedule.values()) assert not any(setting.test_task_schedule.values()) # assert not setting.train_task_schedule # assert not setting.val_task_schedule # assert not setting.test_task_schedule # assert len(setting.train_task_schedule.keys()) == 2 setting.current_task_id = 0 train_env = setting.train_dataloader() assert train_env.gravity == base_env.gravity * gravity_scales[0] setting.current_task_id = 1 train_env = setting.train_dataloader() assert train_env.gravity == base_env.gravity * gravity_scales[1] assert isinstance(train_env.unwrapped, CartPoleEnv) # Not sure, do we want to add a 'observation_spaces`, `action_spaces` and # `reward_spaces` properties? assert setting.observation_space.x == train_env.observation_space.x if setting.task_labels_at_train_time: # TODO: Either add a `__getattr__` proxy on the Sparse space, or create # dedicated `SparseDiscrete`, `SparseBox` etc spaces so that we eventually # get to use `space.n` on a Sparse space. assert train_env.observation_space.task_labels == spaces.Discrete(setting.nb_tasks) sparsity = 0.0 if setting.task_labels_at_test_time else 0.5 assert setting.observation_space.task_labels == Sparse( spaces.Discrete(setting.nb_tasks), sparsity=sparsity ) def test_command_line(self): # TODO: If someone passes the same env ids from the command-line, then shouldn't # we somehow vary the tasks by changing the level or something? setting = IncrementalRLSetting.from_args(argv="--train_envs CartPole-v0 Pendulum-v1") assert setting.train_envs == ["CartPole-v0", "Pendulum-v1"] # TODO: Not using this: def test_raises_warning_when_envs_have_different_obs_spaces(self): task_envs = ["CartPole-v1", "Pendulum-v1"] with pytest.warns(RuntimeWarning, match="doesn't have the same observation space"): setting = IncrementalRLSetting(train_envs=task_envs) setting.train_dataloader() def test_random_baseline(self): nb_tasks = 3 gravities = [random.random() * 10 for _ in range(nb_tasks)] from gym.wrappers import TimeLimit # task_envs = ["CartPole-v0", "CartPole-v1"] task_envs = [make_random_cartpole_env(i) for i in range(nb_tasks)] setting = IncrementalRLSetting( train_envs=task_envs, train_max_steps=1000, test_max_steps=1000 ) assert setting.nb_tasks == nb_tasks method = RandomBaselineMethod() results = setting.apply(method) assert results.objective > 0 @pytest.mark.xfail(reason=f"Don't yet fully changing the size of the body parts.") @mujoco_required def test_incremental_mujoco_like_LPG_FTW(): """Trying to get the same-ish setup as the "LPG_FTW" experiments See https://github.com/Lifelong-ML/LPG-FTW/tree/master/experiments """ nb_tasks = 5 from sequoia.settings.rl.envs.mujoco import ContinualHalfCheetahEnv task_gravity_factors = [random.random() + 0.5 for _ in range(nb_tasks)] task_size_scale_factors = [random.random() + 0.5 for _ in range(nb_tasks)] task_envs = [ RenderEnvWrapper( ContinualHalfCheetahEnv( gravity=task_gravity_factors[task_id] * -9.81, body_name_to_size_scale={"torso": task_size_scale_factors[task_id]}, ), ) for task_id in range(nb_tasks) ] setting = IncrementalRLSetting( train_envs=task_envs, train_steps_per_task=10_000, train_wrappers=RenderEnvWrapper, test_max_steps=10_000, ) assert setting.nb_tasks == nb_tasks # NOTE: Same as above: we use a `no-op` task schedule, rather than an empty one. assert not any(setting.train_task_schedule.values()) assert not any(setting.val_task_schedule.values()) assert not any(setting.test_task_schedule.values()) # assert not setting.train_task_schedule # assert not setting.val_task_schedule # assert not setting.test_task_schedule method = RandomBaselineMethod() # TODO: Using `render=True` causes a silent crash for some reason! results = setting.apply(method) assert results.objective > 0 ================================================ FILE: sequoia/settings/rl/incremental/tasks.py ================================================ """ TODO: Add the tasks for IncrementalRLSetting, on top of the existing tasks from ContinualRL """ import operator import warnings from functools import partial, singledispatch from typing import Callable, List import gym import numpy as np from sequoia.settings.rl.envs import ( METAWORLD_INSTALLED, MTENV_INSTALLED, MetaWorldEnv, MetaWorldMujocoEnv, MTEnv, SawyerXYZEnv, ) from ..discrete.tasks import ( DiscreteTask, _is_supported, make_discrete_task, sequoia_registry, task_sampling_function, ) IncrementalTask = DiscreteTask @task_sampling_function(env_registry=sequoia_registry, based_on=make_discrete_task) @singledispatch def make_incremental_task( env: gym.Env, *, step: int, change_steps: List[int], seed: int = None, **kwargs, ) -> IncrementalTask: """Generic function used by Sequoia's `IncrementalRLSetting` (and its descendants) to create a "task" that will be applied to an environment like `env`. To add support for a new type of environment, simply register a handler function: ``` @make_incremental_task.register(SomeGymEnvClass) def make_incremental_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,): return {"my_attribute": random.random()} ``` """ raise NotImplementedError(f"Don't know how to create an (incremental) task for env {env}") is_supported = partial(_is_supported, _make_task_function=make_incremental_task) # def is_supported( # env_id: str, # env_registry: EnvRegistry = sequoia_registry, # _make_task_function: Callable[..., DiscreteTask] = make_incremental_task, # ) -> bool # """ Returns wether Sequoia is able to create (incremental) tasks for the given # environment. # """ # return is_supported_by_parent(env_id, env_registry=env_registry, _make_task_function=_make_task_function) # return make_incremental_task.is_supported(env_id=env_id, env_registry=env_registry) if MTENV_INSTALLED: @make_incremental_task.register def make_task_for_mtenv_env( env: MTEnv, step: int, change_steps: List[int], seed: int = None, **kwargs, ) -> Callable[[MTEnv], None]: """Samples a task for an env from MTEnv. The Task in this case will be a callable that will call the env's `set_task_state` method, passing in an integer (`task`). When `seed` is None, then the task will be the same as the task index. """ assert change_steps, "Need task boundaries to construct the task schedule." if step not in change_steps: raise RuntimeError( f"MTENV has discrete tasks (as far as I'm aware), so step {step} " f"should be in {change_steps}!" ) task_index = change_steps.index(step) task_states = list(range(len(change_steps))) if seed is not None: # perform a deterministic shuffling of the 'task ids' rng = rng or np.random.default_rng(seed) rng.shuffle(task_states) # NOTE: Task state is an integer for now, but I'm not sure if it can also be # something else.. task_state: int = task_states[task_index] return operator.methodcaller("set_task_state", task_state) if METAWORLD_INSTALLED: @make_incremental_task.register(SawyerXYZEnv) @make_incremental_task.register(MetaWorldMujocoEnv) @make_incremental_task.register(MetaWorldEnv) def make_task_for_metaworld_env( env: MetaWorldEnv, step: int, change_steps: List[int] = None, seed: int = None, **kwargs, ) -> Callable[[MetaWorldEnv], None]: """Samples a task for an environment from MetaWorld. The Task in this case will be a callable that will call the env's `set_task` method, passing in a task from the `train_tasks` of the benchmark that contains this environment. When `seed` is None, then the task will be the same as the task index. """ # TODO: Which benchmark should we use? found = False assert change_steps, "Need task boundaries to construct the task schedule." if step not in change_steps: raise RuntimeError( f"MTENV has discrete tasks (as far as I'm aware), so step {step} " f"should be in {change_steps}!" ) task_index = change_steps.index(step) import metaworld # TODO: Not sure how exactly we're supposed to use the train_classes vs # train_tasks, should it be a MultiTaskEnv within a given env class? warnings.warn(RuntimeWarning("This is supposedly not the right way to do it!")) env_name = "" # Find the benchmark that contains this type of env. for benchmark_class in [metaworld.ML10]: benchmark = benchmark_class() for env_name, env_class in benchmark.train_classes.items(): if isinstance(env, env_class): # Found the right benchmark that contains this env class, now # create the task schedule using # the tasks. found = True break if found: break if not found: raise NotImplementedError(f"Can't find a benchmark with env class {type(env)}!") # `benchmark` is here the right benchmark to use to create the tasks. training_tasks = [task for task in benchmark.train_tasks if task.env_name == env_name] tasks = training_tasks.copy() if seed is not None: # perform a deterministic shuffling of the 'task ids' rng = rng or np.random.default_rng(seed) rng.shuffle(tasks) task = tasks[task_index] return operator.methodcaller("set_task", task) ================================================ FILE: sequoia/settings/rl/multi_task/__init__.py ================================================ from .setting import MultiTaskRLSetting ================================================ FILE: sequoia/settings/rl/multi_task/setting.py ================================================ """ 'Classical' RL setting. """ from dataclasses import dataclass from typing import Callable, List import gym from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import constant from ..task_incremental import TaskIncrementalRLSetting from ..traditional import TraditionalRLSetting logger = get_logger(__name__) @dataclass class MultiTaskRLSetting(TaskIncrementalRLSetting, TraditionalRLSetting): """Reinforcement Learning setting where the environment alternates between a set of tasks sampled uniformly. Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled during training. """ # TODO: Move this into a new Assumption about the context non-stationarity. stationary_context: bool = constant(True) @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. Defaults to the number of tasks, but may be different, for instance in so-called Multi-Task Settings, this is set to 1. """ return 1 # TODO: Show how the multi-task wrapper is created here, rather than in the base class. def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: return super().create_train_wrappers() def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]: """Get the list of wrappers to add to a single test environment. The result of this method must be pickleable when using multiprocessing. Returns ------- List[Callable[[gym.Env], gym.Env]] [description] """ if self.stationary_context: logger.warning( "The test phase will go through all tasks in sequence, rather than " "shuffling them! (This is to make it easier to compile the performance " "metrics for each task." ) new_random_task_on_reset = False # TODO: If we're in the 'Multi-Task RL' setting, then should we maybe change # the task schedule, so that we give an equal number of steps per task? return self._make_wrappers( base_env=self.test_dataset, task_schedule=self.test_task_schedule, # sharp_task_boundaries=self.known_task_boundaries_at_test_time, task_labels_available=self.task_labels_at_test_time, transforms=self.test_transforms, starting_step=0, max_steps=self.test_max_steps, new_random_task_on_reset=new_random_task_on_reset, ) ================================================ FILE: sequoia/settings/rl/multi_task/setting_test.py ================================================ # TODO: Tests for the multi-task RL setting. from typing import ClassVar, Type import pytest from sequoia.settings.rl.setting_test import DummyMethod from ..task_incremental.setting_test import ( TestTaskIncrementalRLSetting as TaskIncrementalRLSettingTests, ) from .setting import MultiTaskRLSetting class TestMultiTaskRLSetting(TaskIncrementalRLSettingTests): Setting: ClassVar[Type[Setting]] = MultiTaskRLSetting dataset: pytest.fixture # def test_on_task_switch_is_called(self): # setting = self.Setting( # dataset="CartPole-v0", # nb_tasks=5, # # train_steps_per_task=100, # train_max_steps=500, # test_max_steps=500, # ) # method = DummyMethod() # _ = setting.apply(method) # assert setting.task_labels_at_test_time # assert False, method.observation_task_labels def validate_results( self, setting: MultiTaskRLSetting, method: DummyMethod, results: MultiTaskRLSetting.Results, ) -> None: """Check that the results make sense. The Dummy Method used also keeps useful attributes, which we check here. """ assert results assert results.objective assert setting.stationary_context assert len(results.task_results) == setting.nb_tasks assert results.average_metrics == sum( task_result.average_metrics for task_result in results.task_results ) t = setting.nb_tasks p = setting.phases assert setting.known_task_boundaries_at_train_time assert setting.known_task_boundaries_at_test_time assert setting.task_labels_at_train_time assert setting.task_labels_at_test_time if setting.nb_tasks == 1: assert not method.received_task_ids assert not method.received_while_training else: # Only received during testing. assert method.received_task_ids == [t_i for t_i in range(t)] assert method.received_while_training == [False for _ in range(t)] ================================================ FILE: sequoia/settings/rl/objects.py ================================================ from dataclasses import dataclass from typing import TypeVar from torch import Tensor from sequoia.settings.base import Setting T = TypeVar("T") @dataclass(frozen=True) class Observations(Setting.Observations): """Observations in a continual RL Setting.""" # Input example x: Tensor @dataclass(frozen=True) class Actions(Setting.Actions): pass # TODO: Replace this 'Rewards' with a 'SparseRewards'-like object for RL, and a # 'DenseRewards'-like object in SL, rather than use the same in RL and SL. @dataclass(frozen=True) class Rewards(Setting.Rewards[T]): """Rewards given back by the environment in RL Settings.""" # @dataclass(frozen=True) # class RLReward(Rewards[T]): # reward: T # @dataclass(frozen=True) # class SLReward(Rewards[T]): # reward: T # y: Sequence[T] ObservationType = TypeVar("ObservationType", bound=Observations) ActionType = TypeVar("ActionType", bound=Actions) RewardType = TypeVar("RewardType", bound=Rewards) # from .environment import RLEnvironment as Environment ================================================ FILE: sequoia/settings/rl/setting.py ================================================ from dataclasses import dataclass from typing import ClassVar, Type from sequoia.settings.base import Setting from sequoia.settings.base.environment import ActionType, ObservationType, RewardType from .environment import RLEnvironment from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType @dataclass class RLSetting(Setting[RLEnvironment[ObservationType, ActionType, RewardType]]): """LightningDataModule for an 'active' setting. This is to be the parent of settings like RL or maybe Active Learning. """ Observations: ClassVar[Type[ObservationType]] = Observations Actions: ClassVar[Type[ActionType]] = Actions Rewards: ClassVar[Type[RewardType]] = Rewards ================================================ FILE: sequoia/settings/rl/setting_test.py ================================================ """ Utilities used in tests for the RL Settings. """ from typing import Any, Callable, Dict, List, Optional import warnings from sequoia.common.gym_wrappers import IterableWrapper from sequoia.methods import RandomBaselineMethod from sequoia.settings.base import Environment from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class DummyMethod(RandomBaselineMethod): """Random baseline method used for debugging the (RL) settings. TODO: Remove the other `DummyMethod` variants, replace them with this. """ def __init__( self, additional_train_wrappers: List[Callable[[Environment], Environment]] = None, additional_valid_wrappers: List[Callable[[Environment], Environment]] = None, ): super().__init__() # Wrappers to be added to the train/val environments to debug/test that the # setting's environments work correctly. self.train_env: Optional[Environment] = None self.valid_env: Optional[Environment] = None self.additional_train_wrappers = additional_train_wrappers or [] self.additional_valid_wrappers = additional_valid_wrappers or [] self.all_train_values = [] self.all_valid_values = [] self.observation_task_labels: List[Any] = [] self.n_fit_calls = 0 self.n_task_switches = 0 self.received_task_ids: List[Optional[int]] = [] self.received_while_training: List[bool] = [] self.train_steps_per_task: List[int] = [] self.train_episodes_per_task: List[int] = [] self._has_been_configured_before = False self.changing_attributes: List[str] = [] def configure(self, setting): if self._has_been_configured_before: raise RuntimeError("Can't reuse this Method across Settings for now.") self._has_been_configured_before = True # The attributes to look for changes with. self.changing_attributes = list( set().union(*[task.keys() for task in setting.train_task_schedule.values()]) ) self.train_env = None self.valid_env = None def fit( self, train_env: Environment, valid_env: Environment, ): # Add wrappers, if necessary. for wrapper in self.additional_train_wrappers: train_env = wrapper(train_env) for wrapper in self.additional_valid_wrappers: valid_env = wrapper(valid_env) train_env = CheckAttributesWrapper(train_env, attributes=self.changing_attributes) valid_env = CheckAttributesWrapper(valid_env, attributes=self.changing_attributes) self.train_env = train_env self.valid_env = valid_env # TODO: Replace the loop below with adding soem wrappers around the train/valid envs, and # just delegate to super().fit (so we use the RandomBaselineMethod). # return super().fit(train_env, valid_env) episodes = 0 val_interval = 10 total_steps = 0 self.train_steps_per_task.append(0) self.train_episodes_per_task.append(0) import tqdm train_pbar = tqdm.tqdm(desc="Fake training") while not train_env.is_closed(): obs = train_env.reset() task_labels = obs.task_labels if task_labels is None or isinstance(task_labels, int) or not task_labels.shape: task_labels = [task_labels] self.observation_task_labels.extend(task_labels) attr_dict = {attr: getattr(train_env, attr) for attr in self.changing_attributes} logger.debug(f"Start of episode #{episodes}: {attr_dict}") done = False while not done and not train_env.is_closed(): actions = train_env.action_space.sample() # print(train_env.current_task) obs, rew, done, info = train_env.step(actions) total_steps += 1 self.train_steps_per_task[-1] += 1 train_pbar.update() train_pbar.set_postfix({"episodes": episodes, "total steps": total_steps}) episodes += 1 self.train_episodes_per_task[-1] += 1 if episodes % val_interval == 0 and not valid_env.is_closed(): # Perform one 'validation' episode. obs = valid_env.reset() done = False while not done and not valid_env.is_closed(): actions = valid_env.action_space.sample() obs, rew, done, info = valid_env.step(actions) if self.max_train_episodes is not None and episodes < self.max_train_episodes: break self.all_train_values.append(self.train_env.values) self.all_valid_values.append(self.valid_env.values) self.n_fit_calls += 1 def on_task_switch(self, task_id: Optional[int] = None): self.n_task_switches += 1 self.received_task_ids.append(task_id) self.received_while_training.append(self.training) class CheckAttributesWrapper(IterableWrapper): """Wrapper that stores the value of a given attribute at each step.""" def __init__(self, env, attributes: List[str]): super().__init__(env) self.attributes = attributes self.values: Dict[int, Dict[str, Any]] = {} self.steps = 0 def _store_current_attributes(self): if self.steps not in self.values: self.values[self.steps] = {} for attribute in self.attributes: value = getattr(self.env, attribute) unwrapped_value = getattr(self.env.unwrapped, attribute) assert value == unwrapped_value, (attribute, value, unwrapped_value) self.values[self.steps][attribute] = value def step(self, action): self._store_current_attributes() result = super().step(action) self.steps += 1 self._store_current_attributes() return result ================================================ FILE: sequoia/settings/rl/task_incremental/__init__.py ================================================ from .setting import TaskIncrementalRLSetting ================================================ FILE: sequoia/settings/rl/task_incremental/setting.py ================================================ from dataclasses import dataclass from sequoia.utils.utils import constant from ..incremental import IncrementalRLSetting @dataclass class TaskIncrementalRLSetting(IncrementalRLSetting): """Continual RL setting with clear task boundaries and task labels. The task labels are given at both train and test time. """ task_labels_at_train_time: bool = constant(True) task_labels_at_test_time: bool = constant(True) ================================================ FILE: sequoia/settings/rl/task_incremental/setting_test.py ================================================ from typing import ClassVar, List, Type import pytest from sequoia.common.gym_wrappers import MultiTaskEnvironment from sequoia.settings.rl.incremental.setting_test import ( TestIncrementalRLSetting as IncrementalRLSettingTests, ) from .setting import TaskIncrementalRLSetting class TestTaskIncrementalRLSetting(IncrementalRLSettingTests): Setting: ClassVar[Type[Setting]] = TaskIncrementalRLSetting dataset: pytest.fixture def test_task_label_space_of_env_has_right_n(): setting = TaskIncrementalRLSetting(dataset="MountainCarContinuous-v0") default_nb_tasks = setting.nb_tasks assert setting.observation_space.task_labels.n == default_nb_tasks assert setting.train_dataloader().observation_space.task_labels.n == default_nb_tasks assert setting.val_dataloader().observation_space.task_labels.n == default_nb_tasks assert setting.test_dataloader().observation_space.task_labels.n == default_nb_tasks def test_task_schedule_is_used(): """Test that the tasks are switching over time.""" setting = TaskIncrementalRLSetting( dataset="CartPole-v0", train_max_steps=100, nb_tasks=2, ) default_length = 0.5 for task_id in range(2): setting.current_task_id = task_id env = setting.train_dataloader(batch_size=None) env: MultiTaskEnvironment assert len(setting.train_task_schedule) == 3 assert len(setting.val_task_schedule) == 3 assert len(setting.test_task_schedule) == 3 starting_length = env.length _ = env.reset() lengths: List[float] = [] for i in range(setting.steps_per_phase): obs, reward, done, info = env.step(env.action_space.sample()) # NOTE: If we're done on the last step, we can't reset, since that would go # over the step budget. if done and i != setting.steps_per_phase - 1: env.reset() # Get the length of the pole from the environment. length = env.length lengths.append(length) if task_id == 0: assert starting_length == default_length assert all(length == default_length for length in lengths) else: # The length of the pole is different than the default length assert starting_length != default_length # The length shouldn't be changing over time. assert all(length == starting_length for length in lengths) ================================================ FILE: sequoia/settings/rl/task_incremental/tasks.py ================================================ from ..incremental.tasks import make_incremental_task # NOTE: For now there aren't any tasks specific to only task-incremental. make_task_incremental_task = make_incremental_task is_supported = make_task_incremental_task.is_supported ================================================ FILE: sequoia/settings/rl/traditional/__init__.py ================================================ from .setting import TraditionalRLSetting ================================================ FILE: sequoia/settings/rl/traditional/setting.py ================================================ """ 'Classical' RL setting. """ from dataclasses import dataclass from typing import ClassVar, Dict from simple_parsing.helpers import choice from typing_extensions import Final from sequoia.utils.utils import constant # NOTE: We can reuse those results for now, since they describe the same thing. from ..discrete.results import DiscreteTaskAgnosticRLResults as TraditionalRLResults from ..incremental import IncrementalRLSetting @dataclass class TraditionalRLSetting(IncrementalRLSetting): """Your usual "Classical" Reinforcement Learning setting. Implemented as a MultiTaskRLSetting, but with a single task. """ # Class variable that holds the dict of available environments. available_datasets: ClassVar[Dict[str, str]] = IncrementalRLSetting.available_datasets.copy() # Which dataset/environment to use for training, validation and testing. dataset: str = choice(available_datasets, default="CartPole-v0") # IDEA: By default, only use one task, although there may actually be more than one. nb_tasks: int = 5 stationary_context: Final[bool] = constant(True) known_task_boundaries_at_train_time: Final[bool] = constant(True) task_labels_at_train_time: Final[bool] = constant(True) task_labels_at_test_time: bool = False # Results: ClassVar[Type[Results]] = TaskSequenceResults def __post_init__(self): super().__post_init__() assert self.stationary_context def apply(self, method, config=None): results: IncrementalRLSetting.Results = super().apply(method, config=config) assert len(results.task_sequence_results) == 1 return results.task_sequence_results[0] # result: TraditionalRLResults = TraditionalRLResults(task_results=results.task_sequence_results[0].task_results) result: TraditionalRLResults = results.task_sequence_results[0] # assert False, result._runtime return result @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. Defaults to the number of tasks, but may be different, for instance in so-called Multi-Task Settings, this is set to 1. """ return 1 ================================================ FILE: sequoia/settings/rl/traditional/setting_test.py ================================================ # TODO: Tests for the "traditional" RL setting. from typing import ClassVar, Type import pytest import torch from sequoia.settings.assumptions.incremental_results import TaskSequenceResults from sequoia.settings.rl.setting_test import DummyMethod from ..incremental.setting_test import TestIncrementalRLSetting as IncrementalRLSettingTests from .setting import TraditionalRLSetting class TestTraditionalRLSetting(IncrementalRLSettingTests): Setting: ClassVar[Type[Setting]] = TraditionalRLSetting dataset: pytest.fixture def test_on_task_switch_is_called(self): setting = self.Setting( dataset="CartPole-v0", nb_tasks=5, # train_steps_per_task=100, train_max_steps=500, test_max_steps=500, ) assert setting.stationary_context method = DummyMethod() _ = setting.apply(method) # assert setting.task_labels_at_test_time # assert False, method.observation_task_labels assert method.n_fit_calls == 1 import torch assert torch.unique_consecutive( torch.as_tensor(method.observation_task_labels) ).tolist() != list(range(setting.nb_tasks)) def validate_results( self, setting: TraditionalRLSetting, method: DummyMethod, results: TraditionalRLSetting.Results, ) -> None: """Check that the results make sense. The Dummy Method used also keeps useful attributes, which we check here. """ assert results assert results.objective assert setting.stationary_context assert len(results.task_results) == setting.nb_tasks assert results.average_metrics == sum( task_result.average_metrics for task_result in results.task_results ) t = setting.nb_tasks p = setting.phases assert setting.known_task_boundaries_at_train_time assert setting.known_task_boundaries_at_test_time assert setting.task_labels_at_train_time assert not setting.task_labels_at_test_time if setting.nb_tasks == 1: assert not method.received_task_ids assert not method.received_while_training else: # Only received during testing. assert method.n_task_switches == t assert method.received_task_ids == [None for t_i in range(t)] assert method.received_while_training == [False for _ in range(t)] def validate_results( self, setting: TraditionalRLSetting, method: DummyMethod, results: TraditionalRLSetting.Results, ) -> None: assert results assert results.objective assert isinstance(results, TaskSequenceResults) assert len(results.task_results) == setting.nb_tasks assert results.average_metrics == sum( task_result.average_metrics for task_result in results.task_results ) assert method.n_fit_calls == 1 # BUG: Traditional/Multi-Task RL have one too many task labels: assert list(set(method.observation_task_labels)) == list(range(setting.nb_tasks)) train_task_labels = torch.as_tensor(method.observation_task_labels) new_train_task_labels = torch.unique_consecutive(train_task_labels).tolist() if setting.nb_tasks > 1: assert new_train_task_labels != list(range(setting.nb_tasks)) else: assert set(method.observation_task_labels) == {0} ================================================ FILE: sequoia/settings/rl/wrappers/__init__.py ================================================ """ Wrappers specific to the RL settings, so not exactly as general as those in `common/gym_wrappers`. """ from .measure_performance import MeasureRLPerformanceWrapper from .task_labels import HideTaskLabelsWrapper, RemoveTaskLabelsWrapper from .typed_objects import NoTypedObjectsWrapper, TypedObjectsWrapper ================================================ FILE: sequoia/settings/rl/wrappers/measure_performance.py ================================================ """ TODO: Create a Wrapper that measures performance over the first epoch of training in SL. Then maybe after we can make something more general that also works for RL. """ from typing import Any, Dict, List, Optional, Sequence, Union import numpy as np from torch import Tensor import wandb from sequoia.common.gym_wrappers.measure_performance import MeasurePerformanceWrapper from sequoia.common.metrics import Metrics from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.settings.base import Actions, Observations, Rewards from sequoia.settings.rl import ActiveEnvironment from sequoia.utils.utils import add_prefix class MeasureRLPerformanceWrapper( MeasurePerformanceWrapper # MeasurePerformanceWrapper[ActiveEnvironment] # python 3.7 # MeasurePerformanceWrapper[ActiveEnvironment, EpisodeMetrics] # python 3.8+ ): def __init__( self, env: ActiveEnvironment, eval_episodes: int = None, eval_steps: int = None, wandb_prefix: str = None, ): super().__init__(env) self._metrics: Dict[int, EpisodeMetrics] = {} self._eval_episodes = eval_episodes or 0 self._eval_steps = eval_steps or 0 # Counter for the number of steps. self._steps: int = 0 # Counter for the number of episodes self._episodes: int = 0 self.wandb_prefix = wandb_prefix self._batch_size = self.env.num_envs if self.is_vectorized else 1 self._current_episode_reward = np.zeros([self._batch_size], dtype=float) self._current_episode_steps = np.zeros([self._batch_size], dtype=int) @property def in_evaluation_period(self) -> bool: """Returns wether the performance is currently being monitored. Returns ------- bool Wether or not the performance on the env is being monitored. """ if self._eval_steps: return self._steps <= self._eval_steps if self._eval_episodes: return self._eval_episodes <= self._eval_episodes return True def reset(self) -> Union[Observations, Any]: obs = super().reset() # assert isinstance(obs, Observations) return obs def step(self, action: Actions): observation, rewards_, done, info = super().step(action) self._steps += 1 reward = rewards_.y if isinstance(rewards_, Rewards) else rewards_ if isinstance(done, bool): self._episodes += int(done) elif isinstance(done, np.ndarray): self._episodes += sum(done) else: self._episodes += done.int().sum() if self.in_evaluation_period: if self.is_vectorized: for env_index, (env_is_done, env_reward) in enumerate(zip(done, reward)): self._current_episode_reward[env_index] += env_reward self._current_episode_steps[env_index] += 1 else: self._current_episode_reward[0] += reward self._current_episode_steps[0] += 1 metrics = self.get_metrics(action, reward, done) if metrics is not None: assert self._steps not in self._metrics, "two metrics at same step?" self._metrics[self._steps] = metrics return observation, rewards_, done, info # def send(self, action: Actions) -> Rewards: # self.action_ = action # rewards_ = super().send(action) # self._steps += 1 # reward = rewards_.y if isinstance(rewards_, Rewards) else rewards_ # # TODO: Need access to the "done" signal in here somehow. # done = self.done_ # if isinstance(done, bool): # self._episodes += int(done) # elif isinstance(done, np.ndarray): # self._episodes += sum(done) # else: # self._episodes += done.int().sum() # if self.in_evaluation_period: # if self.is_vectorized: # for env_index, (env_is_done, env_reward) in enumerate( # zip(done, reward) # ): # self._current_episode_reward[env_index] += env_reward # self._current_episode_steps[env_index] += 1 # else: # self._current_episode_reward[0] += reward # self._current_episode_steps[0] += 1 # metrics = self.get_metrics(action, reward, done) # if metrics is not None: # assert self._steps not in self._metrics, "two metrics at same step?" # self._metrics[self._steps] = metrics # return rewards_ def get_metrics( self, action: Union[Actions, Any], reward: Union[Rewards, Any], done: Union[bool, Sequence[bool]], ) -> Optional[EpisodeMetrics]: # TODO: Add some metric about the entropy of the policy's distribution? rewards = reward.y if isinstance(reward, Rewards) else reward actions = action.y_pred if isinstance(action, Actions) else action dones: Sequence[bool] if not self.is_vectorized: rewards = [rewards] actions = [actions] assert isinstance(done, bool) dones = [done] else: assert isinstance(done, (np.ndarray, Tensor)) dones = done metrics: List[EpisodeMetrics] = [] for env_index, (env_is_done, reward) in enumerate(zip(dones, rewards)): if env_is_done: metrics.append( EpisodeMetrics( n_samples=1, # The average reward per episode. mean_episode_reward=self._current_episode_reward[env_index], # The average length of each episode. mean_episode_length=self._current_episode_steps[env_index], ) ) self._current_episode_reward[env_index] = 0 self._current_episode_steps[env_index] = 0 if not metrics: return None metric = sum(metrics, Metrics()) if wandb.run: log_dict = metric.to_log_dict() if self.wandb_prefix: log_dict = add_prefix(log_dict, prefix=self.wandb_prefix, sep="/") log_dict["steps"] = self._steps log_dict["episode"] = self._episodes wandb.log(log_dict) return metric ================================================ FILE: sequoia/settings/rl/wrappers/measure_performance_test.py ================================================ import itertools from functools import partial from itertools import accumulate import numpy as np import pytest from gym.vector import SyncVectorEnv # from sequoia.settings.rl.continual import ContinualRLSetting from sequoia.common.gym_wrappers import EnvDataset from sequoia.common.metrics.rl_metrics import EpisodeMetrics from sequoia.conftest import DummyEnvironment from .measure_performance import MeasureRLPerformanceWrapper def test_measure_RL_performance_basics(): env = DummyEnvironment(start=0, target=5, max_value=10) # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards) env = MeasureRLPerformanceWrapper(env) env.seed(123) all_episode_rewards = [] all_episode_steps = [] for episode in range(5): episode_steps = 0 episode_reward = 0 obs = env.reset() print(f"Episode {episode}, obs: {obs}") done = False while not done: action = env.action_space.sample() obs, reward, done, info = env.step(action) episode_reward += reward episode_steps += 1 # print(obs, reward, done, info) all_episode_steps.append(episode_steps) all_episode_rewards.append(episode_reward) from itertools import accumulate expected_metrics = {} for episode_steps, cumul_step, episode_reward in zip( all_episode_steps, accumulate(all_episode_steps), all_episode_rewards ): expected_metrics[cumul_step] = EpisodeMetrics( n_samples=1, mean_episode_reward=episode_reward, mean_episode_length=episode_steps, ) assert env.get_online_performance() == expected_metrics def test_measure_RL_performance_iteration(): env = DummyEnvironment(start=0, target=5, max_value=10) from gym.wrappers import TimeLimit max_episode_steps = 50 env = EnvDataset(env) env = TimeLimit(env, max_episode_steps=max_episode_steps) # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards) env = MeasureRLPerformanceWrapper(env) env.seed(123) all_episode_rewards = [] all_episode_steps = [] for episode in range(5): episode_steps = 0 episode_reward = 0 for step, obs in enumerate(env): print(f"Episode {episode}, obs: {obs}") action = env.action_space.sample() reward = env.send(action) episode_reward += reward episode_steps += 1 # print(obs, reward, done, info) assert step <= max_episode_steps, "shouldn't be able to iterate longer than that." all_episode_steps.append(episode_steps) all_episode_rewards.append(episode_reward) expected_metrics = {} for episode_steps, cumul_step, episode_reward in zip( all_episode_steps, accumulate(all_episode_steps), all_episode_rewards ): expected_metrics[cumul_step] = EpisodeMetrics( n_samples=1, mean_episode_reward=episode_reward, mean_episode_length=episode_steps, ) assert env.get_online_performance() == expected_metrics @pytest.mark.xfail( reason=f"TODO: The wrapper seems to works but the test condition is too complicated" ) def test_measure_RL_performance_batched_env(): batch_size = 3 start = [i for i in range(batch_size)] target = 5 env = EnvDataset( SyncVectorEnv( [ partial(DummyEnvironment, start=start[i], target=target, max_value=target * 2) for i in range(batch_size) ] ) ) # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards) env = MeasureRLPerformanceWrapper(env) env.seed(123) all_episode_rewards = [] all_episode_steps = [] for step, obs in enumerate(itertools.islice(env, 100)): print(f"step {step} obs: {obs}") action = np.ones(batch_size) # always increment the counter reward = env.send(action) print(env.done_) # print(obs, reward, done, info) assert step == 99 from collections import defaultdict from sequoia.common.metrics import Metrics expected_metrics = defaultdict(Metrics) for i in range(101): for env_index in range(batch_size): if i and i % target == 0: expected_metrics[i] += EpisodeMetrics( n_samples=1, mean_episode_reward=10.0, # ? FIXME: Actually understand this condition mean_episode_length=target, ) # FIXME: This test is a bit too complicated, hard to follow. I'll keep the # batches synced-up for now. # if i > 0 and (i + env_index) % target == 0: # expected_metrics[i] += EpisodeMetrics( # n_samples=1, # mean_episode_reward=sum(target - (i + env_index % target) for j in range(start[env_index], target)), # mean_episode_length=target - start[env_index] - 1 # ) assert env.get_online_performance() == expected_metrics ================================================ FILE: sequoia/settings/rl/wrappers/no_typed_objects.py ================================================ ================================================ FILE: sequoia/settings/rl/wrappers/task_labels.py ================================================ from collections.abc import Mapping from dataclasses import is_dataclass, replace from functools import singledispatch from typing import Any, Dict, Optional, Tuple, TypeVar, Union import gym from gym import Space, spaces from sequoia.common import Batch from sequoia.common.gym_wrappers import IterableWrapper, TransformObservation from sequoia.common.gym_wrappers.multi_task_environment import add_task_labels from sequoia.common.gym_wrappers.utils import IterableWrapper from sequoia.common.spaces import Sparse, TypedDictSpace from sequoia.common.spaces.named_tuple import NamedTupleSpace from sequoia.settings.base.objects import ObservationType T = TypeVar("T") @singledispatch def hide_task_labels(observation: Tuple[T, int]) -> Tuple[T, Optional[int]]: assert len(observation) == 2 return observation[0], None @hide_task_labels.register(dict) def _hide_task_labels_in_dict(observation: Dict) -> Dict: new_observation = observation.copy() assert "task_labels" in observation new_observation["task_labels"] = None return new_observation @hide_task_labels.register def _hide_task_labels_on_batch(observation: Batch) -> Batch: return replace(observation, task_labels=None) @hide_task_labels.register(Space) def hide_task_labels_in_space(observation: Space) -> Space: raise NotImplementedError( f"TODO: Don't know how to remove task labels from space {observation}." ) @hide_task_labels.register def _hide_task_labels_in_namedtuple_space( observation: NamedTupleSpace, ) -> NamedTupleSpace: spaces = observation._spaces.copy() task_label_space = spaces["task_labels"] if isinstance(task_label_space, Sparse): if task_label_space.sparsity == 1.0: # No need to change anything: return observation # Replace the existing 'Sparse' space with another one with the same # base but with sparsity = 1.0 task_label_space = task_label_space.base assert not isinstance(task_label_space, Sparse) task_label_space = Sparse(task_label_space, sparsity=1.0) spaces["task_labels"] = task_label_space return type(observation)(**spaces) @hide_task_labels.register def _hide_task_labels_in_tuple_space(observation: spaces.Tuple) -> spaces.Tuple: assert len(observation.spaces) == 2, "ambiguous" task_label_space = observation.spaces[1] if isinstance(task_label_space, Sparse): # Replace the existing 'Sparse' space with another one with the same # base but with sparsity = 1.0 task_label_space = task_label_space.base assert not isinstance(task_label_space, Sparse) # We set the task label space as sparse, instead of removing that space. return spaces.Tuple([observation[0], Sparse(task_label_space, sparsity=1.0)]) @hide_task_labels.register def hide_task_labels_in_dict_space(observation: spaces.Dict) -> spaces.Dict: task_label_space = observation.spaces["task_labels"] if isinstance(task_label_space, Sparse): # Replace the existing 'Sparse' space with another one with the same # base but with sparsity = 1.0 task_label_space = task_label_space.base assert not isinstance(task_label_space, Sparse) return type(observation)( { key: subspace if key != "task_labels" else Sparse(task_label_space, 1.0) for key, subspace in observation.spaces.items() } ) @hide_task_labels.register(TypedDictSpace) def hide_task_labels_in_typed_dict_space( observation: TypedDictSpace[T], ) -> TypedDictSpace[T]: task_label_space = observation.spaces["task_labels"] if isinstance(task_label_space, Sparse): # Replace the existing 'Sparse' space with another one with the same # base but with sparsity = 1.0 task_label_space = task_label_space.base assert not isinstance(task_label_space, Sparse) return type(observation)( { key: subspace if key != "task_labels" else Sparse(task_label_space, 1.0) for key, subspace in observation.spaces.items() }, dtype=observation.dtype, ) class HideTaskLabelsWrapper(TransformObservation): """Hides the task labels by setting them to None, rather than removing them entirely. This might be useful in order not to break the inheritance 'contract' when going from contexts where you don't have the task labels to contexts where you do have them. """ def __init__(self, env: gym.Env, f=hide_task_labels): super().__init__(env, f=f) self.observation_space = hide_task_labels(self.env.observation_space) @singledispatch def remove_task_labels(observation: Any) -> Any: """Removes the task labels from an observation / observation space.""" if is_dataclass(observation): return replace(observation, task_labels=None) raise NotImplementedError( f"No handler registered for value {observation} of type {type(observation)}" ) @remove_task_labels.register(spaces.Tuple) @remove_task_labels.register(tuple) def _(observation: Tuple[T, Any]) -> Tuple[T]: if len(observation) == 2: return observation[1] if len(observation) == 1: return observation[0] raise NotImplementedError(observation) @remove_task_labels.register def _remove_task_labels_in_namedtuple_space( observation: NamedTupleSpace, ) -> NamedTupleSpace: spaces = observation._spaces.copy() spaces.pop("task_labels") return type(observation)(**spaces) @remove_task_labels.register(spaces.Dict) @remove_task_labels.register(Mapping) def _(observation: Dict) -> Dict: assert "task_labels" in observation.keys() return type(observation)( **{key: value for key, value in observation.items() if key != "task_labels"} ) class RemoveTaskLabelsWrapper(TransformObservation): """Removes the task labels from the observations and the observation space.""" def __init__(self, env: gym.Env, f=remove_task_labels): super().__init__(env, f=f) self.observation_space = remove_task_labels(self.env.observation_space) @classmethod def space_change(cls, input_space: gym.Space) -> gym.Space: assert isinstance(input_space, spaces.Tuple), input_space # assert len(input_space) == 2, input_space return input_space[0] class FixedTaskLabelWrapper(IterableWrapper): """Wrapper that adds always the same given task id to the observations. Used when the list of envs for each task is passed, so that each env also has the task id as part of their observation space and in their observations. """ def __init__(self, env: gym.Env, task_label: Optional[int], task_label_space: gym.Space): super().__init__(env=env) self.task_label = task_label self.task_label_space = task_label_space self.observation_space = add_task_labels( self.env.observation_space, task_labels=task_label_space ) def observation(self, observation: Union[ObservationType, Any]) -> ObservationType: return add_task_labels(observation, self.task_label) def reset(self): return self.observation(super().reset()) def step(self, action): obs, reward, done, info = super().step(action) return self.observation(obs), reward, done, info ================================================ FILE: sequoia/settings/rl/wrappers/typed_objects.py ================================================ from dataclasses import fields import dataclasses from functools import singledispatch from typing import Any, Dict, Sequence, Tuple, TypeVar, Union import gym import numpy as np from gym import Space, spaces from torch import Tensor from sequoia.common.gym_wrappers import IterableWrapper from sequoia.common.gym_wrappers.convert_tensors import supports_tensors from sequoia.common.spaces import TypedDictSpace from sequoia.common.spaces.named_tuple import NamedTupleSpace from sequoia.settings.base.environment import Environment from sequoia.settings.base.objects import ( Actions, ActionType, Observations, ObservationType, Rewards, RewardType, ) T = TypeVar("T") class TypedObjectsWrapper(IterableWrapper, Environment[ObservationType, ActionType, RewardType]): """Wrapper that converts the observations and rewards coming from the env to `Batch` objects. NOTE: Not super necessary atm, but this would perhaps be useful if methods are built and expect to have a given 'type' of observations to work with, then any new setting that inherits from their target setting should have observations that subclass/inherit from the observations of their parent, so as not to break compatibility. For example, if a Method targets the ClassIncrementalSetting, then it expects to receive "observations" of the type described by ClassIncrementalSetting.Observations, and if it were to be applied on a TaskIncrementalSLSetting (which inherits from ClassIncrementalSetting), then the observations from that setting should be isinstances (or subclasses of) the Observations class that this method was designed to receive! """ def __init__( self, env: gym.Env, observations_type: ObservationType, rewards_type: RewardType, actions_type: ActionType, observation_space: TypedDictSpace = None, action_space: TypedDictSpace = None, reward_space: TypedDictSpace = None, ): self.Observations = observations_type self.Rewards = rewards_type self.Actions = actions_type super().__init__(env=env) observation_fields = fields(self.Observations) action_fields = fields(self.Actions) reward_fields = fields(self.Rewards) if not all([observation_fields, action_fields, reward_fields]): raise RuntimeError( f"The Observations/Actions/Rewards classes passed to the TypedObjectsWrapper all need to have at least one field!" ) simple_spaces = (spaces.Box, spaces.Discrete, spaces.MultiDiscrete, spaces.MultiBinary) num_envs = getattr(self.env, "num_envs", None) # Set the observation space. if observation_space: self.observation_space = observation_space elif isinstance(self.env.observation_space, spaces.Dict): # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`. self.observation_space = TypedDictSpace( spaces=self.env.observation_space.spaces, dtype=self.Observations, ) elif isinstance(self.env.observation_space, simple_spaces): # we can get away with this since the class has only one field and the space is simple. field_name = observation_fields[0].name if len(observation_fields) > 1: # all the other fields need to have a default value, since the space doesn't have any. # TODO: Create a `ConstantSpace`, `NoneSpace`. If a field has `None` default value, # put a required_fields = [ f for f in observation_fields if f.default is dataclasses.MISSING and f.default_factory is dataclasses.MISSING and f.init ] required_field_names = [f.name for f in required_fields] if any(f.name != field_name for f in required_fields): raise NotImplementedError( f"Can't infer the observaiton space is given class {self.Observations}, " f"since has required fields {required_field_names} " f"that aren't present in the observation space. " ) self.observation_space = TypedDictSpace( spaces={field_name: self.env.observation_space}, dtype=self.Observations ) else: raise NotImplementedError( f"Need to pass the observation space to the TypedObjectsWrapper constructor when " f"the wrapped env's observation space isn't already a Dict or TypedDictSpace and " f"`Observations` has more than one field. (Observations: {self.Observations}, " f"observation_fields: {[f.name for f in observation_fields]})" ) # Set/construct the action space. if action_space: self.action_space = action_space elif isinstance(self.env.action_space, spaces.Dict): # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`. self.action_space = TypedDictSpace( spaces=self.env.action_space.spaces, dtype=self.Actions, ) elif (isinstance(self.env.action_space, simple_spaces) and len(action_fields) == 1) or ( isinstance(self.env.action_space, spaces.Tuple) and num_envs ): field_name = action_fields[0].name self.action_space = TypedDictSpace( spaces={field_name: self.env.action_space}, dtype=self.Actions ) else: raise NotImplementedError( "Need to pass the action space to the TypedObjectsWrapper constructor when " "the wrapped env's action space isn't already a Dict or TypedDictSpace and " "the Actions class doesn't have just one field." f"(wrapped action space: {self.env.action_space}, Actions: {self.Actions})" ) # Set / construct the reward space. # Get the default reward space in case the wrapped env doesn't have a `reward_space` attr. default_reward_space = spaces.Box( low=self.env.reward_range[0], high=self.env.reward_range[1], shape=((num_envs,) if num_envs is not None else ()), dtype=np.float64, ) if reward_space: self.reward_space = reward_space elif not hasattr(self.env, "reward_space"): if len(reward_fields) != 1: raise NotImplementedError( "Need to pass the reward space to the TypedObjectsWrapper constructor when " "the wrapped env doesn't have a `reward_space` attribute and the Rewards " "class has more than one field." ) field_name = reward_fields[0].name self.reward_space = TypedDictSpace( spaces={field_name: default_reward_space}, dtype=self.Rewards, ) elif isinstance(self.env.reward_space, spaces.Dict): # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`. self.reward_space = TypedDictSpace( spaces=self.env.reward_space.spaces, dtype=self.Rewards, ) elif isinstance(self.env.reward_space, simple_spaces) and len(reward_fields) == 1: field_name = reward_fields[0].name self.reward_space = TypedDictSpace( spaces={field_name: self.env.reward_space}, dtype=self.Rewards, ) else: raise NotImplementedError( "Need to pass the reward space to the TypedObjectsWrapper constructor when " "the wrapped env's reward space isn't already a Dict or TypedDictSpace and " "the Rewards class doesn't have just one field." ) # TODO: Using a TypedDictSpace for the action/reward spaces is a small change in code, but # will most likely have a large impact on all the methods and tests! # THis here can be used to 'turn off' the changes to those spaces done above: self.action_space = self.env.action_space self.reward_space = getattr(self.env, "reward_space", default_reward_space) # if isinstance(self.env.observation_space, NamedTupleSpace): # self.observation_space = self.env.observation_space # self.observation_space.dtype = self.Observations def step( self, action: ActionType ) -> Tuple[ ObservationType, RewardType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]] ]: # "unwrap" the actions before passing it to the wrapped environment. action = self.action(action) observation, reward, done, info = self.env.step(action) # TODO: Make the observation space a Dict observation = self.observation(observation) reward = self.reward(reward) return observation, reward, done, info def observation(self, observation: Any) -> ObservationType: if isinstance(observation, self.Observations): return observation if isinstance(observation, tuple): # TODO: Dissallow this: shouldn't handle tuples since they can be quite ambiguous. # assert False, observation return self.Observations(*observation) if isinstance(observation, dict): try: return self.Observations(**observation) except TypeError: assert False, (self.Observations, observation) assert isinstance(observation, (Tensor, np.ndarray)) return self.Observations(observation) def action(self, action: ActionType) -> Any: # TODO: Assert this eventually # assert isinstance(action, Actions), action if isinstance(action, Actions): action = action.y_pred if isinstance(action, Tensor) and not supports_tensors(self.env.action_space): action = action.detach().cpu().numpy() if action not in self.env.action_space: if isinstance(self.env.action_space, spaces.Tuple): action = tuple(action) return action def reward(self, reward: Any) -> RewardType: return self.Rewards(reward) def reset(self, **kwargs) -> ObservationType: observation = self.env.reset(**kwargs) return self.observation(observation) def __iter__(self): for batch in self.env: if isinstance(batch, tuple) and len(batch) == 2: yield self.observation(batch[0]), self.reward(batch[1]) elif isinstance(batch, tuple) and len(batch) == 1: yield self.observation(batch[0]) else: yield self.observation(batch) def send(self, action: ActionType) -> RewardType: action = self.action(action) reward = self.env.send(action) return self.reward(reward) # TODO: turn unwrap into a single-dispatch callable. # TODO: Atm 'unwrap' basically means "get rid of everything apart from the first # item", which is a bit ugly. # Unwrap should probably be a method on the corresponding `Batch` class, which could # maybe accept a Space to fit into? @singledispatch def unwrap(obj: Any) -> Any: return obj # raise NotImplementedError(obj) @unwrap.register(int) @unwrap.register(float) @unwrap.register(np.ndarray) @unwrap.register(list) def _unwrap_scalar(v): return v @unwrap.register(Actions) def _unwrap_actions(obj: Actions) -> Union[Tensor, np.ndarray]: return obj.y_pred @unwrap.register(Rewards) def _unwrap_rewards(obj: Rewards) -> Union[Tensor, np.ndarray]: return obj.y @unwrap.register(Observations) def _unwrap_observations(obj: Observations) -> Union[Tensor, np.ndarray]: # This gets rid of everything except just the image. # TODO: Keep the task labels? or no? For now, no. return obj.x @unwrap.register(NamedTupleSpace) def _unwrap_space(obj: NamedTupleSpace) -> Space: # This gets rid of everything except just the first item in the space. # TODO: Keep the task labels? or no? For now, no. return obj[0] @unwrap.register(TypedDictSpace) def _unwrap_space(obj: TypedDictSpace) -> spaces.Dict: # This gets rid of everything except just the first item in the space. # TODO: Keep the task labels? or no? For now, no. return spaces.Dict(obj.spaces) class NoTypedObjectsWrapper(IterableWrapper): """Does the opposite of the 'TypedObjects' wrapper. Can be added on top of that wrapper to strip off the typed objects it returns and just returns tensors/np.ndarrays instead. This is used for example when applying a method from stable-baselines3, as they only want to get np.ndarrays as inputs. Parameters ---------- IterableWrapper : [type] [description] """ def __init__(self, env: gym.Env): super().__init__(env) self.observation_space = unwrap(self.env.observation_space) def step(self, action): if isinstance(action, Actions): action = unwrap(action) if hasattr(action, "detach"): action = action.detach() assert action in self.action_space, (action, type(action), self.action_space) observation, reward, done, info = self.env.step(action) observation = unwrap(observation) reward = unwrap(reward) return observation, reward, done, info def reset(self, **kwargs): observation = self.env.reset(**kwargs) return unwrap(observation) ================================================ FILE: sequoia/settings/settings.puml ================================================ @startuml settings ' skinparam linetype polyline ' skinparam linetype ortho ' skinparam classFontSize 20 ' fieldFontSize 20 ' !include gym.puml ' !include assumptions/assumptions.puml hide empty members ' hide fields ' hide methods ' ' Use this to turn on / off the display of assumptions ' remove Assumptions ' ' Use this to turn on / off groups of assumptions ' remove supervision_assumptions ' remove action_space_assumption ' remove Settings ' Comment/uncomment this to show/hide the descriptions for each node. ' hide fields package settings as sequoia.settings { ' !include base/base.puml ' package settings.base { ' } package settings.assumptions { !include assumptions/assumptions.puml remove assumptions remove <> remove <> remove <> remove <> ' remove supervision_assumptions ' remove context_assumption_family ' remove <> } ' !include settings/rl/rl.puml package rl { ' ContinualRLSetting -.- rl.continuous.ContinuousTaskAgnosticRLSetting abstract class RLSetting <> extends SparseFeedback, ActiveEnvironment {} package continuous as rl.continuous { class ContinuousTaskAgnosticRLSetting <> implements RLSetting, ContinuousTaskAgnosticSetting {} } package discrete as rl.discrete { class DiscreteTaskAgnosticRLSetting <> implements DiscreteTaskAgnosticSetting, ContinuousTaskAgnosticRLSetting {} } package incremental as rl.incremental { class IncrementalRLSetting <> implements IncrementalSetting, DiscreteTaskAgnosticRLSetting {} } package class_incremental as rl.class_incremental { class ClassIncrementalRLSetting <> implements ClassIncrementalSetting, IncrementalRLSetting {} } package domain_incremental as rl.domain_incremental { class DomainIncrementalRLSetting <> implements DomainIncrementalSetting, IncrementalRLSetting {} } package traditional as rl.traditional { class TraditionalRLSetting <> implements TraditionalSetting, IncrementalRLSetting {} } package task_incremental as rl.task_incremental { class TaskIncrementalRLSetting <> implements TaskIncrementalSetting, IncrementalRLSetting {} } package multi_task as rl.multi_task { class MultiTaskRLSetting <> implements MultiTaskSetting, TaskIncrementalRLSetting, TraditionalRLSetting {} } remove rl.class_incremental remove rl.domain_incremental } ' !include settings/rl/sl.puml package sl { abstract class SLSetting <> extends DenseFeedback, PassiveEnvironment {} package continuous as sl.continuous { class ContinuousTaskAgnosticSLSetting <> implements SLSetting, ContinuousTaskAgnosticSetting {} } package discrete as sl.discrete { class DiscreteTaskAgnosticSLSetting <> implements DiscreteTaskAgnosticSetting, ContinuousTaskAgnosticSLSetting {} } package incremental as sl.incremental { class IncrementalSLSetting <> implements IncrementalSetting, DiscreteTaskAgnosticSLSetting {} } package class_incremental as sl.class_incremental { class ClassIncrementalSLSetting <> implements ClassIncrementalSetting, IncrementalSLSetting {} } package domain_incremental as sl.domain_incremental { class DomainIncrementalSLSetting <> implements DomainIncrementalSetting, IncrementalSLSetting {} } package traditional as sl.traditional { class TraditionalSLSetting <> implements TraditionalSetting, IncrementalSLSetting {} } package task_incremental as sl.task_incremental { class TaskIncrementalSLSetting <> implements TaskIncrementalSetting, IncrementalSLSetting {} } package multi_task as sl.multi_task { class MultiTaskSLSetting <> implements MultiTaskSetting, TaskIncrementalSLSetting, TraditionalSLSetting {} } remove sl.class_incremental remove sl.domain_incremental } } @enduml ================================================ FILE: sequoia/settings/sl/README.md ================================================ # SL Tree This is the Tree of Setting on the RL side. ================================================ FILE: sequoia/settings/sl/__init__.py ================================================ from .. import Results from .environment import PassiveEnvironment # TODO: Replace all uses of 'PassiveEnvironment' with 'SLEnvironment' SLEnvironment = PassiveEnvironment from .continual import ContinualSLSetting from .discrete import DiscreteTaskAgnosticSLSetting from .incremental import IncrementalSLSetting from .setting import SLSetting # NOTE: Class-Incremental is now the same as IncrementalSLSetting. # from .class_incremental import ClassIncrementalSetting ClassIncrementalSetting = IncrementalSLSetting from .domain_incremental import DomainIncrementalSLSetting from .multi_task import MultiTaskSLSetting from .task_incremental import TaskIncrementalSLSetting from .traditional import TraditionalSLSetting # TODO: Import variants without the 'SL' in it above, and then don't include then in the # __all__ below, to improve backward compatibility a bit. # __all__ = [ # "PassiveEnvironment", # "SLSetting", ... # ] ================================================ FILE: sequoia/settings/sl/continual/__init__.py ================================================ from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment from .objects import Actions, Observations, ObservationSpace, Rewards from .setting import ContinualSLSetting Environment = ContinualSLEnvironment TestEnvironment = ContinualSLTestEnvironment ================================================ FILE: sequoia/settings/sl/continual/environment.py ================================================ """ Continual SL environment. (smooth task boundaries, etc) """ import warnings from functools import partial from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Type, Union import gym import numpy as np from continuum.datasets import ( CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST, QMNIST, CIFARFellowship, Core50, Core50v2_79, Core50v2_196, Core50v2_391, FashionMNIST, ImageNet100, ImageNet1000, MNISTFellowship, Synbols, ) from gym import Space, spaces from torch import Tensor from torch.nn import functional as F from torch.utils.data import Dataset, IterableDataset from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support as tensor_space from sequoia.common.gym_wrappers.utils import tile_images from sequoia.common.spaces import Image, TypedDictSpace from sequoia.common.transforms import Transforms from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.utils.logging_utils import get_logger from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType logger = get_logger(__name__) base_observation_spaces: Dict[str, Space] = { dataset_class.__name__.lower(): space for dataset_class, space in { MNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), FashionMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), KMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), EMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), QMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))), MNISTFellowship: tensor_space(Image(0, 1, shape=(1, 28, 28))), # TODO: Determine the true bounds on the image values in cifar10. # Appears to be ~= [-2.5, 2.5] CIFAR10: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), CIFAR100: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), CIFARFellowship: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))), ImageNet100: tensor_space(Image(0, 1, shape=(224, 224, 3))), ImageNet1000: tensor_space(Image(0, 1, shape=(224, 224, 3))), Core50: tensor_space(Image(0, 1, shape=(224, 224, 3))), Core50v2_79: tensor_space(Image(0, 1, shape=(224, 224, 3))), Core50v2_196: tensor_space(Image(0, 1, shape=(224, 224, 3))), Core50v2_391: tensor_space(Image(0, 1, shape=(224, 224, 3))), Synbols: tensor_space(Image(0, 1, shape=(3, 32, 32))), }.items() } base_action_spaces: Dict[str, Space] = { dataset_class.__name__.lower(): space for dataset_class, space in { MNIST: spaces.Discrete(10), FashionMNIST: spaces.Discrete(10), KMNIST: spaces.Discrete(10), EMNIST: spaces.Discrete(10), QMNIST: spaces.Discrete(10), MNISTFellowship: spaces.Discrete(30), CIFAR10: spaces.Discrete(10), CIFAR100: spaces.Discrete(100), CIFARFellowship: spaces.Discrete(110), ImageNet100: spaces.Discrete(100), ImageNet1000: spaces.Discrete(1000), Core50: spaces.Discrete(50), Core50v2_79: spaces.Discrete(50), Core50v2_196: spaces.Discrete(50), Core50v2_391: spaces.Discrete(50), Synbols: spaces.Discrete(48), }.items() } # NOTE: Since the current SL datasets are image classification, the reward spaces are # the same as the action space. But that won't be the case when we add other types of # datasets! base_reward_spaces: Dict[str, Space] = { dataset_name: action_space for dataset_name, action_space in base_action_spaces.items() if isinstance(action_space, spaces.Discrete) } def split_batch( batch: Tuple[Tensor, ...], hide_task_labels: bool, Observations=Observations, Rewards=Rewards, ) -> Tuple[Observations, Rewards]: """Splits the batch into a tuple of Observations and Rewards. Parameters ---------- batch : Tuple[Tensor, ...] A batch of data coming from the dataset. Returns ------- Tuple[Observations, Rewards] A tuple of Observations and Rewards. """ # In this context (class_incremental), we will always have 3 items per # batch, because we use the ClassIncremental scenario from Continuum. if len(batch) == 2 and all(isinstance(item, Tensor) for item in batch): x, y = batch t = None else: assert len(batch) == 3 x, y, t = batch if hide_task_labels: # Remove the task labels if we're not currently allowed to have # them. # TODO: Using None might cause some issues. Maybe set -1 instead? t = None observations = Observations(x=x, task_labels=t) rewards = Rewards(y=y) return observations, rewards # IDEA: Have this env be the 'wrapper' / base env type for the continual SL envs, and # register them in gym! def default_split_batch_function( hide_task_labels: bool, Observations: Type[ObservationType] = Observations, Rewards: Type[RewardType] = Rewards, ) -> Callable[[Tuple[Tensor, ...]], Tuple[ObservationType, RewardType]]: """Returns a callable that is used to split a batch into observations and rewards.""" return partial( split_batch, hide_task_labels=hide_task_labels, Observations=Observations, Rewards=Rewards, ) class ContinualSLEnvironment(PassiveEnvironment[ObservationType, ActionType, RewardType]): """Continual Supervised Learning Environment. TODO: Here we actually inform the environment of its observation / action / reward spaces, which isn't ideal, but is arguably better than giving the env the responsibility (and arguments needed) to create the datasets of each task for the right split, apply the transforms, of each task and to use the right train/val/test split """ def __init__( self, dataset: Union[Dataset, IterableDataset], hide_task_labels: bool = True, observation_space: TypedDictSpace = None, action_space: gym.Space = None, reward_space: gym.Space = None, Observations: Type[ObservationType] = Observations, Actions: Type[ActionType] = Actions, Rewards: Type[RewardType] = Rewards, split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None, pretend_to_be_active: bool = False, strict: bool = False, one_epoch_only: bool = True, drop_last: bool = False, **kwargs, ): assert isinstance(dataset, Dataset) self._hide_task_labels = hide_task_labels split_batch_fn = default_split_batch_function( hide_task_labels=hide_task_labels, Observations=Observations, Rewards=Rewards, # TODO: Fix this 'Rewards' being of the 'wrong' type. ) self._one_epoch_only = one_epoch_only super().__init__( dataset=dataset, split_batch_fn=split_batch_fn, observation_space=observation_space, action_space=action_space, reward_space=reward_space, pretend_to_be_active=pretend_to_be_active, strict=strict, drop_last=drop_last, **kwargs, ) # TODO: Clean up the batching of a Sparse(Discrete) space so its less ugly. def step( self, action: ActionType ) -> Tuple[ObservationType, Optional[RewardType], bool, Sequence[Dict]]: obs, reward, done, info = super().step(action) if done and self._one_epoch_only: self.close() return obs, reward, done, info def __iter__(self): yield from super().__iter__() if self._one_epoch_only: self.close() # TODO: Remove / fix this 'split batch function'. The problem is that we need to # tell the environment how to take the three items from continuum and convert them # into from pathlib import Path from typing import Optional import torch from sequoia.common.config import Config from sequoia.common.gym_wrappers import has_wrapper from sequoia.common.metrics import ClassificationMetrics from sequoia.settings.assumptions.continual import TestEnvironment from sequoia.utils.logging_utils import get_logger from .results import ContinualSLResults class ContinualSLTestEnvironment(TestEnvironment[ContinualSLEnvironment]): def __init__( self, env: ContinualSLEnvironment, directory: Path, hide_task_labels: bool = True, step_limit: Optional[int] = None, no_rewards: bool = False, config: Config = None, **kwargs, ): from .wrappers import ShowLabelDistributionWrapper if not has_wrapper(env, ShowLabelDistributionWrapper): env = ShowLabelDistributionWrapper(env, env_name="test") super().__init__( env, directory=directory, step_limit=step_limit, no_rewards=no_rewards, config=config, **kwargs, ) # IDEA: Make the env give us the task ids, and then hide them again after, just # so we can get propper 'per-task' metrics. # NOTE: This wouldn't be ideal however, as would assume that there is a 'discrete' # set of values for the task id, which is only true in Classification datasets. assert isinstance(self.env.unwrapped, ContinualSLEnvironment) self.env.unwrapped.hide_task_labels = False self._steps = 0 self.results = ContinualSLResults() self._reset = False self.action_: Optional[ActionType] = None from collections import deque self.observation_queue = deque(maxlen=3) def get_results(self) -> ContinualSLResults: from .wrappers import ShowLabelDistributionWrapper if has_wrapper(self, ShowLabelDistributionWrapper): self.results.plots_dict["Label distribution"] = self.env.make_figure() return self.results def __iter__(self): """BUG: The iter/send type of test loop doesn't produce any results!""" assert self.unwrapped.pretend_to_be_active # obs = self.reset() # self.observations = obs # yield obs, None self._before_reset() for i, (obs, rewards) in enumerate(self.env.__iter__()): if i == 0: self._after_reset(obs) if len(self.observation_queue) == self.observation_queue.maxlen: raise RuntimeError( f"Can't consume more than {self.observation_queue.maxlen} batches " f"in a row without sending an action!" ) self.observation_queue.append(obs) if self.no_rewards: rewards = None yield obs, rewards self.close() def send(self, actions: ActionType) -> Optional[RewardType]: self._before_step(actions) rewards = self.env.send(actions) obs = self.observation_queue.popleft() info = getattr(obs, "info", {}) done = self.get_total_steps() >= self.step_limit self._after_step(obs, rewards, done, info) if self.no_rewards: rewards = None return rewards def reset(self): return super().reset() # if not self._reset: # logger.debug("Initial reset.") # self._reset = True # return super().reset() # else: # # TODO: Why is this a good thing again? Why not just let an 'EpisodeLimit' # # wrapper handle this? # logger.debug("Resetting the env closes it. (only one episode in SL)") # self.close() # return None def _before_step(self, action): self.action_ = action return super()._before_step(action) def _after_step(self, observation, reward, done, info): # TODO: Fix this once we actually use a ClassificationAction! if not isinstance(reward, Rewards): reward = Rewards(y=torch.as_tensor(reward)) batch_size = reward.batch_size action = self.action_ assert action is not None if isinstance(self.action_space, (spaces.MultiDiscrete, spaces.MultiBinary)): n_classes = self.action_space.nvec[0] from sequoia.settings.assumptions.task_type import ClassificationActions if not isinstance(action, ClassificationActions): if isinstance(action, Actions): y_pred = action.y_pred # 'upgrade', creating some fake logits. else: y_pred = torch.as_tensor(action) fake_logits = F.one_hot(y_pred, n_classes) action = ClassificationActions(y_pred=y_pred, logits=fake_logits) else: raise NotImplementedError( f"TODO: Remove the assumption here that the env is a classification env " f"({self.action_space}, {self.reward_space})" ) if action.batch_size != reward.batch_size: warnings.warn( RuntimeWarning( f"Truncating the action since its batch size {action.batch_size} " f"is larger than the rewards': ({reward.batch_size})" ) ) action = action[:, : reward.batch_size] # TODO: Use some kind of generic `get_metrics(actions: Actions, rewards: Rewards)` # function instead. y = reward.y logits = action.logits y_pred = action.y_pred metric = ClassificationMetrics(y=y, logits=logits, y_pred=y_pred) self.results.metrics.append(metric) self._steps += 1 # Debugging issue with Monitor class: # return super()._after_step(observation, reward, done, info) if not self.enabled: return done if done and self.env_semantics_autoreset: # For envs with BlockingReset wrapping VNCEnv, this observation will be the # first one of the new episode if self.config.render: self.reset_video_recorder() self.episode_id += 1 self._flush() # Record stats: (TODO: accuracy serves as the 'reward'!) reward_for_stats = metric.accuracy self.stats_recorder.after_step(observation, reward_for_stats, done, info) # Record video if self.config.render: self.video_recorder.capture_frame() return done ## def _after_reset(self, observation: ObservationType): image_batch = observation.numpy().x # Need to create a single image with the right dtype for the Monitor # from gym to create gifs / videos with it. if self.batch_size: # Need to tile the image batch so it can be seen as a single image # by the Monitor. image_batch = tile_images(image_batch) image_batch = Transforms.channels_last_if_needed(image_batch) if image_batch.dtype == np.float32: assert (0 <= image_batch).all() and (image_batch <= 1).all() image_batch = (256 * image_batch).astype(np.uint8) assert image_batch.dtype == np.uint8 # Debugging this issue here: # super()._after_reset(image_batch) # -- Code from Monitor if not self.enabled: return # Reset the stat count self.stats_recorder.after_reset(observation) if self.config and self.config.render: self.reset_video_recorder() # Bump *after* all reset activity has finished self.episode_id += 1 self._flush() # -- def render(self, mode="human", **kwargs): # NOTE: This doesn't get called, because the video recorder uses # self.env.render(), rather than self.render() # TODO: Render when the 'render' argument in config is set to True. image_batch = super().render(mode=mode, **kwargs) if mode == "rgb_array" and self.batch_size: image_batch = tile_images(image_batch) return image_batch ================================================ FILE: sequoia/settings/sl/continual/environment_test.py ================================================ """ TODO: Tests for the TestEnvironment of the ContinualSLSetting. """ from pathlib import Path from typing import ClassVar, Type import gym import numpy as np import pytest from torch.utils.data import Subset from torchvision.datasets import MNIST from sequoia.common.config import Config from sequoia.common.metrics import ClassificationMetrics from sequoia.common.spaces import Image from sequoia.common.transforms import Compose, Transforms from sequoia.settings.sl.environment import PassiveEnvironment from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment from .results import ContinualSLResults class TestContinualSLTestEnvironment: Environment: ClassVar[Type[Environment]] = ContinualSLEnvironment TestEnvironment: ClassVar[Type[TestEnvironment]] = ContinualSLTestEnvironment @pytest.fixture() def base_env(self): batch_size = 5 transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) max_samples = 100 dataset = Subset(dataset, list(range(max_samples))) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) env = self.Environment( dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space, pretend_to_be_active=True, drop_last=False, ) assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28)) assert env.action_space.shape == (batch_size,) assert env.reward_space == env.action_space return env @pytest.mark.parametrize("no_rewards", [True, False]) def test_iteration_produces_results( self, no_rewards: bool, base_env: ContinualSLEnvironment, tmp_path: Path, config: Config, ): """TODO: Test that when iterating through the env as a dataloader and sending actions produces results. """ env = self.TestEnvironment( base_env, directory=tmp_path, step_limit=100 // base_env.batch_size, no_rewards=no_rewards, ) env.config = config for obs, rewards in env: assert rewards is None action = env.action_space.sample() rewards = env.send(action) assert (rewards is None) == env.no_rewards assert env.is_closed() results = env.get_results() self.validate_results(results) def validate_results(self, results: ContinualSLResults): assert isinstance(results, ContinualSLResults) assert isinstance(results.average_metrics, ClassificationMetrics) assert results.objective > 0 # TODO: Fix this problem: assert results.average_metrics.n_samples in [95, 100] @pytest.mark.parametrize("no_rewards", [True, False]) def test_gym_interaction_produces_results( self, no_rewards: bool, base_env: PassiveEnvironment, tmp_path: Path, config: Config ): """TODO: Test that when iterating through the env as a dataloader and sending actions produces results. """ env = self.TestEnvironment( base_env, directory=tmp_path, step_limit=100 // base_env.batch_size, no_rewards=no_rewards, ) env.config = config done = False obs = env.reset() steps = 0 while not done: action = env.action_space.sample() obs, rewards, done, info = env.step(action) steps += 1 assert (rewards is None) == env.no_rewards if steps > 20: pytest.fail("Shouldn't have gone longer than 20 steps!") # BUG: There's currently a weird off-by-1 error with the total number of steps, # which makes these checks for `is_closed()` fail. However, in practice we don't # try to iterate twice on the env, so it's not a big deal. # FIXME: Fix this check: assert env.is_closed() # FIXME: Fix this check: with pytest.raises((gym.error.ClosedEnvironmentError, gym.error.Error)): env.reset() # FIXME: Fix this check: with pytest.raises(gym.error.ClosedEnvironmentError): _ = env.step(env.action_space.sample()) results = env.get_results() self.validate_results(results) ================================================ FILE: sequoia/settings/sl/continual/envs.py ================================================ """ Utility functions for determining the observation space for a given SL dataset. """ from typing import Any, Dict, List, Optional, Sequence import gym import numpy as np import torch from continuum.datasets import ( CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST, QMNIST, CIFARFellowship, Core50, Core50v2_79, Core50v2_196, Core50v2_391, FashionMNIST, ImageNet100, ImageNet1000, MNISTFellowship, Synbols, ) from continuum.tasks import TaskSet from gym import Space, spaces from torch.utils.data import Subset, TensorDataset from sequoia.common.spaces import ImageTensorSpace, TensorBox, TensorDiscrete from sequoia.common.spaces.image import could_become_image from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) base_observation_spaces: Dict[str, Space] = { dataset_class.__name__.lower(): space for dataset_class, space in { MNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)), FashionMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)), KMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)), EMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)), QMNIST: ImageTensorSpace(0, 1, shape=(1, 28, 28)), MNISTFellowship: ImageTensorSpace(0, 1, shape=(1, 28, 28)), # TODO: Determine the true bounds on the image values in cifar10. # Appears to be ~= [-2.5, 2.5] CIFAR10: ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32)), CIFAR100: ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32)), CIFARFellowship: ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32)), ImageNet100: ImageTensorSpace(0, 1, shape=(224, 224, 3)), ImageNet1000: ImageTensorSpace(0, 1, shape=(224, 224, 3)), Core50: ImageTensorSpace(0, 1, shape=(224, 224, 3)), Core50v2_79: ImageTensorSpace(0, 1, shape=(224, 224, 3)), Core50v2_196: ImageTensorSpace(0, 1, shape=(224, 224, 3)), Core50v2_391: ImageTensorSpace(0, 1, shape=(224, 224, 3)), Synbols: ImageTensorSpace(0, 1, shape=(3, 32, 32)), }.items() } base_action_spaces: Dict[str, Space] = { dataset_class.__name__.lower(): space for dataset_class, space in { MNIST: spaces.Discrete(10), FashionMNIST: spaces.Discrete(10), KMNIST: spaces.Discrete(10), EMNIST: spaces.Discrete(10), QMNIST: spaces.Discrete(10), MNISTFellowship: spaces.Discrete(30), CIFAR10: spaces.Discrete(10), CIFAR100: spaces.Discrete(100), CIFARFellowship: spaces.Discrete(110), ImageNet100: spaces.Discrete(100), ImageNet1000: spaces.Discrete(1000), Core50: spaces.Discrete(50), Core50v2_79: spaces.Discrete(50), Core50v2_196: spaces.Discrete(50), Core50v2_391: spaces.Discrete(50), Synbols: spaces.Discrete(48), }.items() } # NOTE: Since the current SL datasets are image classification, the reward spaces are # the same as the action space. But that won't be the case when we add other types of # datasets! base_reward_spaces: Dict[str, Space] = { dataset_name: action_space for dataset_name, action_space in base_action_spaces.items() if isinstance(action_space, spaces.Discrete) } CTRL_INSTALLED: bool = False CTRL_STREAMS: List[str] = [] CTRL_NB_TASKS: Dict[str, Optional[int]] = {} try: from ctrl.tasks.task import Task from ctrl.tasks.task_generator import TaskGenerator except ImportError as exc: logger.debug(f"ctrl-bench isn't installed: {exc}") # Creating those just for type hinting. class Task: pass class TaskGenerator: pass else: CTRL_INSTALLED = True CTRL_STREAMS = ["s_plus", "s_minus", "s_in", "s_out", "s_pl", "s_long"] n_tasks = [5, 5, 5, 5, 4, None] CTRL_NB_TASKS = dict(zip(CTRL_STREAMS, n_tasks)) x_dims = [(3, 32, 32)] * len(CTRL_STREAMS) n_classes = [10, 10, 10, 10, 10, 5] for i, stream_name in enumerate(CTRL_STREAMS): # Create the 'base observation space' for this stream. obs_space = ImageTensorSpace(0, 1, shape=x_dims[i], dtype=torch.float32) # TODO: Not sure if the classes should be considered 'shared' or 'distinct'. # For now assume they are shared, so the setting's action space is always [0, 5] # but the action changes. # total_n_classes = n_tasks[i] * n_classes[i] # action_space = TensorDiscrete(n=total_n_classes) n_classes_per_task = n_classes[i] action_space = TensorDiscrete(n=n_classes_per_task) base_observation_spaces[stream_name] = obs_space base_action_spaces[stream_name] = action_space from functools import singledispatch @singledispatch def get_observation_space(dataset: Any) -> gym.Space: raise NotImplementedError( f"Don't yet have a registered handler to get the observation space of dataset " f"{dataset}." ) @get_observation_space.register(Subset) def _get_observation_space_for_subset(dataset: Subset) -> gym.Space: # The observations space of a Subset dataset is actually the same as the original # dataset. return get_observation_space(dataset.dataset) @get_observation_space.register(str) def _get_observation_space_for_dataset_name(dataset: str) -> gym.Space: if dataset not in base_observation_spaces: raise NotImplementedError( f"Can't yet tell what the 'base' observation space is for dataset " f"{dataset} because it doesn't have an entry in the " f"`base_observation_spaces` dict." ) return base_observation_spaces[dataset] @get_observation_space.register(TaskSet) def _get_observation_space_for_taskset(dataset: TaskSet) -> gym.Space: assert False, dataset # return get_observation_space(type(dataset).__name__.lower()) @get_observation_space.register(TensorDataset) def _get_observation_space_for_tensor_dataset(dataset: TensorDataset) -> gym.Space: x = dataset.tensors[0] if not (1 <= len(dataset.tensors) <= 2) or not (2 <= x.dim()): raise NotImplementedError( f"For now, can only handle TensorDatasets with 1 or 2 tensors. (x and y) " f"but dataset {dataset} has {len(dataset.tensors)}!" ) low = x.min().cpu().item() high = x.max().cpu().item() obs_space = TensorBox(low=low, high=high, shape=x.shape[1:], dtype=x.dtype) if could_become_image(obs_space): obs_space = ImageTensorSpace.wrap(obs_space) return obs_space @singledispatch def get_action_space(dataset: Any) -> gym.Space: raise NotImplementedError( f"Don't yet have a registered handler to get the action space of dataset " f"{dataset}." ) @get_action_space.register(Subset) def _get_action_space_for_subset(dataset: Subset) -> gym.Space: # The actions space of a Subset dataset is actually the same as the original # dataset. return get_action_space(dataset.dataset) @get_action_space.register(str) def _get_action_space_for_dataset_name(dataset: str) -> gym.Space: if dataset not in base_action_spaces: raise NotImplementedError( f"Can't yet tell what the 'base' action space is for dataset " f"{dataset} because it doesn't have an entry in the " f"`base_action_spaces` dict." ) return base_action_spaces[dataset] @singledispatch def get_reward_space(dataset: Any) -> gym.Space: raise NotImplementedError( f"Don't yet have a registered handler to get the reward space of dataset " f"{dataset}." ) @get_reward_space.register(Subset) def _get_reward_space_for_subset(dataset: Subset) -> gym.Space: # The rewards space of a Subset dataset is *usually* the same as the original # dataset. # TODO: Need to check this though? Maybe we're taking only the indices with a given class return get_reward_space(dataset.dataset) @get_reward_space.register(str) def _get_reward_space_for_dataset_name(dataset: str) -> gym.Space: if dataset not in base_reward_spaces: raise NotImplementedError( f"Can't yet tell what the 'base' reward space is for dataset " f"{dataset} because it doesn't have an entry in the " f"`base_reward_spaces` dict." ) return base_reward_spaces[dataset] @get_reward_space.register(TensorDataset) @get_action_space.register(TensorDataset) def get_y_space_for_tensor_dataset(dataset: TensorDataset) -> gym.Space: if len(dataset.tensors) != 2: raise NotImplementedError( f"Only able to detect the action space of TensorDatasets if they have two " f"tensors for now (x and y), but dataset {dataset} has {len(dataset.tensors)}!" ) y = dataset.tensors[-1] low = y.min().item() high = y.max().item() y_sample_shape = y.shape[1:] if y.dtype.is_floating_point: return TensorBox(low, high, shape=y_sample_shape, dtype=y.dtype) # Integer y: if low == 0: n_classes = high + 1 return TensorDiscrete(n_classes) # TODO: Add a space like DiscreteWithOffset ? return TensorBox(low, high, shape=y_sample_shape, dtype=y.dtype) @get_action_space.register(list) @get_action_space.register(tuple) def _get_action_space_for_list_of_datasets(datasets: Sequence[TaskSet]) -> gym.Space: # TODO: IDEA: If given a list of datasets, try to find the 'union' of their spaces. # This is meant to be one potential solution to the case where custom datasets are # passed for each task, like [0, 2), [3, 4], etc. action_spaces = [get_action_space(dataset) for dataset in datasets] if isinstance(action_spaces[0], spaces.Discrete): lows = [0 if isinstance(space, spaces.Discrete) else space.low for space in action_spaces] highs = [ space.n - 1 if isinstance(space, spaces.Discrete) else space.high for space in action_spaces ] if isinstance(reward_spaces[0], spaces.Discrete) and min(lows) == 0: return TensorDiscrete(max(highs) + 1) raise NotImplementedError( f"Don't yet know how to get the 'union' of the action spaces ({action_spaces}) " f" of datasets {datasets}" ) @get_reward_space.register(list) @get_reward_space.register(tuple) def _get_reward_space_for_list_of_datasets(datasets: Sequence[TaskSet]) -> gym.Space: # TODO: IDEA: If given a list of datasets, try to find the 'union' of their spaces. # This is meant to be one potential solution to the case where custom datasets are # passed for each task, like [0, 2), [3, 4], etc. reward_spaces = [get_reward_space(dataset) for dataset in datasets] if isinstance(reward_spaces[0], spaces.Discrete): lows = [0 if isinstance(space, spaces.Discrete) else space.low for space in reward_spaces] highs = [ space.n - 1 if isinstance(space, spaces.Discrete) else space.high for space in reward_spaces ] if isinstance(reward_spaces[0], spaces.Discrete) and min(lows) == 0: return TensorDiscrete(max(highs) + 1) raise NotImplementedError( f"Don't yet know how to get the 'union' of the reward spaces ({reward_spaces}) " f" of datasets {datasets}" ) ================================================ FILE: sequoia/settings/sl/continual/objects.py ================================================ from dataclasses import dataclass from typing import Optional, TypeVar from gym import spaces from torch import Tensor from sequoia.common.spaces import ImageTensorSpace, Sparse, TypedDictSpace from sequoia.settings.assumptions.continual import ContinualAssumption from sequoia.settings.sl.setting import SLSetting @dataclass(frozen=True) class Observations(SLSetting.Observations, ContinualAssumption.Observations): """Observations from a Continual Supervised Learning environment.""" x: Tensor task_labels: Optional[Tensor] = None ObservationType = TypeVar("ObservationType", bound=Observations) import torch class ObservationSpace(TypedDictSpace[ObservationType]): """Observation space of a Continual SL Setting.""" # The sample space: this is a gym.spaces.Box subclass with added properties for # images, such as `channels`, `h`, `w`, `is_channels_first`, etc. # This space will return Tensors. x: ImageTensorSpace # The task label space: This is a gym.spaces.MultiDiscrete of Tensors. task_labels: Sparse[torch.LongTensor] # TODO: Eventually also use some kind of structured action and reward space! # TODO: Figure out how/where to switch the actions type to be specific to classification # from sequoia.settings.assumptions.task_type import ClassificationActions @dataclass(frozen=True) class Actions(SLSetting.Actions): """Actions to be sent to a Continual Supervised Learning environment.""" y_pred: Tensor class ActionSpace(TypedDictSpace): """Action space of a Continual SL Setting.""" y_pred: spaces.Space @dataclass(frozen=True) class Rewards(SLSetting.Rewards): """Rewards obtained from a Continual Supervised Learning environment.""" y: Tensor class RewardSpace(TypedDictSpace): """Reward space of a Continual SL Setting.""" y: spaces.Space ActionType = TypeVar("ActionType", bound=Actions) RewardType = TypeVar("RewardType", bound=Rewards) ================================================ FILE: sequoia/settings/sl/continual/results.py ================================================ from sequoia.common.metrics import MetricsType from sequoia.settings.assumptions.continual import ContinualResults class ContinualSLResults(ContinualResults[MetricsType]): pass ================================================ FILE: sequoia/settings/sl/continual/setting.py ================================================ import itertools from dataclasses import dataclass from pathlib import Path from typing import ClassVar, Dict, List, Optional, Type, TypeVar, Union import gym import numpy as np import torch from continuum.datasets import ( CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST, QMNIST, CIFARFellowship, FashionMNIST, ImageNet100, ImageNet1000, MNISTFellowship, Synbols, _ContinuumDataset, ) from continuum.scenarios import ClassIncremental, _BaseScenario from continuum.tasks import TaskSet, concat, split_train_val from gym import spaces from simple_parsing import choice, field, list_field from torch import Tensor from torch.utils.data import ConcatDataset, Dataset, Subset import wandb from sequoia.common.config import Config from sequoia.common.gym_wrappers import RenderEnvWrapper, TransformObservation from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support from sequoia.common.spaces import Sparse from sequoia.common.transforms import Compose, Transforms from sequoia.settings.assumptions.continual import ContinualAssumption from sequoia.settings.base import Method from sequoia.settings.sl.setting import SLSetting from sequoia.settings.sl.wrappers import MeasureSLPerformanceWrapper from sequoia.utils.generic_functions import concatenate from sequoia.utils.logging_utils import get_logger from sequoia.utils.utils import flag from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment from .envs import ( CTRL_INSTALLED, CTRL_STREAMS, base_action_spaces, base_observation_spaces, base_reward_spaces, get_action_space, get_observation_space, get_reward_space, ) from .objects import Actions, ActionSpace, Observations, ObservationSpace, Rewards, RewardSpace from .results import ContinualSLResults from .wrappers import relabel logger = get_logger(__name__) EnvironmentType = TypeVar("EnvironmentType", bound=ContinualSLEnvironment) available_datasets = { c.__name__.lower(): c for c in [ CIFARFellowship, MNISTFellowship, ImageNet100, ImageNet1000, CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST, QMNIST, FashionMNIST, Synbols, ] # "synbols": Synbols, # "synbols_font": partial(Synbols, task="fonts"), } if CTRL_INSTALLED: available_datasets.update(dict(zip(CTRL_STREAMS, CTRL_STREAMS))) @dataclass class ContinualSLSetting(SLSetting, ContinualAssumption): """Continuous, Task-Agnostic, Continual Supervised Learning. This is *currently* the most "general" Supervised Continual Learning setting in Sequoia. - Data distribution changes smoothly over time. - Smooth transitions between "tasks" - No information about task boundaries or task identity (no task IDs) - Maximum of one 'epoch' through the environment. """ # Class variables that hold the 'base' observation/action/reward spaces for the # available datasets. base_observation_spaces: ClassVar[Dict[str, gym.Space]] = base_observation_spaces base_action_spaces: ClassVar[Dict[str, gym.Space]] = base_action_spaces base_reward_spaces: ClassVar[Dict[str, gym.Space]] = base_reward_spaces # (NOTE: commenting out SLSetting.Observations as it is the same class # as Setting.Observations, and we want a consistent method resolution order. Observations: ClassVar[Type[Observations]] = Observations Actions: ClassVar[Type[Actions]] = Actions Rewards: ClassVar[Type[Rewards]] = Rewards ObservationSpace: ClassVar[Type[ObservationSpace]] = ObservationSpace Environment: ClassVar[Type[SLSetting.Environment]] = ContinualSLEnvironment[ Observations, Actions, Rewards ] Results: ClassVar[Type[ContinualSLResults]] = ContinualSLResults # Class variable holding a dict of the names and types of all available # datasets. # TODO: Issue #43: Support other datasets than just classification available_datasets: ClassVar[Dict[str, Type[_ContinuumDataset]]] = available_datasets # A continual dataset to use. (Should be taken from the continuum package). dataset: str = choice(available_datasets.keys(), default="mnist") # Transformations to use. See the Transforms enum for the available values. transforms: List[Transforms] = list_field( Transforms.to_tensor, # BUG: The input_shape given to the Model doesn't have the right number # of channels, even if we 'fixed' them here. However the images are fine # after. Transforms.three_channels, Transforms.channels_first_if_needed, ) # Either number of classes per task, or a list specifying for # every task the amount of new classes. increment: Union[int, List[int]] = list_field( 2, type=int, nargs="*", alias="n_classes_per_task" ) # The scenario number of tasks. # If zero, defaults to the number of classes divied by the increment. nb_tasks: int = 0 # A different task size applied only for the first task. # Desactivated if `increment` is a list. initial_increment: int = 0 # An optional custom class order, used for NC. class_order: Optional[List[int]] = None # Either number of classes per task, or a list specifying for # every task the amount of new classes (defaults to the value of # `increment`). test_increment: Optional[Union[List[int], int]] = None # A different task size applied only for the first test task. # Desactivated if `test_increment` is a list. Defaults to the # value of `initial_increment`. test_initial_increment: Optional[int] = None # An optional custom class order for testing, used for NC. # Defaults to the value of `class_order`. test_class_order: Optional[List[int]] = None # Wether task boundaries are smooth or not. smooth_task_boundaries: bool = flag(True) # Wether the context (task) variable is stationary or not. stationary_context: bool = flag(False) # Wether tasks share the same action space or not. # TODO: This will probably be moved into a different assumption. shared_action_space: Optional[bool] = None # TODO: Need to put num_workers in only one place. batch_size: int = field(default=32, cmd=False) num_workers: int = field(default=4, cmd=False) # When True, a Monitor-like wrapper will be applied to the training environment # and monitor the 'online' performance during training. Note that in SL, this will # also cause the Rewards (y) to be withheld until actions are passed to the `send` # method of the Environment. monitor_training_performance: bool = flag(False) train_datasets: List[Dataset] = field( default_factory=list, cmd=False, repr=False, to_dict=False ) val_datasets: List[Dataset] = field(default_factory=list, cmd=False, repr=False, to_dict=False) test_datasets: List[Dataset] = field(default_factory=list, cmd=False, repr=False, to_dict=False) def __post_init__(self): super().__post_init__() # assert not self.has_setup_fit # Test values default to the same as train. self.test_increment = self.test_increment or self.increment self.test_initial_increment = self.test_initial_increment or self.initial_increment self.test_class_order = self.test_class_order or self.class_order # TODO: For now we assume a fixed, equal number of classes per task, for # sake of simplicity. We could take out this assumption, but it might # make things a bit more complicated. if isinstance(self.increment, list) and len(self.increment) == 1: self.increment = self.increment[0] if isinstance(self.test_increment, list) and len(self.test_increment) == 1: self.test_increment = self.test_increment[0] assert isinstance(self.increment, int) assert isinstance(self.test_increment, int) # The 'scenarios' for train and test from continuum. (ClassIncremental for now). self.train_cl_loader: Optional[_BaseScenario] = None self.test_cl_loader: Optional[_BaseScenario] = None self.train_cl_dataset: Optional[_ContinuumDataset] = None self.test_cl_dataset: Optional[_ContinuumDataset] = None # This will be set by the Experiment, or passed to the `apply` method. # TODO: This could be a bit cleaner. self.config: Config # Default path to which the datasets will be downloaded. self.data_dir: Optional[Path] = None self.train_env: ContinualSLEnvironment = None # type: ignore self.val_env: ContinualSLEnvironment = None # type: ignore self.test_env: ContinualSLEnvironment = None # type: ignore # BUG: These `has_setup_fit`, `has_setup_test`, `has_prepared_data` properties # aren't working correctly: they get set before the call to the function has # been executed, making it impossible to check those values from inside those # functions. self._has_prepared_data = False self._has_setup_fit = False self._has_setup_test = False if CTRL_INSTALLED and self.dataset in CTRL_STREAMS: import ctrl from ctrl.tasks.task_generator import TaskGenerator from .envs import CTRL_NB_TASKS self.nb_tasks = self.nb_tasks or CTRL_NB_TASKS[self.dataset] if self.dataset == "s_long" and not self.nb_tasks: warnings.warn( RuntimeWarning( f"Limiting the scenario to 100 tasks for now when using 's_long' stream." ) ) self.nb_tasks = 100 task_generator: TaskGenerator = ctrl.get_stream(self.dataset, seed=42) # Get the train/val/test splits from the tasks. for task_dataset in itertools.islice(task_generator, self.nb_tasks): train_dataset = task_dataset.datasets[task_dataset.split_names.index("Train")] val_dataset = task_dataset.datasets[task_dataset.split_names.index("Val")] test_dataset = task_dataset.datasets[task_dataset.split_names.index("Test")] self.train_datasets.append(train_dataset) self.val_datasets.append(val_dataset) self.test_datasets.append(test_dataset) ## NOTE: Not sure this is a good idea, because we might easily mix the train/val ## and test splits between different runs! Actually, now that I think about it, ## I need to make sure that this isn't happening already with Avalanche! # if self.datasets: # if any(self.train_datasets, self.val_datasets, self.test_datasets): # raise RuntimeError( # f"When passing your own datasets to the setting, you have to pass " # f"either `datasets` or all three of `train_datasets`, " # f"`val_datasets` and `test_datasets`." # ) # self.train_datasets = [] # self.val_datasets = [] # self.test_datasets = [] # rng = np.random.default_rng(self.config.seed if self.config else 123) # for dataset in datasets: # n = len(dataset) # n_train_val = int(n * 0.8) # n_test = n - n_train_val # n_train = int(n_train_val * 0.8) # n_valid = n_train_val - n_train # train_val_dataset, test_dataset = random_split( # dataset, [n_train_val, n_test], generator=rng, # ) # train_dataset, val_dataset = random_split( # train_val_dataset, [n_train, n_valid], generator=rng, # ) # self.train_datasets.append(train_dataset) # self.val_datasets.append(val_dataset) # self.test_datasets.append(test_dataset) if any([self.train_datasets, self.val_datasets, self.test_datasets]): if not all([self.train_datasets, self.val_datasets, self.test_datasets]): raise RuntimeError( f"When passing your own datasets to the setting, you have to pass " f"`train_datasets`, `val_datasets` and `test_datasets`." ) self.nb_tasks = len(self.train_datasets) if not (len(self.val_datasets) == len(self.test_datasets) == self.nb_tasks): raise RuntimeError( f"When passing your own datasets to the setting, you need to pass " f"The same number of train/valid and test datasets for now." ) # FIXME: For now, setting `self.dataset` to None, because it has a default # of 'mnist'. Should probably make it a required argument instead. self.dataset = None # x_shape = self.train_datasets[0][0][0].shape # self.observation_space.x.shape = x_shape # assert False, (x_shape, self.observation_space) # Note: Using the same name as in the RL Setting for now, since that's where # this feature of passing the "envs" for each task was first added. self._using_custom_envs_foreach_task: bool = bool(self.train_datasets) # TODO: Remove this if self.dataset in self.base_action_spaces: if isinstance(self.action_space, spaces.Discrete): base_action_space = self.base_action_spaces[self.dataset] n_classes = base_action_space.n self.class_order = self.class_order or list(range(n_classes)) if self.nb_tasks: self.increment = n_classes // self.nb_tasks if not self.nb_tasks: base_action_space = self.base_action_spaces[self.dataset] if isinstance(base_action_space, spaces.Discrete): self.nb_tasks = base_action_space.n // self.increment assert self.nb_tasks != 0, self.nb_tasks def apply( self, method: Method["ContinualSLSetting"], config: Config = None ) -> ContinualSLResults: """Apply the given method on this setting to producing some results.""" # TODO: It still isn't super clear what should be in charge of creating # the config, and how to create it, when it isn't passed explicitly. self.config = config or self._setup_config(method) assert self.config is not None method.configure(setting=self) # Run the main loop (defined in ContinualAssumption). # Basically does the following: # 1. Call method.fit(train_env, valid_env) # 2. Test the method on test_env. # Return the results, as reported by the test environment. results: ContinualSLResults = super().main_loop(method) method.receive_results(self, results=results) return results def train_dataloader( self, batch_size: int = 32, num_workers: Optional[int] = 4 ) -> EnvironmentType: if not self.has_prepared_data: self.prepare_data() if not self.has_setup_fit: self.setup("fit") if self.train_env: self.train_env.close() batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers # NOTE: ATM the dataset here doesn't have any transforms. We add the transforms after the # dataloader below using the TransformObservations wrapper. This isn't ideal. dataset = self._make_train_dataset() # TODO: Add some kind of Wrapper around the dataset to make it # semi-supervised? env = self.Environment( dataset, hide_task_labels=(not self.task_labels_at_train_time), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, Observations=self.Observations, Actions=self.Actions, Rewards=self.Rewards, pin_memory=True, batch_size=batch_size, num_workers=num_workers, drop_last=self.drop_last, shuffle=False, one_epoch_only=(not self.known_task_boundaries_at_train_time), ) if self.config.render: # Add a wrapper that calls 'env.render' at each step? env = RenderEnvWrapper(env) train_transforms = Compose(self.transforms + self.train_transforms) if train_transforms: env = TransformObservation(env, f=train_transforms) if self.config.device: # TODO: Put this before or after the image transforms? from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors env = ConvertToFromTensors(env, device=self.config.device) # env = TransformObservation(env, f=partial(move, device=self.config.device)) # env = TransformReward(env, f=partial(move, device=self.config.device)) if self.monitor_training_performance: env = MeasureSLPerformanceWrapper( env, first_epoch_only=True, wandb_prefix=f"Train/", ) # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost # when transforms don't propagate the 'dtype' field. env.observation_space.dtype = self.Observations self.train_env = env return self.train_env def val_dataloader( self, batch_size: int = 32, num_workers: Optional[int] = 4 ) -> EnvironmentType: if not self.has_prepared_data: self.prepare_data() if not self.has_setup_validate: self.setup("validate") if self.val_env: self.val_env.close() batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers dataset = self._make_val_dataset() # TODO: Add some kind of Wrapper around the dataset to make it # semi-supervised? # TODO: Change the reward and action spaces to also use objects. env = self.Environment( dataset, hide_task_labels=(not self.task_labels_at_train_time), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, Observations=self.Observations, Actions=self.Actions, Rewards=self.Rewards, pin_memory=True, drop_last=self.drop_last, batch_size=batch_size, num_workers=num_workers, one_epoch_only=(not self.known_task_boundaries_at_train_time), ) # TODO: If wandb is enabled, then add customized Monitor wrapper (with # IterableWrapper as an additional subclass). There would then be a lot of # overlap between such a Monitor and the current TestEnvironment. if self.config.render: # Add a wrapper that calls 'env.render' at each step? env = RenderEnvWrapper(env) # NOTE: The transforms from `self.transforms` (the 'base' transforms) were # already added when creating the datasets and the CL scenario. val_transforms = self.transforms + self.val_transforms if val_transforms: env = TransformObservation(env, f=val_transforms) if self.config.device: # TODO: Put this before or after the image transforms? from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors env = ConvertToFromTensors(env, device=self.config.device) # env = TransformObservation(env, f=partial(move, device=self.config.device)) # env = TransformReward(env, f=partial(move, device=self.config.device)) # NOTE: We don't measure online performance on the validation set. # if self.monitor_training_performance: # env = MeasureSLPerformanceWrapper( # env, # first_epoch_only=True, # wandb_prefix=f"Train/Task {self.current_task_id}", # ) # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost # when transforms don't propagate the 'dtype' field. env.observation_space.dtype = self.Observations self.val_env = env return self.val_env def test_dataloader( self, batch_size: int = None, num_workers: int = None ) -> ContinualSLEnvironment[Observations, Actions, Rewards]: """Returns a Continual SL Test environment.""" if not self.has_prepared_data: self.prepare_data() if not self.has_setup_test: self.setup("test") batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers dataset = self._make_test_dataset() env = self.Environment( dataset, batch_size=batch_size, num_workers=num_workers, hide_task_labels=(not self.task_labels_at_test_time), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, Observations=self.Observations, Actions=self.Actions, Rewards=self.Rewards, pretend_to_be_active=True, drop_last=self.drop_last, shuffle=False, one_epoch_only=True, ) # NOTE: The transforms from `self.transforms` (the 'base' transforms) were # already added when creating the datasets and the CL scenario. test_transforms = self.transforms + self.test_transforms if test_transforms: env = TransformObservation(env, f=test_transforms) if self.config.device: # TODO: Put this before or after the image transforms? from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors env = ConvertToFromTensors(env, device=self.config.device) # env = TransformObservation(env, f=partial(move, device=self.config.device)) # env = TransformReward(env, f=partial(move, device=self.config.device)) # FIXME: Instead of trying to create a 'fake' task schedule for the test # environment, instead let the test environment see the task ids, (and then hide # them if necessary) so that it can compile the stats for each task based on the # task IDs of the observations. # TODO: Configure the 'monitoring' dir properly. if wandb.run: test_dir = wandb.run.dir else: test_dir = self.config.log_dir test_loop_max_steps = len(dataset) // (env.batch_size or 1) test_env = ContinualSLTestEnvironment( env, directory=test_dir, step_limit=test_loop_max_steps, force=True, config=self.config, video_callable=None if (wandb.run or self.config.render) else False, ) # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost # when transforms don't propagate the 'dtype' field. env.observation_space.dtype = self.Observations if self.test_env: self.test_env.close() self.test_env = test_env return self.test_env def prepare_data(self, data_dir: Path = None) -> None: # TODO: Pass the transformations to the CL scenario, or to the dataset? if data_dir is None: if self.config: data_dir = self.config.data_dir else: data_dir = Path("data") logger.info(f"Downloading datasets to directory {data_dir}") self._using_custom_envs_foreach_task = bool(self.train_datasets) if not self._using_custom_envs_foreach_task: self.train_cl_dataset = self.make_dataset(data_dir, download=True, train=True) self.test_cl_dataset = self.make_dataset(data_dir, download=True, train=False) return super().prepare_data() def setup(self, stage: str = None): if not self.has_prepared_data: self.prepare_data() super().setup(stage=stage) if stage not in (None, "fit", "test", "validate"): raise RuntimeError(f"`stage` should be 'fit', 'test', 'validate' or None.") if stage in (None, "fit", "validate"): if not self._using_custom_envs_foreach_task: self.train_cl_dataset = self.train_cl_dataset or self.make_dataset( self.config.data_dir, download=False, train=True ) nb_tasks_kwarg = {} if self.nb_tasks is not None: nb_tasks_kwarg.update(nb_tasks=self.nb_tasks) else: nb_tasks_kwarg.update(increment=self.increment) if not self._using_custom_envs_foreach_task: self.train_cl_loader = self.train_cl_loader or ClassIncremental( cl_dataset=self.train_cl_dataset, **nb_tasks_kwarg, initial_increment=self.initial_increment, transformations=[], # NOTE: Changing this: The transforms will get added after. class_order=self.class_order, ) if not self.train_datasets and not self.val_datasets: for task_id, train_taskset in enumerate(self.train_cl_loader): train_taskset, valid_taskset = split_train_val(train_taskset, val_split=0.1) self.train_datasets.append(train_taskset) self.val_datasets.append(valid_taskset) # IDEA: We could do the remapping here instead of adding a wrapper later. if self.shared_action_space and isinstance(self.action_space, spaces.Discrete): # If we have a shared output space, then they are all mapped to [0, n_per_task] self.train_datasets = list(map(relabel, self.train_datasets)) self.val_datasets = list(map(relabel, self.val_datasets)) if stage in (None, "test"): if not self._using_custom_envs_foreach_task: self.test_cl_dataset = self.test_cl_dataset or self.make_dataset( self.config.data_dir, download=False, train=False ) self.test_class_order = self.test_class_order or self.class_order self.test_cl_loader = self.test_cl_loader or ClassIncremental( cl_dataset=self.test_cl_dataset, nb_tasks=self.nb_tasks, increment=self.test_increment, initial_increment=self.test_initial_increment, transformations=[], # note: not passing transforms here, they get added later class_order=self.test_class_order, ) if not self.test_datasets: # TODO: If we decide to 'shuffle' the test tasks, then store the sequence of # task ids in a new property, probably here. # self.test_task_order = list(range(len(self.test_datasets))) self.test_datasets = list(self.test_cl_loader) # IDEA: We could do the remapping here instead of adding a wrapper later. if self.shared_action_space and isinstance(self.action_space, spaces.Discrete): # If we have a shared output space, then they are all mapped to [0, n_per_task] self.test_datasets = list(map(relabel, self.test_datasets)) def _make_train_dataset(self) -> Union[TaskSet, Dataset]: # NOTE: Passing the same seed to `train`/`valid`/`test` is fine, because it's # only used for the shuffling used to make the task boundaries smooth. if self.smooth_task_boundaries: return smooth_task_boundaries_concat( self.train_datasets, seed=self.config.seed if self.config else None ) if self.stationary_context: joined_dataset = concat(self.train_datasets) return shuffle(joined_dataset, seed=self.config.seed) if self.known_task_boundaries_at_train_time: return self.train_datasets[self.current_task_id] else: return concatenate(self.train_datasets) def _make_val_dataset(self) -> Dataset: if self.smooth_task_boundaries: return smooth_task_boundaries_concat(self.val_datasets, seed=self.config.seed) if self.stationary_context: joined_dataset = concat(self.val_datasets) return shuffle(joined_dataset, seed=self.config.seed) if self.known_task_boundaries_at_train_time: return self.val_datasets[self.current_task_id] return concatenate(self.val_datasets) def _make_test_dataset(self) -> Dataset: if self.smooth_task_boundaries: return smooth_task_boundaries_concat(self.test_datasets, seed=self.config.seed) else: return concatenate(self.test_datasets) def make_dataset( self, data_dir: Path, download: bool = True, train: bool = True, **kwargs ) -> _ContinuumDataset: # TODO: #7 Use this method here to fix the errors that happen when # trying to create every single dataset from continuum. data_dir = Path(data_dir) if not data_dir.exists(): data_dir.mkdir(parents=True, exist_ok=True) if self.dataset in self.available_datasets: dataset_class = self.available_datasets[self.dataset] return dataset_class(data_path=data_dir, download=download, train=train, **kwargs) elif self.dataset in self.available_datasets.values(): dataset_class = self.dataset return dataset_class(data_path=data_dir, download=download, train=train, **kwargs) elif isinstance(self.dataset, Dataset): logger.info(f"Using a custom dataset {self.dataset}") return self.dataset else: raise NotImplementedError(self.dataset) @property def observation_space(self) -> ObservationSpace[Observations]: """The un-batched observation space, based on the choice of dataset and the transforms at `self.transforms` (which apply to the train/valid/test environments). The returned space is a TypedDictSpace, with the following properties: - `x`: observation space (e.g. `Image` space) - `task_labels`: Union[Discrete, Sparse[Discrete]] The task labels for each sample. When task labels are not available, the task labels space is Sparse, and entries will be `None`. """ # TODO: Need to clean this up a bit: if self._using_custom_envs_foreach_task: x_space = get_observation_space(self.train_datasets[0]) else: x_space = get_observation_space(self.dataset) if not self.transforms: # NOTE: When we don't pass any transforms, continuum scenarios still # at least use 'to_tensor'. x_space = Transforms.to_tensor(x_space) # apply the transforms to the observation space. for transform in self.transforms: x_space = transform(x_space) x_space = add_tensor_support(x_space) task_label_space = spaces.Discrete(self.nb_tasks) if not self.task_labels_at_train_time: task_label_space = Sparse(task_label_space, 1.0) task_label_space = add_tensor_support(task_label_space) self._observation_space = self.ObservationSpace( x=x_space, task_labels=task_label_space, dtype=self.Observations, ) return self._observation_space # TODO: Add a `train_observation_space`, `train_action_space`, `train_reward_space`? @property def action_space(self) -> spaces.Discrete: """Action space for this setting.""" if self._action_space: return self._action_space # Determine the action space using the right dataset. # (NOTE: same across train/val/test for now.) dataset = self.dataset if self._using_custom_envs_foreach_task: dataset = self.train_datasets[0] action_space = get_action_space(dataset) # TODO: Remove this if isinstance(action_space, spaces.Discrete) and self.dataset in self.base_action_spaces: if self.shared_action_space: assert isinstance(self.increment, int), ( "Need to have same number of classes in each task when " "`shared_action_space` is true." ) action_space = spaces.Discrete(self.increment) self._action_space = action_space return self._action_space # TODO: IDEA: Have the action space only reflect the number of 'current' classes # in order to create a "true" class-incremental learning setting. # n_classes_seen_so_far = 0 # for task_id in range(self.current_task_id): # n_classes_seen_so_far += self.num_classes_in_task(task_id) # return spaces.Discrete(n_classes_seen_so_far) @property def reward_space(self) -> spaces.Discrete: if self._reward_space: return self._reward_space # Determine the reward space using the right dataset. # (NOTE: same across train/val/test for now.) dataset = self.dataset if self._using_custom_envs_foreach_task: dataset = self.train_datasets reward_space = get_reward_space(dataset) # TODO: Remove this if isinstance(reward_space, spaces.Discrete) and self.dataset in self.base_reward_spaces: if self.shared_action_space: assert isinstance(self.increment, int), ( "Need to have same number of classes in each task when " "`shared_action_space` is true." ) reward_space = spaces.Discrete(self.increment) self._reward_space = reward_space return self._reward_space def smooth_task_boundaries_concat( datasets: List[Dataset], seed: int = None, window_length: float = 0.03 ) -> ConcatDataset: """TODO: Use a smarter way of mixing from one to the other?""" lengths = [len(dataset) for dataset in datasets] total_length = sum(lengths) n_tasks = len(datasets) if not isinstance(window_length, int): window_length = int(total_length * window_length) assert ( window_length > 1 ), f"Window length should be positive or a fraction of the dataset length. ({window_length})" rng = np.random.default_rng(seed) def option1(): shuffled_indices = np.arange(total_length) for start_index in range(0, total_length - window_length + 1, window_length // 2): rng.shuffle(shuffled_indices[start_index : start_index + window_length]) return shuffled_indices # Maybe do the same but backwards? # IDEA #2: Sample based on how close to the 'center' of the task we are. def option2(): boundaries = np.array(list(itertools.accumulate(lengths, initial=0))) middles = [(start + end) / 2 for start, end in zip(boundaries[0:], boundaries[1:])] samples_left: Dict[int, int] = {i: length for i, length in enumerate(lengths)} indices_left: Dict[int, List[int]] = { i: list(range(boundaries[i], boundaries[i] + length)) for i, length in enumerate(lengths) } out_indices: List[int] = [] last_dataset_index = n_tasks - 1 for step in range(total_length): if step < middles[0] and samples_left[0]: # Prevent sampling things from task 1 at the beginning of task 0, and eligible_dataset_ids = [0] elif step > middles[-1] and samples_left[last_dataset_index]: # Prevent sampling things from task N-1 at the emd of task N eligible_dataset_ids = [last_dataset_index] else: # 'smooth', but at the boundaries there are actually two or three datasets, # from future tasks even! eligible_dataset_ids = list(k for k, v in samples_left.items() if v > 0) # if len(eligible_dataset_ids) > 2: # # Prevent sampling from future tasks (past the next task) when at a # # boundary. # left_dataset_index = min(eligible_dataset_ids) # right_dataset_index = min( # v for v in eligible_dataset_ids if v > left_dataset_index # ) # eligible_dataset_ids = [left_dataset_index, right_dataset_index] options = np.array(eligible_dataset_ids, dtype=int) # Calculate the 'distance' to the center of the task's dataset. distances = np.abs([step - middles[dataset_index] for dataset_index in options]) # NOTE: THis exponent is kindof arbitrary, setting it to this value because it # sortof works for MNIST so far. probs = 1 / (1 + np.abs(distances) ** 2) probs /= sum(probs) chosen_dataset = rng.choice(options, p=probs) chosen_index = indices_left[chosen_dataset].pop() samples_left[chosen_dataset] -= 1 out_indices.append(chosen_index) shuffled_indices = np.array(out_indices) return shuffled_indices def option3(): shuffled_indices = np.arange(total_length) for start_index in range(0, total_length - window_length + 1, window_length // 2): rng.shuffle(shuffled_indices[start_index : start_index + window_length]) for start_index in reversed(range(0, total_length - window_length + 1, window_length // 2)): rng.shuffle(shuffled_indices[start_index : start_index + window_length]) return shuffled_indices shuffled_indices = option3() if all(isinstance(dataset, TaskSet) for dataset in datasets): # Use the 'concat' from continuum, just to preserve the field/methods of a # TaskSet. joined_taskset = concat(datasets) return subset(joined_taskset, shuffled_indices) else: joined_dataset = ConcatDataset(datasets) return Subset(joined_dataset, shuffled_indices) return shuffled_indices from functools import singledispatch from typing import Sequence, overload from .wrappers import replace_taskset_attributes DatasetType = TypeVar("DatasetType", bound=Dataset) @overload def subset(dataset: TaskSet, indices: Sequence[int]) -> TaskSet: ... @singledispatch def subset(dataset: DatasetType, indices: Sequence[int]) -> Union[Subset, DatasetType]: raise NotImplementedError(f"Don't know how to take a subset of dataset {dataset}") return Subset(dataset, indices) @subset.register def taskset_subset(taskset: TaskSet, indices: np.ndarray) -> TaskSet: # x, y, t = taskset.get_raw_samples(indices) x, y, t = taskset.get_raw_samples(indices) # TODO: Not sure if/how to handle the `bounding_boxes` attribute here. bounding_boxes = taskset.bounding_boxes if bounding_boxes is not None: bounding_boxes = bounding_boxes[indices] return replace_taskset_attributes(taskset, x=x, y=y, t=t, bounding_boxes=bounding_boxes) def random_subset( taskset: TaskSet, n_samples: int, seed: int = None, ordered: bool = True ) -> TaskSet: """Returns a random (ordered) subset of the given TaskSet.""" rng = np.random.default_rng(seed) dataset_length = len(taskset) if n_samples > dataset_length: raise RuntimeError(f"Dataset has {dataset_length}, asked for {n_samples} samples.") indices = rng.permutation(range(dataset_length))[:n_samples] # indices = rng.choice(len(taskset), size=n_samples, replace=False) if ordered: indices = sorted(indices) assert len(indices) == n_samples return subset(taskset, indices) DatasetType = TypeVar("DatasetType", bound=Dataset) def shuffle(dataset: DatasetType, seed: int = None) -> DatasetType: length = len(dataset) rng = np.random.default_rng(seed) perm = rng.permutation(range(length)) return subset(dataset, perm) import torch from torch import Tensor def smart_class_prediction( logits: Tensor, task_labels: Tensor, setting: SLSetting, train: bool ) -> Tensor: """Predicts classes which are available, given the task labels.""" unique_task_ids = set(task_labels.unique().cpu().tolist()) classes_in_each_task = { task_id: setting.task_classes(task_id, train=train) for task_id in unique_task_ids } y_pred = limit_to_available_classes(logits, task_labels, classes_in_each_task) return y_pred def limit_to_available_classes( logits: Tensor, task_labels: Tensor, classes_in_each_present_task: Dict[int, List[int]] ) -> Tensor: B = logits.shape[0] C = logits.shape[-1] assert logits.shape[0] == task_labels.shape[0] == B y_preds = [] indices = torch.arange(C, dtype=torch.long, device=logits.device) elligible_masks = { task_id: sum( [indices == label for label in labels], start=torch.zeros([C], dtype=bool, device=logits.device), ) for task_id, labels in classes_in_each_present_task.items() } y_preds = [] # TODO: Also return the logits, so we can get a loss for the selected indices? # logits = [] for logit, task_label in zip(logits, task_labels): t = task_label.item() eligible_classes_list = classes_in_each_present_task[t] eligible_classes = torch.as_tensor(eligible_classes_list, dtype=int, device=logits.device) is_eligible = elligible_masks[t] if not is_eligible.any(): # Return a random prediction from the set of possible classes, since # the network has fewer outputs than there are classes. # NOTE: This can occur for instance when testing on future tasks # when using a MultiTask module. y_pred = eligible_classes[torch.randint(len(eligible_classes), (1,))] else: masked_logit = logit[is_eligible] y_pred_without_offset = masked_logit.argmax(-1) y_pred = eligible_classes[y_pred_without_offset] assert y_pred.item() in eligible_classes_list y_preds.append(y_pred.reshape(())) # Just to make sure they all have the same shape. return torch.stack(y_preds) from sequoia.common.transforms.channels import has_channels_last, has_channels_first @has_channels_last.register(ContinualSLSetting.Observations) def _has_channels_last(obs: ContinualSLSetting.Observations) -> bool: return has_channels_last(obs.x) ================================================ FILE: sequoia/settings/sl/continual/setting_test.py ================================================ import functools from collections import Counter from pathlib import Path from typing import Any, ClassVar, Dict, Tuple, Type import gym import pytest import torch from sklearn.datasets import make_classification from torch.utils.data import TensorDataset, random_split from sequoia.common.config import Config from sequoia.methods import RandomBaselineMethod from sequoia.settings.base.setting_test import SettingTests from sequoia.settings.sl.continual.setting import shuffle from .setting import ContinualSLSetting, random_subset, smooth_task_boundaries_concat from .wrappers import ShowLabelDistributionWrapper def test_continuum_shuffle(config: Config): from continuum.datasets import MNIST from continuum.scenarios import ClassIncremental from continuum.tasks import concat dataset = MNIST(data_path=config.data_dir, train=True) cl_dataset = concat(ClassIncremental(dataset, increment=2)) shuffled_dataset = shuffle(cl_dataset) assert (shuffled_dataset._y != cl_dataset._y).sum() > len(cl_dataset) / 2 assert (shuffled_dataset._t != cl_dataset._t).sum() > len(cl_dataset) / 2 class TestContinualSLSetting(SettingTests): Setting: ClassVar[Type[Setting]] = ContinualSLSetting # The kwargs to be passed to the Setting when we want to create a 'short' setting. # TODO: Transform this into a fixture instead. fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict( dataset="mnist", batch_size=64, ) @pytest.fixture(scope="session") def short_setting(self, session_config): kwargs = self.fast_dev_run_kwargs.copy() kwargs["config"] = session_config setting = self.Setting(**kwargs) setting.config = session_config setting.prepare_data() setting.setup() # Testing this out: Shortening the train datasets: setting.train_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.train_datasets ] setting.val_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.val_datasets ] setting.test_datasets = [ random_subset(task_dataset, 100) for task_dataset in setting.test_datasets ] assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) # Assert that calling setup doesn't overwrite the datasets. setting.setup() assert len(setting.train_datasets) == 5 assert len(setting.val_datasets) == 5 assert len(setting.test_datasets) == 5 assert all(len(dataset) == 100 for dataset in setting.train_datasets) assert all(len(dataset) == 100 for dataset in setting.val_datasets) assert all(len(dataset) == 100 for dataset in setting.test_datasets) return setting def test_shared_action_space(self, config: Config): kwargs = self.fast_dev_run_kwargs.copy() kwargs["config"] = config if ( isinstance(self.Setting, functools.partial) and not self.Setting.args[0].shared_action_space ): # NOTE: This `self.Setting` being a partial instead of a Setting class only # happens in the tests for the SettingProxy. kwargs.update(shared_action_space=True) elif not self.Setting.shared_action_space: kwargs.update(shared_action_space=True) setting = self.Setting(**kwargs) y_counter = Counter() t_counter = Counter() test_env = setting.test_dataloader() for obs, rewards in test_env: if rewards is None: action = test_env.action_space.sample() # NOTE: On the last batch, the rewards might have a smaller batch size # than the action space. # TODO: Add tests to check that the envs can explicitly handle this, so # that we don't give the burden to the Method. rewards = test_env.send(action) y = rewards.y.tolist() t = ( obs.task_labels.tolist() if obs.task_labels is not None else [None for _ in range(obs.x.shape[0])] ) y_counter.update(y) t_counter.update(t) # This is what you get with mnist, with the default class ordering: # if setting.known_task_boundaries_at_train_time: # # Only the first task of mnist, in this case. # assert y_counter == {1: 6065, 0: 5534} assert y_counter == {0: 4926, 1: 5074} if setting.task_labels_at_test_time: assert t_counter == {0: 2115, 1: 2042, 3: 1986, 4: 1983, 2: 1874} else: assert t_counter == {None: 10_000} # assert t_counter # Full Train envs: # assert y_counter == {1: 27456, 0: 26546} # assert False, c def test_only_one_epoch(self, short_setting): setting = short_setting train_env = setting.train_dataloader() for _ in train_env: pass if not setting.known_task_boundaries_at_train_time: assert train_env.is_closed() with pytest.raises(gym.error.ClosedEnvironmentError): for _ in train_env: pass else: assert not train_env.is_closed() @pytest.mark.no_xvfb @pytest.mark.timeout(20) @pytest.mark.skipif( not Path("temp").exists(), reason="Need temp dir for saving the figure this test creates.", ) def test_show_distributions(self, config: Config): setting = self.Setting(dataset="mnist", config=config) figures_dir = Path("temp") # fig, axes = plt.subplots(2, 3) name_to_env_fn = { "train": setting.train_dataloader, "valid": setting.val_dataloader, "test": setting.test_dataloader, } # TODO: Maybe add these plots as part of the results for ContinualSL? How much # memory would actually be needed to store these here? for i, (name, env_fn) in enumerate(name_to_env_fn.items()): env = env_fn(batch_size=100, num_workers=4) env = ShowLabelDistributionWrapper(env, env_name=name) # Iterate through the env. for obs, rewards in env: if rewards is None: rewards = env.send(env.action_space.sample()) fig = env.make_figure() fig.set_size_inches((6, 4), forward=False) save_path = Path(f"{figures_dir}/{setting.get_name()}_{name}.png") save_path.parent.mkdir(exist_ok=True) fig.savefig(save_path) # plt.waitforbuttonpress(10) # plt.show() def test_passing_datasets_to_setting(self, config: Config): image_shape = (16, 16, 3) n_classes = 10 datasets = [ create_image_classification_dataset( image_shape=image_shape, n_classes=2, y_offset=i * 2 ) for i in range(5) ] train_datasets = [] val_datasets = [] test_datasets = [] for dataset in datasets: n = len(dataset) n_train_val = int(n * 0.8) n_test = n - n_train_val n_train = int(n_train_val * 0.8) n_valid = n_train_val - n_train train_val_dataset, test_dataset = random_split(dataset, [n_train_val, n_test]) train_dataset, val_dataset = random_split(train_val_dataset, [n_train, n_valid]) train_datasets.append(train_dataset) val_datasets.append(val_dataset) test_datasets.append(test_dataset) setting = self.Setting( train_datasets=train_datasets, val_datasets=val_datasets, test_datasets=test_datasets, transforms=[], # train_transforms=[], # val_transforms=[], # test_transforms=[] ) assert setting.train_datasets is train_datasets assert setting.val_datasets is val_datasets assert setting.test_datasets is test_datasets assert setting.nb_tasks == len(setting.train_datasets) assert setting.observation_space.x.shape == image_shape assert setting.reward_space.n == n_classes from sequoia.conftest import skip_param from .envs import CTRL_INSTALLED, CTRL_STREAMS @pytest.mark.skipif(not CTRL_INSTALLED, reason="Need ctrl-benchmark for this test.") @pytest.mark.parametrize( "stream", [ "s_plus", "s_minus", "s_in", "s_out", "s_pl", skip_param("s_long", reason="Very long"), ], ) def test_ctrl_stream_support(self, stream: str, config: Config): setting_kwargs = self.fast_dev_run_kwargs.copy() setting_kwargs["dataset"] = stream setting = self.Setting(**setting_kwargs) method = RandomBaselineMethod() results = setting.apply(method, config=config) self.assert_chance_level(setting, results=results) def create_image_classification_dataset( image_shape: Tuple[int, ...], n_classes: int, n_samples_per_class: int = 100, y_offset: int = 0, ): """Copied and Adapted from https://github.com/ContinualAI/avalanche/blob/master/tests/unit_tests_utils.py """ # n_classes = 10 # image_shape = (16, 16, 3) # n_samples_per_class = 100 n_features = np.prod(image_shape) dataset = make_classification( n_samples=n_classes * n_samples_per_class, n_classes=n_classes, n_features=n_features, n_informative=n_features, n_redundant=0, ) x = torch.from_numpy(dataset[0]).reshape([-1, *image_shape]).float() y = torch.from_numpy(dataset[1]).long() # y_offset can be used to get [2,3] rather than [0,1] for instance. if y_offset: y += y_offset return TensorDataset(x, y) # train_X, test_X, train_y, test_y = train_test_split( # X, y, train_size=0.6, shuffle=True, stratify=y) # train_dataset = TensorDataset(train_X, train_y) # test_dataset = TensorDataset(test_X, test_y) # return my_nc_benchmark from typing import List, Tuple import numpy as np import pytest from torch.utils.data import DataLoader @pytest.mark.timeout(30) @pytest.mark.no_xvfb def test_concat_smooth_boundaries(config: Config): from continuum.datasets import MNIST from continuum.scenarios import ClassIncremental from continuum.tasks import split_train_val dataset = MNIST(config.data_dir, download=True, train=True) scenario = ClassIncremental( dataset, increment=2, ) print(f"Number of classes: {scenario.nb_classes}.") print(f"Number of tasks: {scenario.nb_tasks}.") train_datasets = [] valid_datasets = [] for task_id, train_taskset in enumerate(scenario): train_taskset, val_taskset = split_train_val(train_taskset, val_split=0.1) train_datasets.append(train_taskset) valid_datasets.append(val_taskset) # train_datasets = [Subset(task_dataset, np.arange(20)) for task_dataset in train_datasets] train_dataset = smooth_task_boundaries_concat(train_datasets, seed=123) xs = np.arange(len(train_dataset)) y_counters: List[Counter] = [] t_counters: List[Counter] = [] dataloader = DataLoader(train_dataset, batch_size=100, shuffle=False) for x, y, t in dataloader: y_count = Counter(y.tolist()) t_count = Counter(t.tolist()) y_counters.append(y_count) t_counters.append(t_count) classes = list(set().union(*y_counters)) nb_classes = len(classes) x = np.arange(len(dataloader)) import matplotlib.pyplot as plt fig, axes = plt.subplots(2) for label in range(nb_classes): y = [y_counter.get(label) for y_counter in y_counters] axes[0].plot(x, y, label=f"class {label}") axes[0].legend() axes[0].set_title("y") axes[0].set_xlabel("Batch index") axes[0].set_ylabel("Count in batch") for task_id in range(scenario.nb_tasks): y = [t_counter.get(task_id) for t_counter in t_counters] axes[1].plot(x, y, label=f"Task id {task_id}") axes[1].legend() axes[1].set_title("task_id") axes[1].set_xlabel("Batch index") axes[1].set_ylabel("Count in batch") plt.legend() # plt.waitforbuttonpress(10) # plt.show() ================================================ FILE: sequoia/settings/sl/continual/wrappers.py ================================================ from functools import partial, singledispatch from itertools import accumulate from typing import Any, Dict, List import gym import matplotlib.pyplot as plt import numpy as np import torch from continuum import TaskSet from torch import Tensor from sequoia.common.gym_wrappers import IterableWrapper @singledispatch def relabel(data: Any, mapping: Dict[int, int] = None) -> Any: """Relabels the given data (from a task) so they all share the same action space.""" raise NotImplementedError(f"Don't know how to relabel {data} of type {type(data)}") @relabel.register def relabel_ndarray(y: np.ndarray, mapping: Dict[int, int] = None) -> np.ndarray: new_y = y.copy() mapping = mapping or {c: i for i, c in enumerate(np.unique(y))} for old_label, new_label in mapping.items(): new_y[y == old_label] = new_label return new_y @relabel.register def relabel_tensor(y: Tensor, mapping: Dict[int, int] = None) -> Tensor: new_y = y.copy() mapping = mapping or {c: i for i, c in enumerate(torch.unique(y))} for old_label, new_label in mapping.items(): new_y[y == old_label] = new_label return new_y @relabel.register def relabel_taskset(task_set: TaskSet, mapping: Dict[int, int] = None) -> TaskSet: mapping = mapping or {c: i for i, c in enumerate(task_set.get_classes())} old_y = task_set._y new_y = relabel(old_y, mapping=mapping) assert not task_set.target_trsf # TODO: Two options here: Either create a new 'y' array, OR add a target_trsf that # does the remapping. Not sure if there's a benefit in doing one vs the other atm. # NOTE: Choosing to replace the `y` to make sure that the concatenated datasets keep # the transformed y. new_taskset = replace_taskset_attributes(task_set, y=new_y) return new_taskset from sequoia.utils.generic_functions.replace import replace @replace.register def replace_taskset_attributes(task_set: TaskSet, **kwargs) -> TaskSet: new_kwargs = dict( x=task_set._x, y=task_set._y, t=task_set._t, trsf=task_set.trsf, target_trsf=task_set.target_trsf, data_type=task_set.data_type, bounding_boxes=task_set.bounding_boxes, ) new_kwargs.update(kwargs) return type(task_set)(**new_kwargs) class SharedActionSpaceWrapper(IterableWrapper): # """ Wrapper that gets applied to a ContinualSLEnvironment def __init__(self, env: gym.Env, task_classes: List[int]): self.task_classes = task_classes super().__init__(env=env, f=partial(relabel, task_classes=self.task_classes)) from collections import Counter from .environment import ContinualSLEnvironment from .objects import ObservationType, RewardType class ShowLabelDistributionWrapper(IterableWrapper[ContinualSLEnvironment]): """Wrapper around a SL environment that shows the distribution of the labels. Shows the distributions of the task labels, if applicable. """ def __init__(self, env: ContinualSLEnvironment, env_name: str): super().__init__(env=env) self.env_name = env_name # IDEA: Could use bins for continuous values ? # IDEA: Also use a counter for the actions? self.counters: Dict[str, List[Counter]] = { "y": [], "t": [], } def observation(self, observation: ObservationType) -> ObservationType: t = observation.task_labels if t is None: t = [None] * observation.batch_size if isinstance(t, Tensor): t = t.cpu().numpy() t_count = Counter(t) self.counters["t"].append(t_count) return observation def reward(self, reward: RewardType) -> RewardType: y = reward.y.cpu().numpy() y_count = Counter(y) self.counters["y"].append(y_count) return reward def make_figure(self) -> plt.Figure: fig: plt.Figure axes: List[plt.Axes] fig, axes = plt.subplots(len(self.counters)) # total_length: int = sum(sum(counter.values()) for counter in self.y_counters) for i, (name, counters) in enumerate(self.counters.items()): # Values for the x axis are the number of samples seen so far for each # batch. x = list(accumulate(sum(counter.values()) for counter in counters)) unique_values = list(sorted(set().union(*counters))) for label in unique_values: y = [counter.get(label) for counter in counters] axes[i].plot(x, y, label=f"{name}={label}") axes[i].legend() axes[i].set_title(f"{self.env_name} {name}") axes[i].set_xlabel("Batch index") axes[i].set_ylabel("Count in batch") fig.set_size_inches((6, 4), forward=False) fig.legend() return fig ================================================ FILE: sequoia/settings/sl/discrete/__init__.py ================================================ from .setting import DiscreteTaskAgnosticSLSetting ================================================ FILE: sequoia/settings/sl/discrete/setting.py ================================================ from dataclasses import dataclass from sequoia.settings.assumptions.context_discreteness import DiscreteContextAssumption from sequoia.settings.sl.continual import ContinualSLSetting @dataclass class DiscreteTaskAgnosticSLSetting(DiscreteContextAssumption, ContinualSLSetting): """Continual Supervised Learning Setting where there are clear task boundaries, but where the task information isn't available. """ ================================================ FILE: sequoia/settings/sl/discrete/setting_test.py ================================================ from typing import Any, ClassVar, Dict, Type from sequoia.settings.sl.continual.setting_test import ( TestContinualSLSetting as ContinualSLSettingTests, ) from .setting import DiscreteTaskAgnosticSLSetting class TestDiscreteTaskAgnosticSLSetting(ContinualSLSettingTests): Setting: ClassVar[Type[Setting]] = DiscreteTaskAgnosticSLSetting # The kwargs to be passed to the Setting when we want to create a 'short' setting. fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict( dataset="mnist", batch_size=64, ) ================================================ FILE: sequoia/settings/sl/domain_incremental/__init__.py ================================================ from .setting import DomainIncrementalSLSetting ================================================ FILE: sequoia/settings/sl/domain_incremental/setting.py ================================================ from dataclasses import dataclass from sequoia.settings.sl.incremental.setting import IncrementalSLSetting from sequoia.utils.utils import constant @dataclass class DomainIncrementalSLSetting(IncrementalSLSetting): """Supervised CL Setting where the input domain shifts incrementally. Task labels and task boundaries are given at training time, but not at test-time. The crucial difference between the Domain-Incremental and Class-Incremental settings is that the action space is smaller in domain-incremental learning, as it is a `Discrete(n_classes_per_task)`, rather than the `Discrete(total_classes)` in Class-Incremental setting. For example: Create a classifier for odd vs even hand-written digits. It first be trained on digits 0 and 1, then digits 2 and 3, then digits 4 and 5, etc. At evaluation time, it will be evaluated on all digits """ shared_action_space: bool = constant(True) ================================================ FILE: sequoia/settings/sl/domain_incremental/setting_test.py ================================================ import itertools from typing import Any, ClassVar, Dict, Type import numpy as np from gym import spaces from gym.spaces import Discrete from sequoia.common.metrics import ClassificationMetrics from sequoia.common.spaces import Image, TypedDictSpace from sequoia.settings.sl.incremental.setting_test import ( TestIncrementalSLSetting as IncrementalSLSettingTests, ) from .setting import DomainIncrementalSLSetting class TestDiscreteTaskAgnosticSLSetting(IncrementalSLSettingTests): Setting: ClassVar[Type[Setting]] = DomainIncrementalSLSetting # The kwargs to be passed to the Setting when we want to create a 'short' setting. fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict( dataset="mnist", batch_size=64, ) # Override how we measure 'chance' accuracy for DomainIncrementalSetting. def assert_chance_level( self, setting: DomainIncrementalSLSetting, results: DomainIncrementalSLSetting.Results, ): assert isinstance(setting, DomainIncrementalSLSetting), setting assert isinstance(results, DomainIncrementalSLSetting.Results), results # TODO: Remove this assertion: assert isinstance(setting.action_space, spaces.Discrete) # TODO: This test so far needs the 'N' to be the number of classes in total, # not the number of classes per task. num_classes = setting.action_space.n # <-- Should be using this instead. average_accuracy = results.objective # Calculate the expected 'average' chance accuracy. # We assume that there is an equal number of classes in each task. chance_accuracy = 1 / num_classes assert 0.5 * chance_accuracy <= average_accuracy <= 1.5 * chance_accuracy for i, metric in enumerate(results.final_performance_metrics): assert isinstance(metric, ClassificationMetrics) # TODO: Same as above: Should be using `n_classes_per_task` or something # like it instead. chance_accuracy = 1 / num_classes task_accuracy = metric.accuracy # FIXME: Look into this, we're often getting results substantially # worse than chance, and to 'make the tests pass' (which is bad) # we're setting the lower bound super low, which makes no sense. assert 0.25 * chance_accuracy <= task_accuracy <= 2.1 * chance_accuracy def test_domain_incremental_mnist_setup(): setting = DomainIncrementalSLSetting( dataset="mnist", increment=2, ) setting.prepare_data(data_dir="data") setting.setup() assert setting.observation_space == TypedDictSpace( x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5), dtype=setting.Observations, ) assert setting.observation_space.dtype == setting.Observations assert setting.action_space == spaces.Discrete(2) assert setting.reward_space == spaces.Discrete(2) for i in range(setting.nb_tasks): setting.current_task_id = i batch_size = 5 train_loader = setting.train_dataloader(batch_size=batch_size) for j, (observations, rewards) in enumerate(itertools.islice(train_loader, 100)): x = observations.x t = observations.task_labels y = rewards.y print(i, j, y, t) assert x.shape == (batch_size, 3, 28, 28) assert ((0 <= y) & (y < setting.n_classes_per_task)).all() assert all(t == i) x = x.permute(0, 2, 3, 1)[0] assert x.shape == (28, 28, 3) rewards_ = train_loader.send([4 for _ in range(batch_size)]) assert (rewards.y == rewards_.y).all() train_loader.close() test_loader = setting.test_dataloader(batch_size=batch_size) for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)): assert rewards is None x = observations.x t = observations.task_labels assert t is None assert x.shape == (batch_size, 3, 28, 28) x = x.permute(0, 2, 3, 1)[0] assert x.shape == (28, 28, 3) rewards = test_loader.send([0 for _ in range(batch_size)]) assert rewards is not None y = rewards.y assert ((0 <= y) & (y < setting.n_classes_per_task)).all() ================================================ FILE: sequoia/settings/sl/environment.py ================================================ """TODO: Creates a Gym Environment (and DataLoader) from a traditional Supervised dataset. """ from collections import deque from typing import * import gym import numpy as np from gym import spaces from gym.vector.utils import batch_space from torch import Tensor from torch.utils.data import DataLoader, Dataset, IterableDataset from torch.utils.data.dataloader import _BaseDataLoaderIter from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support from sequoia.common.gym_wrappers.utils import tile_images from sequoia.common.spaces import Image from sequoia.common.transforms import Transforms from sequoia.settings.base.environment import Environment from sequoia.settings.base.objects import ( Actions, ActionType, Observations, ObservationType, Rewards, RewardType, ) from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) class PassiveEnvironment( DataLoader, Environment[Tuple[ObservationType, Optional[ActionType]], ActionType, RewardType], ): """Environment in which actions have no influence on future observations. Can either be iterated on like a normal DataLoader, in which case it gives back the observation and the reward at the same time, or as a gym Environment, in which case it gives the rewards and the next batch of observations once an action is given. Normal supervised datasets such as Mnist, ImageNet, etc. fit under this category. Similarly to Environment, this just adds some methods on top of the usual PyTorch DataLoader. """ passive: ClassVar[bool] = True metadata = {"render.modes": ["rgb_array", "human"]} def __init__( self, dataset: Union[IterableDataset, Dataset], split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None, observation_space: gym.Space = None, action_space: gym.Space = None, reward_space: gym.Space = None, n_classes: int = None, pretend_to_be_active: bool = False, strict: bool = False, drop_last: bool = False, **kwargs, ): """Creates the DataLoader/Environment for the given dataset. Parameters ---------- dataset : Union[IterableDataset, Dataset] The dataset to iterate on. Should ideally be indexable (a Map-style dataset). split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional A function to call on each item in the dataset in order to split it into Observations and Rewards, by default None, in which case we assume that the dataset items are tuples of length 2. observation_space : gym.Space, optional The single (non-batched) observation space. Default to `None`, in which case this will try to infer the shape of the space using the first item in the dataset. action_space : gym.Space, optional The non-batched action space. Defaults to None, in which case the `n_classes` argument must be passed, and the action space is assumed to be discrete (i.e. that the loader is for a classification dataset). reward_space : gym.Space, optional The non-batched reward (label) space. Defaults to `None`, in which case it will be the same as the action space (as is the case in classification). n_classes : int, optional Number of classes in the dataset. Used in case `action_space` isn't passed. Defaults to `None`. pretend_to_be_active : bool, optional Wether to withhold the rewards (labels) from the batches when being iterated on like the usual dataloader, and to only give them back after an action is received through the 'send' method. False by default, in which case this behaves exactly as a normal dataloader when being iterated on. When False, the batches yielded by this dataloader will be of the form `Tuple[Observations, Rewards]` (as usual in SL). However, when set to True, the batches will be `Tuple[Observations, None]`! Rewards will then be returned by the environment when an action is passed to the Send method. strict : bool, optional [description], by default False # Examples: ```python train_env = PassiveEnvironment(MNIST("data"), batch_size=32, num_classes=10) # The usual Dataloader-style: for x, y in train_env: # train as usual (...) # OpenAI Gym style: for episode in range(5): # NOTE: "episode" in RL is an "epoch" in SL: obs = train_env.reset() done = False while not done: actions = train_env.action_space.sample() obs, rewards, done, info = train_env.step(actions) ``` """ super().__init__(dataset=dataset, drop_last=drop_last, **kwargs) self.split_batch_fn = split_batch_fn # TODO: When the spaces aren't passed explicitly, assumes a classification dataset. if not observation_space: # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use # min_max of the dataset samples too. first_item = self.dataset[0] if isinstance(first_item, tuple): x, *_ = first_item else: assert isinstance(first_item, (np.ndarray, Tensor)) x = first_item observation_space = Image(0.0, 1.0, x.shape) if not action_space: assert n_classes, "must pass either `action_space`, or `n_classes` for now" action_space = spaces.Discrete(n_classes) elif isinstance(action_space, spaces.Discrete): n_classes = action_space.n if not reward_space: # Assuming a classification dataset by default: # (action space = reward space = Discrete(n_classes)) reward_space = action_space assert observation_space assert action_space assert reward_space self.single_observation_space: gym.Space = observation_space self.single_action_space: gym.Space = action_space self.single_reward_space: gym.Space = reward_space if self.batch_size: observation_space = batch_space(observation_space, self.batch_size) action_space = batch_space(action_space, self.batch_size) reward_space = batch_space(reward_space, self.batch_size) self.observation_space: gym.Space = add_tensor_support(observation_space) self.action_space: gym.Space = add_tensor_support(action_space) self.reward_space: gym.Space = add_tensor_support(reward_space) self.pretend_to_be_active = pretend_to_be_active self._strict = strict self._reward_queue = deque(maxlen=10) self.n_classes: Optional[int] = n_classes self._iterator: Optional[_BaseDataLoaderIter] = None # NOTE: These here are never processed with self.observation or self.reward. self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None self._done: Optional[bool] = None self._is_closed: bool = False self._action: Optional[ActionType] = None # from gym.envs.classic_control.rendering import SimpleImageViewer self.viewer = None def is_closed(self) -> bool: return self._is_closed def reset(self) -> ObservationType: """Resets the env by deleting and re-creating the dataloader iterator. TODO: This might be pretty expensive, since it's maybe re-creating all the worker processes. There might be an easier way of going about this. Returns the first batch of observations. """ if self._is_closed: raise gym.error.ClosedEnvironmentError("Can't reset: Env is closed.") self._iterator = super().__iter__() self._previous_batch = None self._current_batch = self.get_next_batch() self._done = False obs = self._current_batch[0] return self.observation(obs) def close(self) -> None: if not self._is_closed: if self.viewer: self.viewer.close() if self.num_workers > 0 and self._iterator: self._iterator._shutdown_workers() self._is_closed = True def __del__(self): if not self._is_closed: self.close() def render(self, mode: str = "rgb_array") -> np.ndarray: observations = self._current_batch[0] if isinstance(observations, Observations): image_batch = observations.x else: assert isinstance(observations, Tensor) image_batch = observations if isinstance(image_batch, Tensor): image_batch = image_batch.cpu().numpy() if self.batch_size: image_batch = tile_images(image_batch) image_batch = Transforms.channels_last_if_needed(image_batch) image_batch = Transforms.three_channels(image_batch) assert image_batch.shape[-1] in {3, 4}, image_batch.shape if image_batch.dtype == np.float32: assert (0 <= image_batch).all() and (image_batch <= 1).all() image_batch = (256 * image_batch).astype(np.uint8) assert image_batch.dtype == np.uint8 if mode == "rgb_array": # NOTE: Need to create a single image, channels_last format, and # possibly even of dtype uint8, in order for things like Monitor to # work. return image_batch if mode == "human": # return plt.imshow(image_batch) if self.viewer is None: display = None # TODO: There seems to be a bit of a bug, tests sometime fail because # "Can't connect to display: None" etc. from gym.utils import pyglet_rendering # from pyvirtualdisplay import Display # display = Display(visible=0, size=(1366, 768)) # display.start() self.viewer = pyglet_rendering.SimpleImageViewer() self.viewer.imshow(image_batch) return self.viewer.isopen raise NotImplementedError(f"Unsuported mode {mode}") def get_next_batch(self) -> Tuple[ObservationType, RewardType]: """Gets the next batch from the underlying dataset. Uses the `split_batch_fn`, if needed. Does NOT apply the self.observation and self.reward methods. Returns ------- Tuple[ObservationType, RewardType] [description] """ if self._is_closed: raise gym.error.ClosedEnvironmentError("Can't get the next batch: Env is closed.") if self._iterator is None: self._iterator = super().__iter__() try: batch = next(self._iterator) except StopIteration: batch = None if self.split_batch_fn and batch is not None: batch = self.split_batch_fn(batch) return batch # obs, reward = batch # return self.observation(obs), self.reward(reward) def step(self, action: ActionType) -> Tuple[ObservationType, RewardType, bool, Dict]: if self._is_closed: raise gym.error.ClosedEnvironmentError("Can't step on a closed env.") if self._done is None: raise gym.error.ResetNeeded("Need to reset the env before calling step.") if self._done: raise gym.error.ResetNeeded("Need to reset the env since it is done.") # Transform the Action, if needed: action = self.action(action) # NOTE: This prev/current/next setup is so we can give the right 'done' # signal. self._previous_batch = self._current_batch if self._next_batch is None: # This should only ever happen right after resetting. self._next_batch = self.get_next_batch() self._current_batch = self._next_batch self._next_batch = self.get_next_batch() # self._next_batch = self._observations, self._rewards assert self._previous_batch is not None # TODO: Return done=True when the iterator is exhausted? self._done = self._next_batch is None obs = self._current_batch[0] reward = self._previous_batch[1] # Empty for now I guess? info = {} return obs, reward, self._done, info def action(self, action: ActionType) -> ActionType: """Transform the action, if needed. Parameters ---------- action : ActionType [description] Returns ------- ActionType [description] """ return action def observation(self, observation: ObservationType) -> ObservationType: """Transform the observation, if needed. Parameters ---------- observation : ObservationType [description] Returns ------- ObservationType [description] """ return observation def reward(self, reward: RewardType) -> RewardType: """Transform the reward, if needed. Parameters ---------- reward : RewardType [description] Returns ------- RewardType [description] """ return reward def get_info(self) -> Dict: """Returns the dict to be returned as the 'info' in step(). IDEA: We could subclass this to change whats in the 'info' dict, maybe add some task information? Returns ------- Dict [description] """ return {} def __iter__(self) -> Iterable[Tuple[ObservationType, Optional[RewardType]]]: """Iterate over the dataset, yielding batches of Observations and Rewards, just like a regular DataLoader. """ # if self.split_batch_fn: # return map(self.split_batch_fn, super().__iter__()) # else: # return super().__iter__() if self._is_closed: raise gym.error.ClosedEnvironmentError("Can't iterate over closed env.") for batch in super().__iter__(): if self.split_batch_fn: observations, rewards = self.split_batch_fn(batch) else: if len(batch) != 2: raise RuntimeError( f"You need to pass a `split_batch_fn` to create " f"observations and rewards, since batch doesn't have " f"2 items: {batch}" ) observations, rewards = batch # Apply any transformations (in case this is wrapped with # TransformObservation or something similar) self._observations = self.observation(observations) self._rewards = self.reward(rewards) self._previous_batch = self._current_batch self._current_batch = (self._observations, self._rewards) if self.pretend_to_be_active: self._action = None self._reward_queue.append(self._rewards) yield self._observations, None if self._action is None: if self._strict: # IDEA: yield the same observation, as long as we dont receive an action. raise RuntimeError("Need to send an action between each observations.") logger.warning("Didn't receive an action, rewards will be delayed!.") else: yield self._observations, self._rewards def send(self, action: Actions) -> Rewards: """Return the last latch of rewards from the dataset (which were withheld if in 'active' mode) """ if self.pretend_to_be_active: self._action = action return self._reward_queue.popleft() else: # NOTE: What about sending the reward as well this way? return self._rewards ================================================ FILE: sequoia/settings/sl/environment_test.py ================================================ from typing import ClassVar, Iterable, Tuple, Type import gym import numpy as np import pytest import torch from gym import spaces from torch import Tensor from torch.utils.data import Subset, TensorDataset from torchvision.datasets import MNIST from sequoia.common.gym_wrappers import TransformObservation from sequoia.common.spaces import Image from sequoia.common.transforms import Compose, Transforms from .environment import PassiveEnvironment def check_env(env: PassiveEnvironment): """Perform a step gym-style and dataloader-style and check that items fit their respective spaces. """ reset_obs = env.reset() # Test out the reset & step methods (gym style) assert reset_obs in env.observation_space, reset_obs.shape assert env.observation_space.sample() in env.observation_space assert env.action_space.sample() in env.action_space assert env.reward_space == env.action_space step_obs, step_rewards, done, info = env.step(env.action_space.sample()) assert step_obs in env.observation_space assert step_rewards in env.reward_space # TODO: Should passive environments return a single 'done' value? or a list # like vectorized environments in RL? assert not done # shouldn't be `done`. for iter_obs, iter_rewards in env: assert iter_obs in env.observation_space, iter_obs.shape assert iter_rewards in env.reward_space break else: assert False, "should have iterated" class TestPassiveEnvironment: # NOTE: Defining tests in a class like this so we can reuse them while changing some # component, for example in the case of `env_proxy_test.py`. PassiveEnvironment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment @pytest.fixture(scope="session") def mnist_dataset(self): transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = MNIST("data", transform=transforms) return dataset def test_passive_environment_as_dataloader(self, mnist_dataset): batch_size = 1 transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = mnist_dataset obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) env: Iterable[Tuple[Tensor, Tensor]] = self.PassiveEnvironment( dataset, batch_size=batch_size, n_classes=10, observation_space=obs_space, ) for x, y in env: assert x.shape == (batch_size, 3, 28, 28) x = x.permute(0, 2, 3, 1) assert y.tolist() == [5] break # reward = env.send(4) # assert reward is None, reward # plt.imshow(x[0]) # plt.title(f"y: {y[0]}") # plt.waitforbuttonpress(10) def test_mnist_as_gym_env(self, mnist_dataset): # from continuum.datasets import MNIST dataset = mnist_dataset batch_size = 4 env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) env.seed(123) obs = env.reset() assert obs.shape == (batch_size, 3, 28, 28) for i in range(10): obs, reward, done, info = env.step(env.action_space.sample()) assert obs.shape == (batch_size, 3, 28, 28) assert reward.shape == (batch_size,) assert not done env.close() def test_env_gives_done_on_last_item(self): # from continuum.datasets import MNIST max_samples = 100 batch_size = 1 dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) env.seed(123) obs = env.reset() assert obs.shape == (batch_size, 3, 28, 28) # Starting at 1 since reset() gives one observation already. for i in range(1, max_samples): obs, reward, done, info = env.step(env.action_space.sample()) assert obs.shape == (batch_size, 3, 28, 28) assert reward.shape == (batch_size,) assert done == (i == max_samples - 1), i if done: break else: assert False, "Should have reached done=True!" assert i == max_samples - 1 env.close() def test_env_done_works_with_batch_size(self): # from continuum.datasets import MNIST max_samples = 100 batch_size = 5 max_batches = max_samples // batch_size dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) env.seed(123) obs = env.reset() assert obs.shape == (batch_size, 3, 28, 28) # Starting at 1 since reset() gives one observation already. for i in range(1, max_batches): obs, reward, done, info = env.step(env.action_space.sample()) assert obs.shape == (batch_size, 3, 28, 28) assert reward.shape == (batch_size,) assert done == (i == max_batches - 1), i if done: break else: assert False, "Should have reached done=True!" assert i == max_batches - 1 env.close() def test_multiple_epochs_env(self): max_epochs = 3 max_samples = 100 batch_size = 5 max_batches = max_samples // batch_size dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) env.seed(123) total_steps = 0 for epoch in range(max_epochs): obs = env.reset() total_steps += 1 assert obs.shape == (batch_size, 3, 28, 28) # Starting at 1 since reset() gives one observation already. for i in range(1, max_batches): obs, reward, done, info = env.step(env.action_space.sample()) assert obs.shape == (batch_size, 3, 28, 28) assert reward.shape == (batch_size,) assert done == (i == max_batches - 1), i total_steps += 1 if done: break else: assert False, "Should have reached done=True!" assert i == max_batches - 1 assert total_steps == max_batches * max_epochs env.close() def test_cant_iterate_after_closing_passive_env(self): max_epochs = 3 max_samples = 200 batch_size = 5 max_batches = max_samples // batch_size dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size, num_workers=4) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) total_steps = 0 for epoch in range(max_epochs): for obs, reward in env: assert obs.shape == (batch_size, 3, 28, 28) assert reward.shape == (batch_size,) total_steps += 1 assert total_steps == max_batches * max_epochs env.close() with pytest.raises(gym.error.ClosedEnvironmentError): for _ in zip(range(3), env): pass with pytest.raises(gym.error.ClosedEnvironmentError): env.reset() with pytest.raises(gym.error.ClosedEnvironmentError): env.get_next_batch() with pytest.raises(gym.error.ClosedEnvironmentError): env.step(env.action_space.sample()) def test_multiple_epochs_dataloader(self): """Test that we can iterate on the dataloader more than once.""" max_epochs = 3 max_samples = 200 batch_size = 5 max_batches = max_samples // batch_size dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) total_steps = 0 for epoch in range(max_epochs): for obs, reward in env: assert obs.shape == (batch_size, 3, 28, 28) assert reward.shape == (batch_size,) total_steps += 1 assert total_steps == max_batches * max_epochs def test_multiple_epochs_dataloader_with_split_batch_fn(self): """Test that we can iterate on the dataloader more than once.""" max_epochs = 3 max_samples = 200 batch_size = 5 def split_batch_fn(batch): ( x, y, ) = batch # some dummy function. return torch.zeros_like(x), y max_batches = max_samples // batch_size dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment( dataset, n_classes=10, batch_size=batch_size, split_batch_fn=split_batch_fn ) assert env.observation_space.shape == (batch_size, 3, 28, 28) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) total_steps = 0 for epoch in range(max_epochs): for obs, reward in env: assert obs.shape == (batch_size, 3, 28, 28) assert torch.all(obs == 0) assert reward.shape == (batch_size,) total_steps += 1 assert total_steps == max_batches * max_epochs def test_env_requires_reset_before_step(self): # from continuum.datasets import MNIST max_samples = 100 batch_size = 5 max_batches = max_samples // batch_size dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size) with pytest.raises(gym.error.ResetNeeded): env.step(env.action_space.sample()) def test_split_batch_fn(self): # from continuum.datasets import MNIST batch_size = 5 max_batches = 10 def split_batch_fn( batch: Tuple[Tensor, Tensor, Tensor] ) -> Tuple[Tuple[Tensor, Tensor], Tensor]: x, y, t = batch return (x, t), y # dataset = MNIST("data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])) from continuum import ClassIncremental from continuum.datasets import MNIST scenario = ClassIncremental( MNIST("data", download=True, train=True), increment=2, transformations=Compose([Transforms.to_tensor, Transforms.three_channels]), ) classes_per_task = scenario.nb_classes // scenario.nb_tasks print(f"Number of classes per task {classes_per_task}.") for i, task_dataset in enumerate(scenario): env = self.PassiveEnvironment( task_dataset, n_classes=classes_per_task, batch_size=batch_size, split_batch_fn=split_batch_fn, # Need to pass the observation space, in this case. observation_space=spaces.Dict( x=spaces.Box(low=0, high=1, shape=(3, 28, 28)), t=spaces.Discrete(scenario.nb_tasks), # task label ), action_space=spaces.Box( low=np.array([i * classes_per_task]), high=np.array([(i + 1) * classes_per_task]), dtype=int, ), ) assert spaces.Box( low=np.array([i * classes_per_task]), high=np.array([(i + 1) * classes_per_task]), dtype=int, ).shape == (1,) assert isinstance(env.observation_space["x"], spaces.Box) assert env.observation_space["x"].shape == (batch_size, 3, 28, 28) assert env.observation_space["t"].shape == (batch_size,) assert env.action_space.shape == (batch_size, 1) assert env.reward_space.shape == (batch_size, 1) env.seed(123) obs = env.reset() assert len(obs) == 2 x, t = obs assert x.shape == (batch_size, 3, 28, 28) assert t.shape == (batch_size,) obs, reward, done, info = env.step(env.action_space.sample()) assert x.shape == (batch_size, 3, 28, 28) assert t.shape == (batch_size,) assert reward.shape == (batch_size,) assert not done env.close() def test_observation_wrapper_applied_to_passive_environment(self): """Test that when we apply a gym wrapper to a PassiveEnvironment, it also affects the observations / actions / rewards produced when iterating on the env. """ batch_size = 5 transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = MNIST("data", transform=transforms) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) dataset.classes env = self.PassiveEnvironment( dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space, ) assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28)) assert env.action_space.shape == (batch_size,) assert env.reward_space == env.action_space env.seed(123) check_env(env) # Apply a transformation that changes the observation space. env = TransformObservation(env=env, f=Compose([Transforms.resize_64x64])) assert env.observation_space == Image(0, 1, (batch_size, 3, 64, 64)) assert env.action_space.shape == (batch_size,) assert env.reward_space.shape == (batch_size,) env.seed(123) check_env(env) env.close() # from continuum import ClassIncremental # from continuum.datasets import MNIST # from continuum.tasks import split_train_val def test_passive_environment_interaction(self): """Test the gym.Env-style interaction with a PassiveEnvironment.""" batch_size = 5 transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) max_samples = 100 dataset = Subset(dataset, list(range(max_samples))) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) env = self.PassiveEnvironment( dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space, pretend_to_be_active=True, ) assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28)) assert env.action_space.shape == (batch_size,) assert env.reward_space == env.action_space env.seed(123) obs = env.reset() assert obs in env.observation_space obs, reward, done, info = env.step(env.action_space.sample()) assert reward is not None assert obs in env.observation_space for i, (obs, reward) in enumerate(env): assert obs in env.observation_space assert reward is None other_reward = env.send(env.action_space.sample()) assert other_reward is not None assert i == max_samples // batch_size - 1 def test_passive_environment_without_pretend_to_be_active(self): """Test the gym.Env-style interaction with a PassiveEnvironment.""" batch_size = 5 transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) max_samples = 100 dataset = Subset(dataset, list(range(max_samples))) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) env = self.PassiveEnvironment( dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space, pretend_to_be_active=False, ) assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28)) assert env.action_space.shape == (batch_size,) assert env.reward_space == env.action_space env.seed(123) obs = env.reset() assert obs in env.observation_space obs, reward, done, info = env.step(env.action_space.sample()) assert reward is not None for i, (obs, reward) in enumerate(env): assert reward is not None other_reward = env.send(env.action_space.sample()) assert (other_reward == reward).all() assert i == max_samples // batch_size - 1 def test_passive_environment_needs_actions_to_be_sent(self): """Test the 'active dataloader' style interaction.""" batch_size = 10 transforms = Compose([Transforms.to_tensor, Transforms.three_channels]) dataset = MNIST( "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]) ) max_samples = 105 dataset = Subset(dataset, list(range(max_samples))) obs_space = Image(0, 255, (1, 28, 28), np.uint8) obs_space = transforms(obs_space) env = PassiveEnvironment( dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space, pretend_to_be_active=True, strict=True, ) with pytest.raises(RuntimeError): for i, (obs, _) in enumerate(env): pass env = self.PassiveEnvironment( dataset, n_classes=10, batch_size=batch_size, observation_space=obs_space, pretend_to_be_active=True, ) for i, (obs, _) in enumerate(env): assert isinstance(obs, Tensor) action = env.action_space.sample()[: obs.shape[0]] rewards = env.send(action) assert rewards is not None assert rewards.shape[0] == action.shape[0] def test_passive_environment_active_mode_action_reward_match(self): """Test the 'active dataloader' style interaction.""" batch_size = 10 max_samples = 105 dataset = TensorDataset( torch.arange(max_samples).reshape([max_samples, 1, 1, 1]) * torch.ones([max_samples, 3, 32, 32]), torch.arange(max_samples), ) dataset = Subset(dataset, list(range(max_samples))) env = self.PassiveEnvironment( dataset, n_classes=max_samples, batch_size=batch_size, pretend_to_be_active=True, ) for i, (obs, _) in enumerate(env): print(i) expected_obs = torch.arange(i * batch_size, (i + 1) * batch_size) expected_obs = expected_obs[: obs.shape[0]] assert (obs == expected_obs.reshape([obs.shape[0], 1, 1, 1])).all() action = torch.arange(i * batch_size, (i + 1) * batch_size, dtype=int) action = action[: obs.shape[0]] rewards = env.send(action) assert (rewards == action).all() ================================================ FILE: sequoia/settings/sl/incremental/__init__.py ================================================ from .environment import IncrementalSLEnvironment from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType from .results import IncrementalSLResults from .setting import IncrementalSLSetting Environment = IncrementalSLEnvironment ClassIncrementalSetting = IncrementalSLSetting ================================================ FILE: sequoia/settings/sl/incremental/environment.py ================================================ from typing import Any, Callable, Tuple, Union import gym from gym import spaces from torch.utils.data import Dataset, IterableDataset from sequoia.common.spaces import TypedDictSpace from sequoia.settings.base.objects import Rewards as BaseRewards from sequoia.settings.sl.continual.environment import ContinualSLEnvironment from sequoia.utils.logging_utils import get_logger from ..continual.environment import ContinualSLTestEnvironment from .objects import Actions, ActionType, Observations, ObservationType, RewardType logger = get_logger(__name__) class IncrementalSLEnvironment(ContinualSLEnvironment[ObservationType, ActionType, RewardType]): def __init__( self, dataset: Union[Dataset, IterableDataset], hide_task_labels: bool = True, observation_space: TypedDictSpace[ObservationType] = None, action_space: gym.Space = None, reward_space: gym.Space = None, split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None, pretend_to_be_active: bool = False, strict: bool = False, one_epoch_only: bool = False, **kwargs, ): super().__init__( dataset, hide_task_labels=hide_task_labels, observation_space=observation_space, action_space=action_space, reward_space=reward_space, split_batch_fn=split_batch_fn, pretend_to_be_active=pretend_to_be_active, strict=strict, one_epoch_only=one_epoch_only, **kwargs, ) import bisect import warnings from typing import Any, Dict import numpy as np import torch from torch.nn import functional as F from sequoia.common.gym_wrappers.utils import tile_images from sequoia.common.metrics import ClassificationMetrics from sequoia.common.transforms import Transforms from sequoia.settings.assumptions.iid_results import TaskResults from sequoia.settings.assumptions.incremental import TaskSequenceResults from .results import IncrementalSLResults class IncrementalSLTestEnvironment(ContinualSLTestEnvironment): def __init__(self, env: gym.Env, *args, task_schedule: Dict[int, Any] = None, **kwargs): super().__init__(env, *args, **kwargs) self._steps = 0 # TODO: Maybe rework this so we don't depend on the test phase being one task at # a time, instead store the test metrics in the task corresponding to the # task_label in the observations. # BUG: The problem is, right now we're depending on being passed the # 'task schedule', which we then use to get the task ids. This # is actually pretty bad, because if the class ordering was changed between # training and testing, then, this wouldn't actually report the correct results! self.task_schedule = task_schedule or {} self.task_steps = sorted(self.task_schedule.keys()) self.results: TaskSequenceResults[ClassificationMetrics] = TaskSequenceResults( task_results=[TaskResults() for step in self.task_steps] ) # self._reset = False # NOTE: The task schedule is already in terms of the number of batches. self.boundary_steps = [step for step in self.task_schedule.keys()] def get_results(self) -> IncrementalSLResults: return self.results def reset(self): return super().reset() # if not self._reset: # logger.debug("Initial reset.") # self._reset = True # return super().reset() # else: # logger.debug("Resetting the env closes it.") # self.close() # return None def _before_step(self, action): self._action = action return super()._before_step(action) def _after_step(self, observation, reward, done, info): if not isinstance(reward, BaseRewards): reward = BaseRewards(y=torch.as_tensor(reward)) batch_size = reward.batch_size action = self._action assert action is not None if isinstance(self.action_space, (spaces.MultiDiscrete, spaces.MultiBinary)): n_classes = self.action_space.nvec[0] from sequoia.settings.assumptions.task_type import ClassificationActions if not isinstance(action, ClassificationActions): if isinstance(action, Actions): y_pred = action.y_pred # 'upgrade', creating some fake logits. else: y_pred = torch.as_tensor(action) fake_logits = F.one_hot(y_pred, n_classes) action = ClassificationActions(y_pred=y_pred, logits=fake_logits) else: raise NotImplementedError( f"TODO: Remove the assumption here that the env is a classification env " f"({self.action_space}, {self.reward_space})" ) if action.batch_size != reward.batch_size: warnings.warn( RuntimeWarning( f"Truncating the action since its batch size {action.batch_size} " f"is larger than the rewards': ({reward.batch_size})" ) ) action = action[:, : reward.batch_size] # TODO: Use some kind of generic `get_metrics(actions: Actions, rewards: Rewards)` # function instead. y = reward.y logits = action.logits y_pred = action.y_pred metric = ClassificationMetrics(y=y, logits=logits, y_pred=y_pred) reward = metric.accuracy task_steps = sorted(self.task_schedule.keys()) assert 0 in task_steps, task_steps nb_tasks = len(task_steps) assert nb_tasks >= 1 # Given the step, find the task id. task_id = bisect.bisect_right(task_steps, self._steps) - 1 self.results.task_results[task_id].metrics.append(metric) self._steps += 1 # FIXME: Temporary fix: TODO: Make sure this doesn't truncate the number of labels if self._steps == self.step_limit - 1: self.close() done = True # Debugging issue with Monitor class: # return super()._after_step(observation, reward, done, info) if not self.enabled: return done if done and self.env_semantics_autoreset: # For envs with BlockingReset wrapping VNCEnv, this observation will be the # first one of the new episode if self.config.render: self.reset_video_recorder() self.episode_id += 1 self._flush() # Record stats: (TODO: accuracy serves as the 'reward'!) reward_for_stats = metric.accuracy self.stats_recorder.after_step(observation, reward_for_stats, done, info) # Record video if self.config and self.config.render: self.video_recorder.capture_frame() return done def _after_reset(self, observation: Observations): image_batch = observation.numpy().x # Need to create a single image with the right dtype for the Monitor # from gym to create gifs / videos with it. if self.batch_size: # Need to tile the image batch so it can be seen as a single image # by the Monitor. image_batch = tile_images(image_batch) image_batch = Transforms.channels_last_if_needed(image_batch) if image_batch.dtype == np.float32: assert (0 <= image_batch).all() and (image_batch <= 1).all() image_batch = (256 * image_batch).astype(np.uint8) assert image_batch.dtype == np.uint8 # Debugging this issue here: # super()._after_reset(image_batch) # -- Code from Monitor if not self.enabled: return # Reset the stat count self.stats_recorder.after_reset(observation) if self.config.render: self.reset_video_recorder() # Bump *after* all reset activity has finished self.episode_id += 1 self._flush() # -- def render(self, mode="human", **kwargs): # NOTE: This doesn't get called, because the video recorder uses # self.env.render(), rather than self.render() # TODO: Render when the 'render' argument in config is set to True. image_batch = super().render(mode=mode, **kwargs) if mode == "rgb_array" and self.batch_size: image_batch = tile_images(image_batch) return image_batch ================================================ FILE: sequoia/settings/sl/incremental/environment_test.py ================================================ from functools import partial from typing import ClassVar, Type from sequoia.common.metrics import ClassificationMetrics from sequoia.settings.assumptions.discrete_results import TaskSequenceResults from ..continual.environment_test import ( TestContinualSLTestEnvironment as ContinualSLTestEnvironmentTests, ) from .environment import IncrementalSLEnvironment, IncrementalSLTestEnvironment class TestIncrementalSLTestEnvironment(ContinualSLTestEnvironmentTests): Environment: ClassVar[Type[Environment]] = IncrementalSLEnvironment TestEnvironment: ClassVar[Type[TestEnvironment]] = partial( IncrementalSLTestEnvironment, task_schedule={i * 20: {} for i in range(5)} ) def validate_results(self, results: TaskSequenceResults): # NOTE: We're not checking that the results here represent the entire transfer # matrix, because the test env is only used for one test loop. # The Setting creates the transfer matrix using multiple of these # `TaskSequenceResults` objects, each of which is obtained after training on # a task in the training loop. assert isinstance(results, TaskSequenceResults) assert isinstance(results.average_metrics, ClassificationMetrics) assert results.objective > 0 # TODO: Fix this check: assert results.average_metrics.n_samples in [95, 100] ================================================ FILE: sequoia/settings/sl/incremental/objects.py ================================================ """ Observations/Actions/Rewards particular to an IncrementalSLSetting. This is just meant as a cleaner way to import the Observations/Actions/Rewards. """ from dataclasses import dataclass from typing import Optional, TypeVar from torch import Tensor from sequoia.settings.sl.discrete.setting import DiscreteTaskAgnosticSLSetting # from sequoia.settings.sl.continual.objects import Observations, Actions, Rewards # from sequoia.settings.assumptions.context_visibility @dataclass(frozen=True) class IncrementalSLObservations(DiscreteTaskAgnosticSLSetting.Observations): """Incremental Observations, in a supervised context.""" x: Tensor task_labels: Optional[Tensor] = None @dataclass(frozen=True) class IncrementalSLActions(DiscreteTaskAgnosticSLSetting.Actions): """Incremental Actions, in a supervised (passive) context.""" @dataclass(frozen=True) class IncrementalSLRewards(DiscreteTaskAgnosticSLSetting.Rewards): """Incremental Rewards, in a supervised context.""" Observations = IncrementalSLObservations Actions = IncrementalSLActions Rewards = IncrementalSLRewards # Environment = C # Results = IncrementalSLResults # ObservationType = TypeVar("ObservationType", bound=Observations) # ActionType = TypeVar("ActionType", bound=Actions) # RewardType = TypeVar("RewardType", bound=Rewards) ObservationType = TypeVar("ObservationType", bound=IncrementalSLObservations) ActionType = TypeVar("ActionType", bound=IncrementalSLActions) RewardType = TypeVar("RewardType", bound=IncrementalSLRewards) # from .environment import IncrementalSLEnvironment # Environment = IncrementalSLEnvironment ================================================ FILE: sequoia/settings/sl/incremental/results.py ================================================ """ Object representing the "Results" of applying a Method on a Class-Incremental Setting. This object basically calculates the 'objective' specific to this setting as well as provide a set of methods for making useful plots and utilities for logging results to wandb. """ from typing import ClassVar import matplotlib.pyplot as plt import wandb from sequoia.settings.assumptions.incremental import IncrementalAssumption from sequoia.utils.logging_utils import get_logger from sequoia.utils.plotting import autolabel logger = get_logger(__name__) class IncrementalSLResults(IncrementalAssumption.Results): """Results for a ClassIncrementalSetting. The main objective in this setting is the average test accuracy over all tasks. The plots to generate are: - Accuracy per task - Average Test Accuray over the course of testing - Confusion matrix at the end of testing All of these will be created from the list of test metrics (Classification metrics for now). TODO: Add back Wandb logging somehow, even though we might be doing the evaluation loop ourselves. TODO: Fix this for the 'incremental regression' case. """ # Higher accuracy => better lower_is_better: ClassVar[bool] = False objective_name: ClassVar[str] = "Average Accuracy" # Minimum runtime considered (in hours). # (No extra points are obtained when going faster than this.) min_runtime_hours: ClassVar[float] = 5.0 / 60.0 # 5 minutes # Maximum runtime allowed (in hours). max_runtime_hours: ClassVar[float] = 1.0 # one hour. def make_plots(self): plots_dict = {} if wandb.run: # TODO: Add a Histogram plot from wandb? pass else: # TODO: Add back the plots. plots_dict["task_metrics"] = self.task_accuracies_plot() return plots_dict def task_accuracies_plot(self): figure: plt.Figure axes: plt.Axes figure, axes = plt.subplots() x = list(range(self.num_tasks)) y = [metrics.accuracy for metrics in self.final_performance_metrics] rects = axes.bar(x, y) axes.set_title("Task Accuracy") axes.set_xlabel("Task") axes.set_ylabel("Accuracy") axes.set_ylim(0, 1.0) autolabel(axes, rects) return figure def cumul_metrics_plot(self): """TODO: Create a plot that shows the evolution of the test performance over all test tasks seen so far. (during training or during testing?) """ figure: plt.Figure axes: plt.Axes figure, axes = plt.subplots() x = list(range(self.num_tasks)) y = [] metric_name: str = "" for i in range(self.num_tasks): previous_metrics = self.metrics_matrix[i][: i + 1] cumul_metrics = sum(previous_metrics) y.append(cumul_metrics.objective) if not metric_name: metric_name = cumul_metrics.objective_name # x = [metrics.n_samples for metrics in cumulative_metrics] # y = [metrics.accuracy for metrics in cumulative_metrics] axes.plot(x, y) axes.set_xlabel("# of learned tasks") axes.set_ylabel(f"Average {metric_name} on tasks seen so far") return figure # def summary(self) -> str: # s = StringIO() # with redirect_stdout(s): # for i, average_task_metrics in enumerate(self[-1].average_metrics_per_task): # print(f"Test Results on task {i}: {average_task_metrics}") # print(f"Average test metrics accross all the test tasks: {self[-1].average_metrics}") # s.seek(0) # return s.read() # def to_log_dict(self) -> Dict[str, float]: # results = {} # results[self.objective_name] = self.objective # average_metrics = self[-1].average_metrics # if isinstance(average_metrics, ClassificationMetrics): # results["accuracy/average"] = average_metrics.accuracy # elif isinstance(average_metrics, RegressionMetrics): # results["mse/average"] = average_metrics.mse # else: # results["average metrics"] = average_metrics # for i, average_task_metrics in enumerate(self[-1].average_metrics_per_task): # if isinstance(average_task_metrics, ClassificationMetrics): # results[f"accuracy/task_{i}"] = average_task_metrics.accuracy # elif isinstance(average_task_metrics, RegressionMetrics): # results[f"mse/task_{i}"] = average_task_metrics.mse # else: # results[f"task_{i}"] = average_task_metrics # return results ================================================ FILE: sequoia/settings/sl/incremental/setting.py ================================================ """ Defines a `Setting` subclass for "Class-Incremental" Continual Learning. Example command to run a method on this setting (in debug mode): ``` python main.py --setting class_incremental --method baseline --debug \ --batch_size 128 --max_epochs 1 ``` Class-Incremental definition from [iCaRL](https://arxiv.org/abs/1611.07725): "Formally, we demand the following three properties of an algorithm to qualify as class-incremental: i) it should be trainable from a stream of data in which examples of different classes occur at different times ii) it should at any time provide a competitive multi-class classifier for the classes observed so far, iii) its computational requirements and memory footprint should remain bounded, or at least grow very slowly, with respect to the number of classes seen so far." """ import itertools from dataclasses import dataclass from pathlib import Path from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Type, Union from continuum import ClassIncremental from continuum.datasets import _ContinuumDataset from continuum.scenarios.base import _BaseScenario from simple_parsing import choice, field from torch import Tensor from torch.utils.data import Dataset import wandb from sequoia.common.config import Config from sequoia.common.gym_wrappers import TransformObservation from sequoia.settings.assumptions.incremental import IncrementalAssumption, IncrementalResults from sequoia.settings.base import Method from sequoia.settings.rl.wrappers import HideTaskLabelsWrapper from sequoia.settings.sl.continual.wrappers import relabel from sequoia.settings.sl.environment import Actions, PassiveEnvironment, Rewards from sequoia.settings.sl.setting import SLSetting from sequoia.settings.sl.wrappers import MeasureSLPerformanceWrapper from sequoia.utils import get_logger from ..discrete.setting import DiscreteTaskAgnosticSLSetting from .environment import IncrementalSLEnvironment, IncrementalSLTestEnvironment from .objects import Actions, Observations, Rewards from .results import IncrementalSLResults logger = get_logger(__name__) # # NOTE: This dict reflects the observation space of the different datasets # # *BEFORE* any transforms are applied. The resulting property on the Setting is # # based on this 'base' observation space, passed through the transforms. # # TODO: Make it possible to automatically add tensor support if the dtype passed to a # # gym space is a `torch.dtype`. # tensor_space = add_tensor_support @dataclass class IncrementalSLSetting(IncrementalAssumption, DiscreteTaskAgnosticSLSetting): """Supervised Setting where the data is a sequence of 'tasks'. This class is basically is the supervised version of an Incremental Setting The current task can be set at the `current_task_id` attribute. """ Results: ClassVar[Type[IncrementalResults]] = IncrementalSLResults Observations: ClassVar[Type[Observations]] = Observations Actions: ClassVar[Type[Actions]] = Actions Rewards: ClassVar[Type[Rewards]] = Rewards Environment: ClassVar[Type[SLSetting.Environment]] = IncrementalSLEnvironment[ Observations, Actions, Rewards ] Results: ClassVar[Type[IncrementalSLResults]] = IncrementalSLResults # Class variable holding a dict of the names and types of all available # datasets. available_datasets: ClassVar[ Dict[str, Type[_ContinuumDataset]] ] = DiscreteTaskAgnosticSLSetting.available_datasets.copy() # A continual dataset to use. (Should be taken from the continuum package). dataset: str = choice(available_datasets.keys(), default="mnist") # TODO: IDEA: Adding these fields/constructor arguments so that people can pass a # custom ready-made `Scenario` from continuum to use (not sure this is a good idea # though) train_cl_scenario: Optional[_BaseScenario] = field(default=None, cmd=False, to_dict=False) test_cl_scenario: Optional[_BaseScenario] = field(default=None, cmd=False, to_dict=False) def __post_init__(self): """Initializes the fields of the Setting (and LightningDataModule), including the transforms, shapes, etc. """ super().__post_init__() # TODO: For now we assume a fixed, equal number of classes per task, for # sake of simplicity. We could take out this assumption, but it might # make things a bit more complicated. assert isinstance(self.increment, int) assert isinstance(self.test_increment, int) self.n_classes_per_task: int = self.increment self.test_increment = self.increment def apply(self, method: Method, config: Config = None) -> IncrementalSLResults: """Apply the given method on this setting to producing some results.""" # TODO: It still isn't super clear what should be in charge of creating # the config, and how to create it, when it isn't passed explicitly. self.config = config or self._setup_config(method) assert self.config method.configure(setting=self) # Run the main loop (which is defined in IncrementalAssumption). results: IncrementalSLResults = super().main_loop(method) logger.info(results.summary()) method.receive_results(self, results=results) return results def prepare_data(self, data_dir: Path = None, **kwargs): self.config = self.config or Config.from_args(self._argv, strict=False) # if self.batch_size is None: # logger.warning(UserWarning( # f"Using the default batch size of 32. (You can set the " # f"batch size by passing a value to the Setting constructor, or " # f"by setting the attribute inside your 'configure' method) " # )) # self.batch_size = 32 # data_dir = data_dir or self.data_dir or self.config.data_dir # self.make_dataset(data_dir, download=True) # self.data_dir = data_dir return super().prepare_data(data_dir=data_dir, **kwargs) def setup(self, stage: str = None): super().setup(stage=stage) # TODO: Adding this temporarily just for the competition: The TestEnvironment # needs access to this information in order to split the metrics for each task. self.test_boundary_steps = [0] + list(itertools.accumulate(map(len, self.test_datasets)))[ :-1 ] self.test_steps = sum(map(len, self.test_datasets)) # self.test_steps = [0] + list( # itertools.accumulate(map(len, self.test_datasets)) # )[:-1] # def _make_train_dataset(self) -> Dataset: # return self.train_datasets[self.current_task_id] # def _make_val_dataset(self) -> Dataset: # return self.val_datasets[self.current_task_id] # def _make_test_dataset(self) -> Dataset: # return concat(self.test_datasets) def train_dataloader( self, batch_size: int = None, num_workers: int = None ) -> IncrementalSLEnvironment: """Returns a DataLoader for the train dataset of the current task.""" # NOTE: The implementation for this is in `DiscreteTaskAgnosticSLSetting`: # TODO: Fix the inheritance order so that clicking on this super().train_dataloader gets us # to the right point in code. # train_env = DiscreteTaskAgnosticSLSetting.train_dataloader( # self, batch_size=batch_size, num_workers=num_workers # ) train_env = super().train_dataloader(batch_size=batch_size, num_workers=num_workers) # Overwrite the wandb prefix for the `MeasureSLPerformanceWrapper` to include # the task id. if self.monitor_training_performance: # Overwrite the 'wandb prefix' assert isinstance(train_env, MeasureSLPerformanceWrapper) train_env.wandb_prefix = f"Train/Task {self.current_task_id}" self.train_env = train_env return self.train_env def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> PassiveEnvironment: """Returns a DataLoader for the validation dataset of the current task.""" val_env = super().val_dataloader(batch_size=batch_size, num_workers=num_workers) return self.val_env def test_dataloader( self, batch_size: int = None, num_workers: int = None ) -> PassiveEnvironment["ClassIncrementalSetting.Observations", Actions, Rewards]: """Returns a DataLoader for the test dataset of the current task.""" if not self.has_prepared_data: self.prepare_data() if not self.has_setup_test: self.setup("test") # Join all the test datasets. dataset = self._make_test_dataset() batch_size = batch_size if batch_size is not None else self.batch_size num_workers = num_workers if num_workers is not None else self.num_workers env = self.Environment( dataset, batch_size=batch_size, num_workers=num_workers, hide_task_labels=(not self.task_labels_at_test_time), observation_space=self.observation_space, action_space=self.action_space, reward_space=self.reward_space, Observations=self.Observations, Actions=self.Actions, Rewards=self.Rewards, pretend_to_be_active=True, shuffle=False, drop_last=self.drop_last, ) # NOTE: The transforms from `self.transforms` (the 'base' transforms) were # already added when creating the datasets and the CL scenario. test_transforms = self.transforms + self.test_transforms if test_transforms: env = TransformObservation(env, f=test_transforms) if self.config.device: # TODO: Put this before or after the image transforms? from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors env = ConvertToFromTensors(env, device=self.config.device) # TODO: Remove this, I don't think it's used anymore, since `hide_task_labels` # is an argument to self.Environment now. if not self.task_labels_at_test_time: env = HideTaskLabelsWrapper(env) # TODO: Remove this once that stuff with the 'fake' task schedule is fixed below, # base it on the equivalent in ContinualSLSetting instead (which should actually # be moved into DiscreteTaskAgnosticSL, now that I think about it!) # Testing this out, we're gonna have a "test schedule" like this to try # to imitate the MultiTaskEnvironment in RL. transition_steps = [0] + list(itertools.accumulate(map(len, self.test_datasets)))[:-1] # FIXME: Creating a 'task schedule' for the TestEnvironment, mimicing what's in # the RL settings. test_task_schedule = dict.fromkeys( [step // (env.batch_size or 1) for step in transition_steps], range(len(transition_steps)), ) # TODO: Configure the 'monitoring' dir properly. if wandb.run: test_dir = wandb.run.dir else: test_dir = self.config.log_dir test_loop_max_steps = len(dataset) // (env.batch_size or 1) # TODO: Fix this: iteration doesn't ever end for some reason. test_env = IncrementalSLTestEnvironment( env, directory=test_dir, step_limit=test_loop_max_steps, task_schedule=test_task_schedule, force=True, config=self.config, video_callable=None if (wandb.run or self.config.render) else False, ) if self.test_env: self.test_env.close() self.test_env = test_env return self.test_env def split_batch_function( self, training: bool ) -> Callable[[Tuple[Tensor, ...]], Tuple[Observations, Rewards]]: """Returns a callable that is used to split a batch into observations and rewards.""" assert False, "TODO: Removing this." task_classes = {i: self.task_classes(i, train=training) for i in range(self.nb_tasks)} def split_batch(batch: Tuple[Tensor, ...]) -> Tuple[Observations, Rewards]: """Splits the batch into a tuple of Observations and Rewards. Parameters ---------- batch : Tuple[Tensor, ...] A batch of data coming from the dataset. Returns ------- Tuple[Observations, Rewards] A tuple of Observations and Rewards. """ # In this context (class_incremental), we will always have 3 items per # batch, because we use the ClassIncremental scenario from Continuum. assert len(batch) == 3 x, y, t = batch # Relabel y so it is always in [0, n_classes_per_task) for each task. if self.shared_action_space: y = relabel(y, task_classes) if (training and not self.task_labels_at_train_time) or ( not training and not self.task_labels_at_test_time ): # Remove the task labels if we're not currently allowed to have # them. # TODO: Using None might cause some issues. Maybe set -1 instead? t = None observations = self.Observations(x=x, task_labels=t) rewards = self.Rewards(y=y) return observations, rewards return split_batch def make_train_cl_scenario(self, train_dataset: _ContinuumDataset) -> _BaseScenario: """Creates a train ClassIncremental object from continuum.""" return ClassIncremental( train_dataset, nb_tasks=self.nb_tasks, increment=self.increment, initial_increment=self.initial_increment, class_order=self.class_order, transformations=self.transforms, ) def make_test_cl_scenario(self, test_dataset: _ContinuumDataset) -> _BaseScenario: """Creates a test ClassIncremental object from continuum.""" return ClassIncremental( test_dataset, nb_tasks=self.nb_tasks, increment=self.test_increment, initial_increment=self.test_initial_increment, class_order=self.test_class_order, transformations=self.transforms, ) def make_dataset( self, data_dir: Path, download: bool = True, train: bool = True, **kwargs ) -> _ContinuumDataset: # TODO: #7 Use this method here to fix the errors that happen when # trying to create every single dataset from continuum. data_dir = Path(data_dir) if not data_dir.exists(): data_dir.mkdir(parents=True, exist_ok=True) if self.dataset in self.available_datasets: dataset_class = self.available_datasets[self.dataset] return dataset_class(data_path=data_dir, download=download, train=train, **kwargs) elif self.dataset in self.available_datasets.values(): dataset_class = self.dataset return dataset_class(data_path=data_dir, download=download, train=train, **kwargs) elif isinstance(self.dataset, Dataset): logger.info(f"Using a custom dataset {self.dataset}") return self.dataset else: raise NotImplementedError(self.dataset) # These methods below are used by the MultiHeadModel, mostly when # using a multihead model, to figure out how to relabel the batches, or how # many classes there are in the current task (since we support a different # number of classes per task). # TODO: Remove this? Since I'm simplifying to a fixed number of classes per # task for now... def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]: """Returns the number of classes in the given task.""" increment = self.increment if train else self.test_increment if isinstance(increment, list): return increment[task_id] return increment def num_classes_in_current_task(self, train: bool = None) -> int: """Returns the number of classes in the current task.""" # TODO: Its ugly to have the 'method' tell us if we're currently in # train/eval/test, no? Maybe just make a method for each? return self.num_classes_in_task(self._current_task_id, train=train) def task_classes(self, task_id: int, train: bool) -> List[int]: """Gives back the 'true' labels present in the given task.""" start_index = sum(self.num_classes_in_task(i, train) for i in range(task_id)) end_index = start_index + self.num_classes_in_task(task_id, train) if train: return self.class_order[start_index:end_index] # Set the same ordering as during training, by default. self.test_class_order = self.test_class_order or self.class_order return self.test_class_order[start_index:end_index] def current_task_classes(self, train: bool) -> List[int]: """Gives back the labels present in the current task.""" return self.task_classes(self._current_task_id, train) def _check_environments(self): """Do a quick check to make sure that the dataloaders give back the right observations / reward types. """ for loader_method in [ self.train_dataloader, self.val_dataloader, self.test_dataloader, ]: logger.debug(f"Checking loader method {loader_method.__name__}") env = loader_method(batch_size=5) obs = env.reset() assert isinstance(obs, self.Observations) # Convert the observation to numpy arrays, to make it easier to # check if the elements are in the spaces. obs = obs.numpy() # take a slice of the first batch, to get sample tensors. first_obs = obs[:, 0] # TODO: Here we'd like to be able to check that the first observation # is inside the observation space, but we can't do that because the # task label might be None, and so that would make it fail. x, task_label = first_obs if task_label is None: assert x in self.observation_space["x"] for i in range(5): actions = env.action_space.sample() observations, rewards, done, info = env.step(actions) assert isinstance(observations, self.Observations), type(observations) assert isinstance(rewards, self.Rewards), type(rewards) actions = env.action_space.sample() if done: observations = env.reset() env.close() # def relabel(y: Tensor, task_classes: Dict[int, List[int]]) -> Tensor: # """ Relabel the elements of 'y' to their index in the list of classes for # their task. # Example: # >>> import torch # >>> y = torch.as_tensor([2, 3, 2, 3, 2, 2]) # >>> task_classes = {0: [0, 1], 1: [2, 3]} # >>> relabel(y, task_classes) # tensor([0, 1, 0, 1, 0, 0]) # """ # # TODO: Double-check that this never leaves any zeros where it shouldn't. # new_y = torch.zeros_like(y) # # assert unique_y <= set(task_classes), (unique_y, task_classes) # for task_id, task_true_classes in task_classes.items(): # for i, label in enumerate(task_true_classes): # new_y[y == label] = i # return new_y # This is just meant as a cleaner way to import the Observations/Actions/Rewards # than particular setting. Observations = IncrementalSLSetting.Observations Actions = IncrementalSLSetting.Actions Rewards = IncrementalSLSetting.Rewards # TODO: I wouldn't want these above to overwrite / interfere with the import of # the "base" versions of these objects from sequoia.settings.bases.objects, which are # imported in settings/__init__.py. Will have to check that doing # `from .passive import *` over there doesn't actually import these here. if __name__ == "__main__": import doctest doctest.testmod() ================================================ FILE: sequoia/settings/sl/incremental/setting_test.py ================================================ from typing import Any, ClassVar, Dict, Type import pytest from continuum import ClassIncremental from gym import spaces from gym.spaces import Discrete, Space from sequoia.common.config import Config from sequoia.common.metrics import ClassificationMetrics from sequoia.common.spaces import Sparse from sequoia.common.spaces.typed_dict import TypedDictSpace from sequoia.conftest import skip_param, xfail_param, requires_pyglet from sequoia.settings.sl.continual.envs import get_action_space from ..discrete.setting_test import ( TestDiscreteTaskAgnosticSLSetting as DiscreteTaskAgnosticSLSettingTests, ) from .setting import IncrementalSLSetting from .setting import IncrementalSLSetting as ClassIncrementalSetting class TestIncrementalSLSetting(DiscreteTaskAgnosticSLSettingTests): Setting: ClassVar[Type[IncrementalSLSetting]] = IncrementalSLSetting fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict( dataset="mnist", batch_size=64, ) def assert_chance_level( self, setting: IncrementalSLSetting, results: IncrementalSLSetting.Results ): assert isinstance(setting, ClassIncrementalSetting), setting assert isinstance(results, ClassIncrementalSetting.Results), results # TODO: Remove this assertion: assert isinstance(setting.action_space, spaces.Discrete) # TODO: This test so far needs the 'N' to be the number of classes in total, # not the number of classes per task. # num_classes = setting.action_space.n # <-- Should be using this instead. if setting._using_custom_envs_foreach_task: num_classes = get_action_space(setting.train_datasets[0]).n else: num_classes = get_action_space(setting.dataset).n average_accuracy = results.objective # Calculate the expected 'average' chance accuracy. # We assume that there is an equal number of classes in each task. # chance_accuracy = 1 / setting.n_classes_per_task chance_accuracy = 1 / num_classes assert 0.5 * chance_accuracy <= average_accuracy <= 1.5 * chance_accuracy for i, metric in enumerate(results.final_performance_metrics): assert isinstance(metric, ClassificationMetrics) # TODO: Same as above: Should be using `n_classes_per_task` or something # like it instead. chance_accuracy = 1 / setting.n_classes_per_task chance_accuracy = 1 / num_classes task_accuracy = metric.accuracy # FIXME: Look into this, we're often getting results substantially # worse than chance, and to 'make the tests pass' (which is bad) # we're setting the lower bound super low, which makes no sense. assert 0.25 * chance_accuracy <= task_accuracy <= 2.1 * chance_accuracy # TODO: Add a fixture that specifies a data folder common to all tests. @pytest.mark.parametrize( "dataset_name", [ "mnist", # "synbols", skip_param("synbols", reason="Causes tests to hang for some reason?"), "cifar10", "cifar100", "fashionmnist", "kmnist", xfail_param("emnist", reason="Bug in emnist, requires split positional arg?"), xfail_param("qmnist", reason="Bug in qmnist, 229421 not in list"), "mnistfellowship", "cifar10", "cifarfellowship", ], ) @pytest.mark.timeout(60) def test_observation_spaces_match_dataset(self, dataset_name: str): """Test to check that the `observation_spaces` and `reward_spaces` dict really correspond to the entries of the corresponding datasets, before we do anything with them. """ # CIFARFellowship, MNISTFellowship, ImageNet100, # ImageNet1000, CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST, # QMNIST, FashionMNIST, dataset_class = self.Setting.available_datasets[dataset_name] dataset = dataset_class("data") observation_space = self.Setting.base_observation_spaces[dataset_name] reward_space = self.Setting.base_reward_spaces[dataset_name] for task_dataset in ClassIncremental(dataset, nb_tasks=1): first_item = task_dataset[0] x, t, y = first_item assert x.shape == observation_space.shape assert x in observation_space, (x.min(), x.max(), observation_space) assert y in reward_space @pytest.mark.parametrize("dataset_name", ["mnist"]) @pytest.mark.parametrize("nb_tasks", [2, 5]) def test_task_label_space(self, dataset_name: str, nb_tasks: int): nb_tasks = 2 setting = ClassIncrementalSetting( dataset=dataset_name, nb_tasks=nb_tasks, ) task_label_space: Space = setting.observation_space.task_labels # TODO: Should the task label space be Sparse[Discrete]? or Discrete? assert task_label_space == Discrete(nb_tasks) @pytest.mark.parametrize("dataset_name", ["mnist"]) def test_setting_obs_space_changes_when_transforms_change(self, dataset_name: str): """TODO: Test that the `observation_space` property on the ClassIncrementalSetting reflects the data produced by the dataloaders, and that changing a transform on a Setting also changes the value of that property on both the Setting itself, as well as on the corresponding dataloaders/environments. """ import torch # dataset = ClassIncrementalSetting.available_datasets[dataset_name] setting = self.Setting( dataset=dataset_name, nb_tasks=1, transforms=[], train_transforms=[], val_transforms=[], test_transforms=[], batch_size=None, num_workers=0, config=Config(device=torch.device("cpu")), ) base_x_space = type(setting).base_observation_spaces[dataset_name] assert setting.observation_space.x == base_x_space # TODO: Should the 'transforms' apply to ALL the environments, and the # train/valid/test transforms apply only to those envs? from sequoia.common.transforms import Transforms from sequoia.common.transforms import Compose transforms = Compose( [ Transforms.to_tensor, Transforms.three_channels, Transforms.channels_first_if_needed, Transforms.resize_32x32, ] ) setting.transforms = transforms expected_x_space = transforms(base_x_space) # Check the the `x` property of the setting's observation space has also been transformed: assert setting.observation_space.x == expected_x_space # When there are no transforms in setting.train_tansforms, the observation # space of the Setting and of the train dataloader are the same: train_env = setting.train_dataloader(batch_size=None, num_workers=None) assert not setting.train_transforms assert train_env.observation_space == setting.observation_space reset_obs = train_env.reset() assert reset_obs["x"] in train_env.observation_space["x"], reset_obs[0].shape assert reset_obs["task_labels"] in train_env.observation_space["task_labels"] assert reset_obs in train_env.observation_space assert reset_obs in setting.observation_space assert isinstance(reset_obs, ClassIncrementalSetting.Observations) # When we add a transform to `setting.train_transforms` the observation # space of the Setting and of the train dataloader are different: # NOTE: Transforms should act as the 'base', and train_transforms gets added to it. setting.train_transforms = [Transforms.resize_64x64] train_env = setting.train_dataloader(batch_size=None) assert train_env.f == setting.transforms + setting.train_transforms assert train_env.observation_space.x.shape == (3, 64, 64) assert train_env.reset() in train_env.observation_space # The Setting's property didn't change: assert setting.observation_space.x.shape == (3, 32, 32) # # ---------- Same tests for the val_environment -------------- # val_env = setting.val_dataloader(batch_size=None) assert val_env.observation_space == setting.observation_space assert val_env.reset() in val_env.observation_space # When we add a transform to `setting.val_transforms` the observation # space of the Setting and of the val dataloader are different: setting.val_transforms = [Transforms.resize_64x64] val_env = setting.val_dataloader(batch_size=None) assert val_env.observation_space != setting.observation_space assert val_env.observation_space.x.shape == (3, 64, 64) assert val_env.reset() in val_env.observation_space # # ---------- Same tests for the test_environment -------------- # with setting.test_dataloader(batch_size=None) as test_env: if setting.task_labels_at_test_time: assert test_env.observation_space == setting.observation_space else: assert isinstance(test_env.observation_space["task_labels"], Sparse) obs = test_env.reset() assert obs in test_env.observation_space setting.test_transforms = [Transforms.resize_64x64] with setting.test_dataloader(batch_size=None) as test_env: # When we add a transform to `setting.test_transforms` the observation # space of the Setting and of the test dataloader are different: assert test_env.observation_space != setting.observation_space assert test_env.observation_space.x.shape == (3, 64, 64) assert test_env.reset() in test_env.observation_space # TODO: This renders, even when we're using the pytest-xvfb plugin, which might # mean that it's actually creating a Display somewhere? @pytest.mark.timeout(30) @requires_pyglet def test_render(config: Config): setting = ClassIncrementalSetting(dataset="mnist", config=config) import matplotlib.pyplot as plt plt.ion() for task_id in range(setting.nb_tasks): setting.current_task_id = task_id env = setting.train_dataloader(batch_size=16, num_workers=0) obs = env.reset() done = False while not done: obs, rewards, done, info = env.step(env.action_space.sample()) env.render("human") # break env.close() def test_class_incremental_random_baseline(): pass ================================================ FILE: sequoia/settings/sl/incremental/unused_batch_transforms.py ================================================ from dataclasses import dataclass, replace from functools import partial from typing import Callable, List, Tuple, Union import gym import torch from gym.wrappers import TransformReward from simple_parsing import list_field from torch import Tensor from sequoia.settings import Observations, Rewards def relabel(y: Tensor, task_classes: List[int]) -> Tensor: new_y = torch.zeros_like(y) for i, label in enumerate(task_classes): new_y[y == label] = i return new_y class RelabelWrapper(TransformReward): def __init__(self, env: gym.Env, task_classes: List[int]): self.task_classes = task_classes super().__init__(env=env, f=partial(relabel, task_classes=self.task_classes)) @dataclass class RelabelTransform(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]): """Transform that puts labels back into the [0, n_classes_per_task] range. For instance, if it's given a bunch of images that have labels [2, 3, 2] and the `task_classes = [2, 3]`, then the new labels will be `[0, 1, 0]`. Note that the order in `task_classes` is perserved. For instance, in the above example, if `task_classes = [3, 2]`, then the new labels would be `[1, 0, 1]`. IMPORTANT: This transform needs to be applied BEFORE ReorderTensor or SplitBatch, because it expects the batch to be (x, y, t) order """ task_classes: List[int] = list_field() def __call__(self, batch: Tuple[Tensor, ...]): assert isinstance(batch, (list, tuple)), batch if len(batch) == 2: observations, rewards = batch if len(batch) == 1: return batch x, y, *task_labels = batch # if y.max() == len(self.task_classes): # # No need to relabel this batch. # # @lebrice: Can we really skip relabeling in this case? # return batch new_y = relabel(y, task_classes=self.task_classes) return (x, new_y, *task_labels) @dataclass class ReorderTensors(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]): # reorder tensors in the batch so the task labels go into the observations: # (x, y, t) -> (x, t, y) # TODO: Change this to: # (x, y, t) -> ((x, t), y) maybe? def __call__(self, batch: Tuple[Tensor, ...]): assert isinstance(batch, (list, tuple)) if len(batch) == 2: observations, rewards = batch if isinstance(observations, Observations) and isinstance(rewards, Rewards): return batch elif len(batch) == 3: x, y, *extra_labels = batch if len(extra_labels) == 1: task_labels = extra_labels[0] return (x, task_labels, y) assert False, batch @dataclass class DropTaskLabels(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]): def __call__(self, batch: Union[Tuple[Tensor, ...], Observations]): assert isinstance(batch, (tuple, list)) if len(batch) == 2: observations, rewards = batch if isinstance(observations, Observations) and isinstance(rewards, Rewards): return replace(observations, task_labels=None), rewards elif len(batch) == 3: # This is tricky. If we're placed BEFORE the 'ReorderTensors', # then the ordering is `x, y, t`, while if we're AFTER, the # ordering would then be 'x, t, y'.. x, v1, v2 = batch # IDEA: For now, we assume that the 'y' is a lot more erratic than # the task label. Therefore, the number of unique consecutive should # be greater for `y` than for `t`. u1 = len(v1.unique_consecutive()) u2 = len(v2.unique_consecutive()) if u1 > u2: y, t = v1, v2 elif u1 == u2: # hmmm wtf? assert False, (v1, v2, u1, u2) else: y, t = v2, v1 return x, y, t assert False, f"There are no task labels to drop: {batch}" ================================================ FILE: sequoia/settings/sl/multi_task/__init__.py ================================================ from .setting import MultiTaskSLSetting Observations = MultiTaskSLSetting.Observations Actions = MultiTaskSLSetting.Actions Rewards = MultiTaskSLSetting.Rewards # TODO? # Environment = MultiTaskSetting.Environment ================================================ FILE: sequoia/settings/sl/multi_task/setting.py ================================================ from dataclasses import dataclass from typing import ClassVar, Type from sequoia.settings.sl.task_incremental import TaskIncrementalSLSetting from sequoia.utils import get_logger # TODO: Playing around with this 'constant_property' idea as an alternative to the # init=False of `constant` field. from sequoia.utils.utils import constant_property from ..task_incremental.setting import TaskIncrementalSLSetting from ..traditional.setting import TraditionalSLSetting logger = get_logger(__name__) @dataclass class MultiTaskSLSetting(TaskIncrementalSLSetting, TraditionalSLSetting): """IID version of the Task-Incremental Setting, where the data is shuffled. Can be used to estimate the upper bound performance of Task-Incremental CL Methods. """ Results: ClassVar[Type[Results]] = TraditionalSLSetting.Results stationary_context: bool = constant_property(True) def __post_init__(self): super().__post_init__() # We reuse the training loop from Incremental, by modifying it so it # discriminates between "phases" and "tasks". @property def phases(self) -> int: return 1 # def _make_train_dataset(self) -> Dataset: # """ Returns the training dataset, which in this case will be shuffled. # IDEA: We could probably do it the same way in both RL and SL: # 1. Create the 'datasets' for all the tasks; # 2. "concatenate"+"Shuffle" the "datasets": # - in SL: ConcatDataset / shuffle the datasets # - in RL: Create a true `MultiTaskEnvironment` that accepts a list of envs as # an input and alternates between environments at each episode. # (either round-robin style, or randomly) # Returns # ------- # Dataset # """ # joined_dataset = concat(self.train_datasets) # return shuffle(joined_dataset, seed=self.config.seed) # def _make_val_dataset(self) -> Dataset: # joined_dataset = concat(self.val_datasets) # return shuffle(joined_dataset, seed=self.config.seed) # def _make_test_dataset(self) -> Dataset: # return concat(self.test_datasets) # def train_dataloader( # self, batch_size: int = None, num_workers: int = None # ) -> PassiveEnvironment: # """Returns a DataLoader for the training dataset. # This dataloader will yield batches which will very likely contain data from # multiple different tasks, and will contain task labels. # Parameters # ---------- # batch_size : int, optional # Batch size to use. Defaults to None, in which case the value of # `self.batch_size` is used. # num_workers : int, optional # Number of workers to use. Defaults to None, in which case the value of # `self.num_workers` is used. # Returns # ------- # PassiveEnvironment # A "Passive" Dataloader/gym.Env. # """ # return super().train_dataloader(batch_size=batch_size, num_workers=num_workers) # def val_dataloader( # self, batch_size: int = None, num_workers: int = None # ) -> PassiveEnvironment: # """Returns a DataLoader for the validation dataset. # This dataloader will yield batches which will very likely contain data from # multiple different tasks, and will contain task labels. # Parameters # ---------- # batch_size : int, optional # Batch size to use. Defaults to None, in which case the value of # `self.batch_size` is used. # num_workers : int, optional # Number of workers to use. Defaults to None, in which case the value of # `self.num_workers` is used. # Returns # ------- # PassiveEnvironment # A "Passive" Dataloader/gym.Env. # """ # return super().val_dataloader(batch_size=batch_size, num_workers=num_workers) # def test_dataloader( # self, batch_size: int = None, num_workers: int = None # ) -> PassiveEnvironment: # """Returns a DataLoader for the test dataset. # This dataloader will yield batches which will very likely contain data from # multiple different tasks, and will contain task labels. # Unlike the train and validation environments, the test environment will not # yield rewards until the action has been sent to it using either `send` (when # iterating in the DataLoader-style) or `step` (when interacting with the # environment in the gym.Env style). For more info, take a look at the # `PassiveEnvironment` class. # Parameters # ---------- # batch_size : int, optional # Batch size to use. Defaults to None, in which case the value of # `self.batch_size` is used. # num_workers : int, optional # Number of workers to use. Defaults to None, in which case the value of # `self.num_workers` is used. # Returns # ------- # PassiveEnvironment # A "Passive" Dataloader/gym.Env. # """ # return super().test_dataloader(batch_size=batch_size, num_workers=num_workers) # def test_loop(self, method: Method) -> "IncrementalAssumption.Results": # """ Runs a multi-task test loop and returns the Results. # """ # return super().test_loop(method) # # TODO: # test_env = self.test_dataloader() # try: # # If the Method has `test` defined, use it. # method.test(test_env) # test_env.close() # # Get the metrics from the test environment # test_results: Results = test_env.get_results() # print(f"Test results: {test_results}") # return test_results # except NotImplementedError: # logger.info( # f"Will query the method for actions at each step, " # f"since it doesn't implement a `test` method." # ) # obs = test_env.reset() # # TODO: Do we always have a maximum number of steps? or of episodes? # # Will it work the same for Supervised and Reinforcement learning? # max_steps: int = getattr(test_env, "step_limit", None) # # Reset on the last step is causing trouble, since the env is closed. # pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test") # episode = 0 # for step in pbar: # if test_env.is_closed(): # logger.debug(f"Env is closed") # break # # logger.debug(f"At step {step}") # action = method.get_actions(obs, test_env.action_space) # # logger.debug(f"action: {action}") # # TODO: Remove this: # if isinstance(action, Actions): # action = action.y_pred # if isinstance(action, Tensor): # action = action.cpu().numpy() # obs, reward, done, info = test_env.step(action) # if done and not test_env.is_closed(): # # logger.debug(f"end of test episode {episode}") # obs = test_env.reset() # episode += 1 # test_env.close() # test_results = test_env.get_results() # return test_results ================================================ FILE: sequoia/settings/sl/multi_task/setting_test.py ================================================ """ TODO: Tests for the multi-task SL setting. - Has only one train/test 'phase' - The nb_tasks attribute should still reflect the number of tasks. - on_task_switch should never be called during training - (not so sure during testing) - Task labels should be available for both training and testing. - Classes shouldn't be relabeled. """ import dataclasses import itertools import numpy as np import pytest import torch from gym.spaces import Discrete from sequoia.common.spaces import Image, TypedDictSpace from sequoia.settings import Actions, Environment from .setting import MultiTaskSLSetting def check_is_multitask_env(env: Environment, has_rewards: bool): # dataloader-style: for i, (observations, rewards) in itertools.islice(enumerate(env), 10): assert isinstance(observations, MultiTaskSLSetting.Observations) task_labels = observations.task_labels.cpu().tolist() assert len(set(task_labels)) > 1 if has_rewards: assert isinstance(rewards, MultiTaskSLSetting.Rewards) # Check that there is no relabelling happening, by checking that there are # more different y's then there are usually classes in each batch. assert len(set(rewards.y.cpu().tolist())) > 2 else: assert rewards is None # gym-style interaction: obs = env.reset() assert isinstance(env.observation_space, TypedDictSpace) space_shapes = {k: s.shape for k, s in env.observation_space.spaces.items()} space_dtypes = {k: s.dtype for k, s in env.observation_space.spaces.items()} # assert False, (obs.keys(), obs.numpy().keys()) assert obs.shapes == space_shapes assert obs.numpy().shapes == space_shapes assert obs.dtypes == space_dtypes x_space = env.observation_space.x t_space = env.observation_space.task_labels assert obs.x in x_space, (obs.x, x_space) assert obs.task_labels in t_space, (obs.task_labels, t_space) assert isinstance(obs, env.observation_space.dtype) assert obs in env.observation_space done = False steps = 0 while not done and steps < 10: action = Actions(y_pred=torch.randint(10, [env.batch_size])) # BUG: convert_tensors seems to be causing issues again: We shouldn't have # to manually convert obs to numpy before checking `obs in obs_space`. # TODO: Also not super clean that we can't just do `action in action_space`. # assert action.numpy() in env.action_space assert action.y_pred.numpy() in env.action_space obs, reward, done, info = env.step(action) assert obs.numpy() in env.observation_space assert reward.y in env.reward_space steps += 1 assert done is False assert steps == 10 from sequoia.common.config import Config def test_multitask_setting(config: Config): config = dataclasses.replace(config, device=torch.device("cpu")) setting = MultiTaskSLSetting(dataset="mnist", config=config) assert setting.phases == 1 assert setting.nb_tasks == 5 from sequoia.common.spaces.image import ImageTensorSpace from sequoia.common.spaces.tensor_spaces import TensorDiscrete assert setting.observation_space == TypedDictSpace( x=ImageTensorSpace(0.0, 1.0, (3, 28, 28), np.float32, device=config.device), task_labels=TensorDiscrete(5, device=config.device), dtype=setting.Observations, ) assert setting.action_space == Discrete(10) # assert setting.config.device.type == "cuda" if torch.cuda.is_available() else "cpu" with setting.train_dataloader(batch_size=32, num_workers=0) as train_env: check_is_multitask_env(train_env, has_rewards=True) with setting.val_dataloader(batch_size=32, num_workers=0) as val_env: check_is_multitask_env(val_env, has_rewards=True) @pytest.mark.xfail(reason="test environments still operate in a 'sequential tasks' way") def test_multitask_setting_test_env(): setting = MultiTaskSLSetting(dataset="mnist") assert setting.phases == 1 assert setting.nb_tasks == 5 assert setting.observation_space == TypedDictSpace( x=Image(0.0, 1.0, (3, 28, 28), np.float32), task_labels=Discrete(5) ) assert setting.action_space == Discrete(10) # FIXME: Wait, actually, this test environment, will it be shuffled, or not? with setting.test_dataloader(batch_size=32, num_workers=0) as test_env: check_is_multitask_env(test_env, has_rewards=False) from sequoia.settings.assumptions.incremental_test import DummyMethod def test_on_task_switch_is_called_multi_task(): setting = MultiTaskSLSetting( dataset="mnist", nb_tasks=5, # train_steps_per_task=100, # max_steps=500, # test_steps_per_task=100, train_transforms=[], test_transforms=[], val_transforms=[], ) method = DummyMethod() results = setting.apply(method) assert method.n_task_switches == setting.nb_tasks assert method.received_task_ids == list(range(setting.nb_tasks)) assert method.received_while_training == [False for _ in range(setting.nb_tasks)] ================================================ FILE: sequoia/settings/sl/setting.py ================================================ from dataclasses import dataclass from typing import ClassVar, Dict, List, Type, TypeVar from pytorch_lightning import LightningDataModule from simple_parsing import choice, list_field from torch import Tensor from sequoia.common.transforms import Transforms from sequoia.settings import Setting from sequoia.settings.base.environment import ActionType, ObservationType, RewardType from .environment import PassiveEnvironment @dataclass class SLSetting(Setting[PassiveEnvironment[ObservationType, ActionType, RewardType]]): """Supervised Learning Setting. Core assuptions: - Current actions have no influence on future observations. - The environment gives back "dense feedback", (the 'reward' associated with all possible actions at each step, rather than a single action) For example, supervised learning is a Passive setting, since predicting a label has no effect on the reward you're given (the label) or on the next samples you observe. """ @dataclass(frozen=True) class Observations(Setting.Observations): x: Tensor @dataclass(frozen=True) class Actions(Setting.Actions): pass @dataclass(frozen=True) class Rewards(Setting.Rewards): pass Environment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment # TODO: rename/remove this, as it isn't used, and there could be some # confusion with the available_datasets in task-incremental and iid. # Also, since those are already LightningDataModules, what should we do? available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = { # "mnist": MNISTDataModule, # "fashion_mnist": FashionMNISTDataModule, # "cifar10": CIFAR10DataModule, # "imagenet": ImagenetDataModule, } # Which setup / dataset to use. # The setups/dataset are implemented as `LightningDataModule`s. dataset: str = choice(available_datasets.keys(), default="mnist") # Transforms to be applied to the observatons of the train/valid/test # environments. transforms: List[Transforms] = list_field() # Transforms to be applied to the training datasets. train_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels) # Transforms to be applied to the validation datasets. val_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels) # Transforms to be applied to the testing datasets. test_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels) # Wether to drop the last batch (during training). Useful if you use batchnorm, to # avoid having an error when the batch_size is 1. drop_last: bool = False SettingType = TypeVar("SettingType", bound=SLSetting) ================================================ FILE: sequoia/settings/sl/task_incremental/__init__.py ================================================ """ Task Incremental Setting Adds the additional assumption that the task labels are available at test time. """ # 1. Import stuff from the Parent # NOTE: Here there doesn't seem to be a need for a custom 'Results' class for # TaskIncremental, given how similar it is to ClassIncremental. # 2. Import what we overwrite/customize from .setting import TaskIncrementalSLSetting ================================================ FILE: sequoia/settings/sl/task_incremental/setting.py ================================================ """ Defines the Task-Incremental CL Setting. Task-Incremental CL is a variant of the ClassIncrementalSetting with task labels available at both train and test time. """ from dataclasses import dataclass from typing import ClassVar, Type, TypeVar from sequoia.settings.assumptions.task_incremental import TaskIncrementalAssumption from sequoia.settings.sl.incremental import IncrementalSLResults as TaskIncrementalSLResults from sequoia.settings.sl.incremental import IncrementalSLSetting from sequoia.utils.utils import constant @dataclass class TaskIncrementalSLSetting(TaskIncrementalAssumption, IncrementalSLSetting): """Setting where data arrives in a series of Tasks, and where the task labels are always available (both train and test time). """ Results: ClassVar[Type[Results]] = TaskIncrementalSLResults # Wether task labels are available at train time. (Forced to True.) task_labels_at_train_time: bool = constant(True) # Wether task labels are available at test time. # TODO: Is this really always True for all Task-Incremental Settings? task_labels_at_test_time: bool = constant(True) SettingType = TypeVar("SettingType", bound=TaskIncrementalSLSetting) ================================================ FILE: sequoia/settings/sl/task_incremental/setting_test.py ================================================ import itertools import math from typing import * import pytest from sequoia.common.config import Config from sequoia.settings.assumptions.incremental_test import OtherDummyMethod from sequoia.utils.logging_utils import get_logger from ..incremental.setting_test import TestIncrementalSLSetting as IncrementalSLSettingTests from .setting import TaskIncrementalSLSetting logger = get_logger(__name__) class TestTaskIncrementalSLSetting(IncrementalSLSettingTests): Setting: ClassVar[Type[Setting]] = TaskIncrementalSLSetting fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict( dataset="mnist", batch_size=64, ) def check_only_right_classes_present(setting: TaskIncrementalSLSetting): """Checks that only the classes within each task are present. TODO: This should be refactored to be based more on the reward space. """ assert setting.task_labels_at_test_time and setting.task_labels_at_test_time for i in range(setting.nb_tasks): setting.current_task_id = i batch_size = 5 train_loader = setting.train_dataloader(batch_size=batch_size) # get the classes in the current task: task_classes = setting.task_classes(i, train=True) for j, (observations, rewards) in enumerate(itertools.islice(train_loader, 100)): x = observations.x t = observations.task_labels if setting.task_labels_at_train_time: assert t is not None y = rewards.y print(i, j, y, t) y_in_task_classes = [y_i in task_classes for y_i in y.tolist()] assert all(y_in_task_classes) assert x.shape == (batch_size, 3, 28, 28) x = x.permute(0, 2, 3, 1)[0] assert x.shape == (28, 28, 3) reward = train_loader.send([4 for _ in range(batch_size)]) if rewards is not None: # IF we send somethign to the env, then it should give back the same # labels as for the last batch. assert (reward.y == rewards.y).all() train_loader.close() valid_loader = setting.val_dataloader(batch_size=batch_size) for j, (observations, rewards) in enumerate(itertools.islice(valid_loader, 100)): x = observations.x t = observations.task_labels if setting.monitor_training_performance: assert rewards is None if setting.task_labels_at_train_time: assert t is not None y = rewards.y print(i, j, y, t) y_in_task_classes = [y_i in task_classes for y_i in y.tolist()] assert all(y_in_task_classes) assert x.shape == (batch_size, 3, 28, 28) x = x.permute(0, 2, 3, 1)[0] assert x.shape == (28, 28, 3) reward = valid_loader.send(valid_loader.action_space.sample()) if rewards is not None: # IF we send somethign to the env, then it should give back the same # labels as for the last batch. assert (reward.y == rewards.y).all() valid_loader.close() # FIXME: get the classes in the current task, at test-time. task_classes = list(range(setting.reward_space.n)) test_loader = setting.test_dataloader(batch_size=batch_size) assert not test_loader.unwrapped._hide_task_labels for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)): x = observations.x t = observations.task_labels if setting.task_labels_at_test_time: assert t is not None if rewards is None: rewards = test_loader.send(test_loader.action_space.sample()) assert rewards is not None assert rewards.y is not None y = rewards.y print(i, j, y, t) y_in_task_classes = [y_i in task_classes for y_i in y.tolist()] assert all(y_in_task_classes) assert x.shape == (batch_size, 3, 28, 28) x = x.permute(0, 2, 3, 1)[0] assert x.shape == (28, 28, 3) test_loader.close() def test_task_incremental_mnist_setup(): setting = TaskIncrementalSLSetting( dataset="mnist", increment=2, # BUG: When num_workers > 0, some of the tests hang, but only when running *all* the tests! # num_workers=0, ) assert setting.task_labels_at_test_time and setting.task_labels_at_train_time setting.prepare_data(data_dir="data") setting.setup() check_only_right_classes_present(setting) @pytest.mark.xfail( reason=( "TODO: Continuum actually re-labels the images to 0-10, regardless of the " "class order. The actual images are ok though." ) ) def test_task_incremental_mnist_setup_reversed_class_order(): setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=5, class_order=list(reversed(range(10))), # num_workers=0, ) assert setting.task_labels_at_train_time and setting.task_labels_at_test_time assert ( setting.known_task_boundaries_at_train_time and setting.known_task_boundaries_at_test_time ) setting.prepare_data(data_dir="data") setting.setup() check_only_right_classes_present(setting) def test_class_incremental_mnist_setup_with_nb_tasks(): setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=2, num_workers=0, ) assert setting.increment == 5 setting.prepare_data(data_dir="data") setting.setup() assert len(setting.train_datasets) == 2 assert len(setting.val_datasets) == 2 assert len(setting.test_datasets) == 2 check_only_right_classes_present(setting) def test_action_space_always_matches_obs_batch_size(config: Config): """Make sure that the batch size in the observations always matches the action space provided to the `get_actions` method. ALSO: - Make sure that we get asked for actions for all the observations in the test set, even when there is a shorter last batch. - The total number of observations match the dataset size. """ nb_tasks = 5 # TODO: The `drop_last` argument seems to not be used correctly by the dataloaders / test loop. batch_size = 128 # HUH why are we doing this here? setting = TaskIncrementalSLSetting( dataset="mnist", nb_tasks=nb_tasks, batch_size=batch_size, num_workers=4, monitor_training_performance=True, drop_last=False, ) # 10_000 examples in the test dataset of mnist. total_samples = len(setting.test_dataloader().dataset) method = OtherDummyMethod() _ = setting.apply(method, config=config) # Multiply by nb_tasks because the test loop is ran after each training task. assert sum(method.batch_sizes) == total_samples * nb_tasks assert len(method.batch_sizes) == math.ceil(total_samples / batch_size) * nb_tasks if total_samples % batch_size == 0: assert set(method.batch_sizes) == {batch_size} else: assert set(method.batch_sizes) == {batch_size, total_samples % batch_size} ================================================ FILE: sequoia/settings/sl/traditional/__init__.py ================================================ # 1. Import stuff from the Parent # 2. Import what we overwrite/customize from .results import IIDResults from .setting import TraditionalSLSetting ================================================ FILE: sequoia/settings/sl/traditional/results.py ================================================ """Defines the Results of apply a Method to an IID Setting. """ from pathlib import Path from typing import Dict, Union import matplotlib.pyplot as plt from sequoia.settings.sl.incremental.results import IncrementalSLResults class IIDResults(IncrementalSLResults): """Results of applying a Method on an IID Setting. # TODO: Refactor this to be based on `TaskResults`? """ def save_to_dir(self, save_dir: Union[str, Path]) -> None: # TODO: Add wandb logging here somehow. save_dir = Path(save_dir) save_dir.mkdir(exist_ok=True, parents=True) plots: Dict[str, plt.Figure] = self.make_plots() # Save the actual 'results' object to a file in the save dir. results_json_path = save_dir / "results.json" self.save(results_json_path) print(f"Saved a copy of the results to {results_json_path}") print(f"\nPlots: {plots}\n") for fig_name, figure in plots.items(): print(f"fig_name: {fig_name}") # figure.show() # plt.waitforbuttonpress(10) path = (save_dir / fig_name).with_suffix(".jpg") path.parent.mkdir(exist_ok=True, parents=True) figure.savefig(path) print(f"Saved figure at path {path}") def make_plots(self) -> Dict[str, plt.Figure]: plots_dict = super().make_plots() # TODO: Could add a Confusion Matrix plot? plots_dict.update({"class_accuracies": self.class_accuracies_plot()}) return plots_dict def class_accuracies_plot(self): figure: plt.Figure axes: plt.Axes figure, axes = plt.subplots() y = self[0][0].average_metrics.class_accuracy x = list(range(len(y))) rects = axes.bar(x, y) axes.set_title("Class Accuracy") axes.set_xlabel("Class") axes.set_ylabel("Accuracy") axes.set_ylim(0, 1.0) # autolabel(axes, rects) return figure # def summary(self) -> str: # s = StringIO() # with redirect_stdout(s): # print(f"Average Accuracy: {self.average_metrics.accuracy:.2%}") # for i, class_acc in enumerate(self.average_metrics.class_accuracy): # print(f"Accuracy for class {i}: {class_acc:.3%}") # s.seek(0) # return s.read() def to_log_dict(self, verbose: bool = False) -> Dict[str, float]: results = super().to_log_dict(verbose=verbose) # Remove the useless 2-levels of nesting from the log_dict results.update(results.pop("Task 0").pop("Task 0")) # assert False, json.dumps(results, indent="\t") return results ================================================ FILE: sequoia/settings/sl/traditional/setting.py ================================================ """ Defines the TraditionalSLSetting, as a variant of the TaskIncremental setting with only one task. """ from dataclasses import dataclass from typing import ClassVar, List, Optional, Type, TypeVar, Union from sequoia.utils.utils import constant # TODO: Re-arrange the 'multiple-inheritance' with domain-incremental and # task-incremental, this might not be 100% accurate, as the "IID" you get from # moving down from domain-incremental (+ only one task) might not be exactly the same as # the one you get form TaskIncremental (+ only one task) from ..incremental import IncrementalSLSetting from .results import IIDResults # TODO: IDEA: Add the pytorch lightning datamodules in the list of # 'available datasets' for the IID setting, and make sure that it doesn't mess # up the methods in the parents (train/val loop, dataloader construction, etc.) # IDEA: Maybe overwrite the 'train/val/test_dataloader' methods on the setting # and when the chosen dataset is a LightnignDataModule, then just return the # result from the corresponding method on the LightningDataModule, rather than # from super(). # from pl_bolts.datamodules import (CIFAR10DataModule, FashionMNISTDataModule, # ImagenetDataModule, MNISTDataModule) @dataclass class TraditionalSLSetting(IncrementalSLSetting): """Your 'usual' supervised learning Setting, where the samples are i.i.d. This Setting is slightly different than the others, in that it can be recovered in *two* different ways: - As a variant of Task-Incremental learning, but where there is only one task; - As a variant of Domain-Incremental learning, but where there is only one task. """ Results: ClassVar[Type[Results]] = IIDResults # Number of tasks. nb_tasks: int = 5 stationary_context: bool = constant(True) # increment: Union[int, List[int]] = constant(None) # A different task size applied only for the first task. # Desactivated if `increment` is a list. initial_increment: int = constant(None) # An optional custom class order, used for NC. class_order: Optional[List[int]] = constant(None) # Either number of classes per task, or a list specifying for # every task the amount of new classes (defaults to the value of # `increment`). test_increment: Optional[Union[List[int], int]] = constant(None) # A different task size applied only for the first test task. # Desactivated if `test_increment` is a list. Defaults to the # value of `initial_increment`. test_initial_increment: Optional[int] = constant(None) # An optional custom class order for testing, used for NC. # Defaults to the value of `class_order`. test_class_order: Optional[List[int]] = constant(None) @property def phases(self) -> int: """The number of training 'phases', i.e. how many times `method.fit` will be called. Defaults to the number of tasks, but may be different, for instance in so-called Multi-Task Settings, this is set to 1. """ return 1 if self.stationary_context else self.nb_tasks SettingType = TypeVar("SettingType", bound=TraditionalSLSetting) if __name__ == "__main__": TraditionalSLSetting.main() ================================================ FILE: sequoia/settings/sl/traditional/setting_test.py ================================================ import pytest from sequoia.methods import Method from sequoia.settings import ( ClassIncrementalSetting, DomainIncrementalSLSetting, TaskIncrementalSLSetting, ) from ..continual.setting import ContinualSLSetting from ..discrete.setting import DiscreteTaskAgnosticSLSetting from ..incremental.setting import IncrementalSLSetting from ..multi_task.setting import MultiTaskSLSetting from .setting import TraditionalSLSetting class ContinualSLMethod(Method, target_setting=ContinualSLSetting): pass class DiscreteTaskAgnosticSLMethod(Method, target_setting=DiscreteTaskAgnosticSLSetting): pass class IncrementalSLMethod(Method, target_setting=IncrementalSLSetting): pass class ClassIncrementalSLMethod(Method, target_setting=ClassIncrementalSetting): pass class DomainIncrementalSLMethod(Method, target_setting=DomainIncrementalSLSetting): pass class TaskIncrementalSLMethod(Method, target_setting=TaskIncrementalSLSetting): pass class TraditionalSLMethod(Method, target_setting=TraditionalSLSetting): pass class MultiTaskSLMethod(Method, target_setting=MultiTaskSLSetting): pass def test_methods_applicable_to_iid_setting(): """Test to make sure that Methods that are applicable to the Domain-Incremental are applicable to the IID Setting, same for those targetting the Task-Incremental setting. """ assert ContinualSLMethod.is_applicable(ContinualSLSetting) assert ContinualSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert ContinualSLMethod.is_applicable(IncrementalSLSetting) assert ContinualSLMethod.is_applicable(ClassIncrementalSetting) assert ContinualSLMethod.is_applicable(TaskIncrementalSLSetting) assert ContinualSLMethod.is_applicable(DomainIncrementalSLSetting) assert ContinualSLMethod.is_applicable(TraditionalSLSetting) assert ContinualSLMethod.is_applicable(MultiTaskSLSetting) assert not DiscreteTaskAgnosticSLMethod.is_applicable(ContinualSLSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(IncrementalSLSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(ClassIncrementalSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(TaskIncrementalSLSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(DomainIncrementalSLSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(TraditionalSLSetting) assert DiscreteTaskAgnosticSLMethod.is_applicable(MultiTaskSLSetting) assert not IncrementalSLMethod.is_applicable(ContinualSLSetting) assert not IncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert IncrementalSLMethod.is_applicable(IncrementalSLSetting) assert IncrementalSLMethod.is_applicable(ClassIncrementalSetting) assert IncrementalSLMethod.is_applicable(TaskIncrementalSLSetting) assert IncrementalSLMethod.is_applicable(DomainIncrementalSLSetting) assert IncrementalSLMethod.is_applicable(TraditionalSLSetting) assert IncrementalSLMethod.is_applicable(MultiTaskSLSetting) assert not ClassIncrementalSLMethod.is_applicable(ContinualSLSetting) assert not ClassIncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert ClassIncrementalSLMethod.is_applicable(IncrementalSLSetting) assert ClassIncrementalSLMethod.is_applicable(ClassIncrementalSetting) assert ClassIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting) assert ClassIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting) assert ClassIncrementalSLMethod.is_applicable(TraditionalSLSetting) assert ClassIncrementalSLMethod.is_applicable(MultiTaskSLSetting) assert not TaskIncrementalSLMethod.is_applicable(ContinualSLSetting) assert not TaskIncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert not TaskIncrementalSLMethod.is_applicable(IncrementalSLSetting) assert not TaskIncrementalSLMethod.is_applicable(ClassIncrementalSetting) assert TaskIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting) assert not TaskIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting) assert not TaskIncrementalSLMethod.is_applicable(TraditionalSLSetting) assert TaskIncrementalSLMethod.is_applicable(MultiTaskSLSetting) assert not DomainIncrementalSLMethod.is_applicable(ContinualSLSetting) assert not DomainIncrementalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert not DomainIncrementalSLMethod.is_applicable(IncrementalSLSetting) assert not DomainIncrementalSLMethod.is_applicable(ClassIncrementalSetting) assert not DomainIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting) assert DomainIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting) assert not DomainIncrementalSLMethod.is_applicable(TraditionalSLSetting) # TODO: What about this one? # assert DomainIncrementalSLMethod.is_applicable(MultiTaskSLSetting) assert not TraditionalSLMethod.is_applicable(ContinualSLSetting) assert not TraditionalSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert not TraditionalSLMethod.is_applicable(IncrementalSLSetting) assert not TraditionalSLMethod.is_applicable(TaskIncrementalSLSetting) assert not TraditionalSLMethod.is_applicable(DomainIncrementalSLSetting) assert not TraditionalSLMethod.is_applicable(ClassIncrementalSetting) assert TraditionalSLMethod.is_applicable(TraditionalSLSetting) assert TraditionalSLMethod.is_applicable(MultiTaskSLSetting) assert not MultiTaskSLMethod.is_applicable(ContinualSLSetting) assert not MultiTaskSLMethod.is_applicable(DiscreteTaskAgnosticSLSetting) assert not MultiTaskSLMethod.is_applicable(IncrementalSLSetting) assert not MultiTaskSLMethod.is_applicable(TaskIncrementalSLSetting) assert not MultiTaskSLMethod.is_applicable(DomainIncrementalSLSetting) assert not MultiTaskSLMethod.is_applicable(ClassIncrementalSetting) assert not MultiTaskSLMethod.is_applicable(TraditionalSLSetting) assert MultiTaskSLMethod.is_applicable(MultiTaskSLSetting) def test_get_parents(): # TODO: THis is a bit funky, now that Class-Incremental is a "pointer" to # Incremental, and Traditional has been moved under TaskIncremental assert TraditionalSLSetting in IncrementalSLSetting.get_children() assert TraditionalSLSetting not in TaskIncrementalSLSetting.get_children() assert TraditionalSLSetting in IncrementalSLSetting.immediate_children() assert TaskIncrementalSLSetting not in TraditionalSLSetting.parents() assert ClassIncrementalSetting in TaskIncrementalSLSetting.immediate_parents() assert TaskIncrementalSLSetting not in TraditionalSLSetting.get_parents() assert ClassIncrementalSetting in TraditionalSLSetting.get_parents() assert TraditionalSLSetting not in TraditionalSLSetting.get_parents() @pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.") def test_get_parents_domain_incremental(): assert TraditionalSLSetting in DomainIncrementalSLSetting.get_children() assert DomainIncrementalSLSetting in TraditionalSLSetting.get_immediate_parents() @pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.") def test_method_applicability_domain_incremental(): assert not DomainIncrementalSLMethod.is_applicable(ClassIncrementalSetting) assert not DomainIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting) assert DomainIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting) assert DomainIncrementalSLMethod.is_applicable(TraditionalSLSetting) @pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.") def test_get_parents_domain_incremental(): assert DomainIncrementalSLSetting in TraditionalSLSetting.get_parents() ================================================ FILE: sequoia/settings/sl/wrappers/__init__.py ================================================ """ Module defining gym wrappers that are specific to SL Environments. """ from .measure_performance import MeasureSLPerformanceWrapper ================================================ FILE: sequoia/settings/sl/wrappers/measure_performance.py ================================================ """ TODO: Create a Wrapper that measures performance over the first epoch of training in SL. Then maybe after we can make something more general that also works for RL. """ import warnings from collections import defaultdict """ Wrapper that gets applied onto the environment in order to measure the online training performance. TODO: Move this somewhere more appropriate. There's also the RL version of the wrapper here. """ from typing import Dict, Iterator, Optional, Tuple import numpy as np from gym.utils import colorize from torch import Tensor import wandb from sequoia.common.gym_wrappers.measure_performance import MeasurePerformanceWrapper from sequoia.common.metrics import ClassificationMetrics, Metrics from sequoia.settings.base import Actions, Observations, Rewards from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.utils.utils import add_prefix class MeasureSLPerformanceWrapper( MeasurePerformanceWrapper, # MeasurePerformanceWrapper[PassiveEnvironment] # Python 3.7 # MeasurePerformanceWrapper[PassiveEnvironment, ClassificationMetrics] # Python 3.8+ ): def __init__( self, env: PassiveEnvironment, first_epoch_only: bool = False, wandb_prefix: str = None, ): super().__init__(env) # Metrics mapping from step to the metrics at that step. self._metrics: Dict[int, ClassificationMetrics] = defaultdict(Metrics) self.first_epoch_only = first_epoch_only self.wandb_prefix = wandb_prefix # Counter for the number of steps. self._steps: int = 0 assert isinstance(self.env.unwrapped, PassiveEnvironment) if not self.env.unwrapped.pretend_to_be_active: warnings.warn( RuntimeWarning( colorize( "Your online performance " + ("during the first epoch " if self.first_epoch_only else "") + "on this environment will be monitored! " "Since this env is Passive, i.e. a Supervised Learning " "DataLoader, the Rewards (y) will be withheld until " "actions are passed to the 'send' method. Make sure that " "your training loop can handle this small tweak.", color="yellow", ) ) ) self.env.unwrapped.pretend_to_be_active = True self.__epochs = 0 def reset(self) -> Observations: return self.env.reset() @property def in_evaluation_period(self) -> bool: if self.first_epoch_only: # TODO: Double-check the iteraction of IterableDataset and __len__ return self.__epochs == 0 return True def step(self, action: Actions): observation, reward, done, info = self.env.step(action) # TODO: Make this wrapper task-aware, using the task ids in this `observation`? if self.in_evaluation_period: # TODO: Edge case, but we also need the prediction for the last batch to be # counted. self._metrics[self._steps] += self.get_metrics(action, reward) elif self.first_epoch_only: # If we are at the last batch in the first epoch, we still keep the metrics # for that batch, even though we're technically not in the first epoch # anymore. # TODO: CHeck the length through the dataset? or through a more 'clean' way # e.g. through the `max_steps` property of a TimeLimit wrapper or something? num_batches = len(self.unwrapped.dataset) // self.batch_size if not self.unwrapped.drop_last: num_batches += 1 if len(self.unwrapped.dataset) % self.batch_size else 0 # currently_at_last_batch = self._steps == num_batches - 1 currently_at_last_batch = self._steps == num_batches - 1 if self.__epochs == 1 and currently_at_last_batch: self._metrics[self._steps] += self.get_metrics(action, reward) self._steps += 1 return observation, reward, done, info def send(self, action: Actions): if not isinstance(action, Actions): assert isinstance(action, (np.ndarray, Tensor)) action = Actions(action) reward = self.env.send(action) if self.in_evaluation_period: # TODO: Edge case, but we also need the prediction for the last batch to be # counted. self._metrics[self._steps] += self.get_metrics(action, reward) elif self.first_epoch_only: # If we are at the last batch in the first epoch, we still keep the metrics # for that batch, even though we're technically not in the first epoch # anymore. # TODO: CHeck the length through the dataset? or through a more 'clean' way # e.g. through the `max_steps` property of a TimeLimit wrapper or something? num_batches = len(self.unwrapped.dataset) // self.batch_size if not self.unwrapped.drop_last: num_batches += 1 if len(self.unwrapped.dataset) % self.batch_size else 0 # currently_at_last_batch = self._steps == num_batches - 1 currently_at_last_batch = self._steps == num_batches - 1 if self.__epochs == 1 and currently_at_last_batch: self._metrics[self._steps] += self.get_metrics(action, reward) # This is ok since we don't increment in the iterator. self._steps += 1 return reward def get_metrics(self, action: Actions, reward: Rewards) -> Metrics: assert action.y_pred.shape == reward.y.shape, (action.shapes, reward.shapes) metric = ClassificationMetrics(y_pred=action.y_pred, y=reward.y, num_classes=self.n_classes) if wandb.run: log_dict = metric.to_log_dict() if self.wandb_prefix: log_dict = add_prefix(log_dict, prefix=self.wandb_prefix, sep="/") log_dict["steps"] = self._steps wandb.log(log_dict) return metric def __iter__(self) -> Iterator[Tuple[Observations, Optional[Rewards]]]: if self.__epochs == 1 and self.first_epoch_only: print( colorize( "Your performance during the first epoch on this environment has " "been successfully measured! The environment will now yield the " "rewards (y) during iteration, and you are no longer required to " "send an action for each observation.", color="green", ) ) self.env.unwrapped.pretend_to_be_active = False for obs, rew in self.env.__iter__(): if self.in_evaluation_period: yield obs, None else: yield obs, rew self.__epochs += 1 ================================================ FILE: sequoia/settings/sl/wrappers/measure_performance_test.py ================================================ """ TODO: Tests for the 'measure performance wrapper' to be used to get the performance over the first "epoch" """ import dataclasses from typing import Iterable, Tuple, TypeVar import numpy as np import pytest import torch from torch.utils.data import TensorDataset from sequoia.common import Config from sequoia.common.metrics import ClassificationMetrics from sequoia.settings.rl.wrappers import TypedObjectsWrapper from sequoia.settings.sl import ClassIncrementalSetting from sequoia.settings.sl.environment import PassiveEnvironment from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards from .measure_performance import MeasureSLPerformanceWrapper T = TypeVar("T") def with_is_last(iterable: Iterable[T]) -> Iterable[Tuple[T, bool]]: """Function that mimics what's happening in pytorch-lightning, where the iterator is one-offset. This can cause a bit of headache in Sequoia's wrappers when iterating over an env, because they expect an action for each observation. """ iterator = iter(iterable) sentinel = object() previous_value = next(iterator) current_value = next(iterator, sentinel) while current_value is not sentinel: yield previous_value, False previous_value = current_value current_value = next(iterator, sentinel) yield previous_value, True def test_measure_performance_wrapper(): dataset = TensorDataset( torch.arange(100).reshape([100, 1, 1, 1]) * torch.ones([100, 3, 32, 32]), torch.arange(100), ) pretend_to_be_active = True env = PassiveEnvironment( dataset, batch_size=1, n_classes=100, pretend_to_be_active=pretend_to_be_active ) for i, (x, y) in enumerate(env): # print(x) assert y is None if pretend_to_be_active else y is not None assert (x == i).all() action = i if i < 50 else 0 reward = env.send(action) assert reward == i assert i == 99 # This might be a bit weird, since .reset() will give the same obs as the first x # when iterating. obs = env.reset() for i, (x, y) in enumerate(env): # print(x) assert y is None assert (x == i).all() action = i if i < 50 else 0 reward = env.send(action) assert reward == i assert i == 99 from sequoia.settings.sl.continual.objects import Observations, Actions, Rewards env = TypedObjectsWrapper( env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards ) # TODO: Do we want to require Observations / Actions / Rewards objects? env = MeasureSLPerformanceWrapper(env, first_epoch_only=False) for epoch in range(3): for i, (observations, rewards) in enumerate(env): assert observations is not None assert rewards is None assert (observations.x == i).all() # Only guess correctly for the first 50 steps. action = Actions(y_pred=np.array([i if i < 50 else 0])) rewards = env.send(action) assert (rewards.y == i).all() assert i == 99 assert epoch == 2 assert set(env.get_online_performance().keys()) == set(range(100 * 3)) for i, (step, metric) in enumerate(env.get_online_performance().items()): assert step == i assert metric.accuracy == (1.0 if (i % 100) < 50 else 0.0), (i, step, metric) metrics = env.get_average_online_performance() assert isinstance(metrics, ClassificationMetrics) # Since we guessed the correct class only during the first 50 steps. assert metrics.accuracy == 0.5 def make_dummy_env(n_samples: int = 100, batch_size: int = 1, drop_last: bool = False): dataset = TensorDataset( torch.arange(n_samples).reshape([n_samples, 1, 1, 1]) * torch.ones([n_samples, 3, 32, 32]), torch.arange(n_samples), ) pretend_to_be_active = False env = PassiveEnvironment( dataset, batch_size=batch_size, n_classes=n_samples, pretend_to_be_active=pretend_to_be_active, drop_last=drop_last, ) env = TypedObjectsWrapper( env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards ) return env def test_measure_performance_wrapper_first_epoch_only(): env = make_dummy_env(n_samples=100, batch_size=1) env = MeasureSLPerformanceWrapper(env, first_epoch_only=True) for epoch in range(2): print(f"start epoch {epoch}") for i, (observations, rewards) in enumerate(env): assert observations is not None if epoch == 0: assert rewards is None else: assert rewards is not None rewards_ = rewards # save these for a comparison below. assert (observations.x == i).all() # Only guess correctly for the first 50 steps. action = Actions(y_pred=np.array([i if i < 50 else 0])) rewards = env.send(action) if epoch != 0: # We should just receive what we already got by iterating. assert rewards.y == rewards_.y assert (rewards.y == i).all() assert i == 99 # do another epoch, but this time don't even send actions. for i, (observations, rewards) in enumerate(env): assert (observations.x == i).all() assert (rewards.y == i).all() assert i == 99 assert set(env.get_online_performance().keys()) == set(range(100)) for i, (step, metric) in enumerate(env.get_online_performance().items()): assert step == i assert metric.accuracy == (1.0 if (i % 100) < 50 else 0.0), (i, step, metric) metrics = env.get_average_online_performance() assert isinstance(metrics, ClassificationMetrics) # Since we guessed the correct class only during the first 50 steps. assert metrics.accuracy == 0.5 assert metrics.n_samples == 100 def test_measure_performance_wrapper_odd_vs_even(): env = make_dummy_env(n_samples=100, batch_size=1) env = MeasureSLPerformanceWrapper(env, first_epoch_only=True) for i, (observations, rewards) in enumerate(env): assert observations is not None assert rewards is None or rewards.y is None assert (observations.x == i).all() # Only guess correctly for the first 50 steps. action = Actions(y_pred=np.array([i if i % 2 == 0 else 0])) rewards = env.send(action) assert (rewards.y == i).all() assert i == 99 assert set(env.get_online_performance().keys()) == set(range(100)) for i, (step, metric) in enumerate(env.get_online_performance().items()): assert step == i if step % 2 == 0: assert metric.accuracy == 1.0, (i, step, metric) else: assert metric.accuracy == 0.0, (i, step, metric) metrics = env.get_average_online_performance() assert isinstance(metrics, ClassificationMetrics) # Since we guessed the correct class only during the first 50 steps. assert metrics.accuracy == 0.5 assert metrics.n_samples == 100 def test_measure_performance_wrapper_odd_vs_even_passive(): dataset = TensorDataset( torch.arange(100).reshape([100, 1, 1, 1]) * torch.ones([100, 3, 32, 32]), torch.arange(100), ) pretend_to_be_active = False env = PassiveEnvironment( dataset, batch_size=1, n_classes=100, pretend_to_be_active=pretend_to_be_active ) env = TypedObjectsWrapper( env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards ) env = MeasureSLPerformanceWrapper(env, first_epoch_only=False) for i, (observations, rewards) in enumerate(env): assert observations is not None assert rewards is None or rewards.y is None assert (observations.x == i).all() # Only guess correctly for the first 50 steps. action = Actions(y_pred=np.array([i if i % 2 == 0 else 0])) rewards = env.send(action) assert (rewards.y == i).all() assert i == 99 assert set(env.get_online_performance().keys()) == set(range(100)) for i, (step, metric) in enumerate(env.get_online_performance().items()): assert step == i if step % 2 == 0: assert metric.accuracy == 1.0, (i, step, metric) else: assert metric.accuracy == 0.0, (i, step, metric) metrics = env.get_average_online_performance() assert isinstance(metrics, ClassificationMetrics) # Since we guessed the correct class only during the first 50 steps. assert metrics.accuracy == 0.5 assert metrics.n_samples == 100 def test_last_batch(): """Test what happens with the last batch, in the case where the batch size doesn't divide the dataset equally. """ env = make_dummy_env(n_samples=110, batch_size=20) env = MeasureSLPerformanceWrapper(env, first_epoch_only=True) for i, (obs, rew) in enumerate(env): assert rew is None if i != 5: assert obs.batch_size == 20, i else: assert obs.batch_size == 10, i actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]) rewards = env.send(actions) assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all() perf = env.get_average_online_performance() assert perf.accuracy == 1.0 assert perf.n_samples == 110 from sequoia.methods.models.base_model import BaseModel def test_last_batch_baseline_model(): """BUG: Baseline method is doing something weird at the last batch, and I dont know quite why.""" n_samples = 110 batch_size = 20 # Note: the y's here are different. dataset = TensorDataset( torch.arange(n_samples).reshape([n_samples, 1, 1, 1]) * torch.ones([n_samples, 3, 32, 32]), torch.zeros(n_samples, dtype=int), ) pretend_to_be_active = False env = PassiveEnvironment( dataset, batch_size=batch_size, n_classes=n_samples, pretend_to_be_active=pretend_to_be_active, ) env = TypedObjectsWrapper( env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards ) env = MeasureSLPerformanceWrapper(env, first_epoch_only=True) # FIXME: Hacky setup: Should instead have a way of using a 'test' setting with a # configurable in-memory test dataset. setting = ClassIncrementalSetting() setting.train_env = env model = BaseModel(setting=setting, hparams=BaseModel.HParams(), config=Config(debug=True)) for i, (obs, rew) in enumerate(env): obs = dataclasses.replace( obs, task_labels=torch.ones([obs.x.shape[0]], device=obs.x.device) ) assert rew is None forward_pass = model.training_step((obs, rew), batch_idx=i) loss = model.training_step_end([forward_pass]) print(loss) perf = env.get_average_online_performance() assert perf.n_samples == 110 @pytest.mark.parametrize("drop_last", [False, True]) def test_delayed_actions(drop_last: bool): """Test that whenever some intermediate between the env and the Method is caching some of the observations, the actions and rewards still end up lining up. This is just to replicate what's happening in Pytorch Lightning, where they use some function to check if the batch is the last one or not, and was causing issue before. """ env = make_dummy_env(n_samples=110, batch_size=20, drop_last=drop_last) env = MeasureSLPerformanceWrapper(env, first_epoch_only=True) i = 0 for i, ((obs, rew), is_last) in enumerate(with_is_last(env)): print(i, obs.batch_size) assert rew is None if i != 5: assert obs.batch_size == 20, i else: assert obs.batch_size == 10, i actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]) rewards = env.send(actions) assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all() assert i == (4 if drop_last else 5) assert is_last for i, ((obs, rew), is_last) in enumerate(with_is_last(env)): print(i) # We get rewards now that we're outside of the first epoch. assert rew is not None if i < 5: assert obs.batch_size == 20, i else: assert obs.batch_size == 10, i # actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]) # rewards = env.send(actions) # assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all() assert i == 4 if drop_last else 5 assert len(list(env)) == 5 if drop_last else 6 assert len(list(with_is_last(env))) == 5 if drop_last else 6 perf = env.get_average_online_performance() assert perf.accuracy == 1.0 # BUG: The number of samples for the metrics isn't quite right, should include the # last batch, even if it doesn't have a 'full' batch. assert perf.n_samples == (100 if drop_last else 110) ================================================ FILE: sequoia/settings.puml ================================================ @startuml settings !include gym.puml !include pytorch_lightning.puml ' !include common.puml ' TODO: there must be a better way to show only one thing from a ' package, without having to import all the package and then ' remove everything but that one thing! remove gym.spaces remove Wrapper ' remove common namespace torch { class DataLoader class Tensor } package settings { ' !include base/base.puml abstract class Setting extends SettingABC { ' 'root' setting. -- static (class) attributes -- + {static} Observations: Type[Observations] + {static} Actions: Type[Actions] + {static} Rewards: Type[Rewards] .. attributes .. + observation_space: Space + action_space: Space + reward_space: Space .. methods .. {abstract} + apply(Method): Results } package assumptions as settings.assumptions { package continual as settings.assumptions.continual { abstract class ContinualAssumption extends Setting { } } package incremental as settings.assumptions.incremental { abstract class IncrementalAssumption extends ContinualAssumption { + nb_tasks: int + task_labels_at_train_time: bool + task_labels_at_test_time: bool + {field} known_task_boundaries_at_train_time: bool = True (constant) + {field} known_task_boundaries_at_test_time: bool = True (constant) ' TODO: THis is actually a constant atm, even for ContinualRL ' doesn't have this set to 'true', since there is only one task, ' so there aren't an 'task boundaries' to speak of. + {field} smooth_task_boundaries: bool - _current_task_id: int + train_loop() + test_loop() } abstract class IncrementalObservations extends Observations { + task_labels: Optional[Tensor] } abstract class IncrementalResults extends Results { } } ' package task_incremental as settings.assumptions.task_incremental { ' abstract class TaskIncrementalAssumption extends IncrementalAssumption { ' } ' } ' package iid as settings.assumptions.iid { ' abstract class TraditionalSLSetting extends TaskIncrementalSLSetting { ' } ' } } package passive as settings.passive { class PassiveEnvironment implements Environment {} abstract class SLSetting extends Setting { {abstract} + train_dataloader(): PassiveEnvironment {abstract} + val_dataloader(): PassiveEnvironment {abstract} + test_dataloader(): PassiveEnvironment + dataset: str + available_datasets: dict } ' PassiveEnvironment extends DataLoader package cl as settings.passive.cl { class ClassIncrementalSetting implements SLSetting, IncrementalAssumption { {static} + Results: Type[Results] = IncrementalSLResults + nb_tasks: int + task_labels_at_train_time: bool = True + task_labels_at_test_time: bool = False + transforms: List[Transforms] + class_order: Optional[List[int]] = None + relabel: bool = False } class IncrementalSLResults implements IncrementalResults {} package domain_incremental as settings.passive.cl.domain_incremental { class DomainIncrementalSetting extends ClassIncrementalSetting { + relabel: bool = True } } package task_incremental as settings.passive.cl.task_incremental { class TaskIncrementalSLSetting extends ClassIncrementalSetting { {field} + task_labels_at_train_time: bool = True (constant) {field} + task_labels_at_test_time: bool = True (constant) } ' class TaskIncrementalResults extends IncrementalSLResults{} package multi_task as settings.passive.cl.task_incremental.multi_task { class MultiTaskSetting extends TaskIncrementalSLSetting { } } } package iid as settings.passive.cl.iid { class TraditionalSLSetting extends TaskIncrementalSLSetting, DomainIncrementalSetting { {field} + nb_tasks: int = 1 (constant) } class IIDResults extends IncrementalSLResults{} } } } package active as settings.active { 'note: This is currently called GymDataLoader in the repo. class ActiveEnvironment extends Environment {} abstract class RLSetting extends Setting { {abstract} + train_dataloader(): ActiveEnvironment {abstract} + val_dataloader(): ActiveEnvironment {abstract} + test_dataloader(): ActiveEnvironment } package continual as settings.active.continual { class ContinualRLSetting implements RLSetting, IncrementalAssumption { {static} + Results: Type[Results] = RLResults + dataset: str = "cartpole" + nb_tasks: int = 1 + train_max_steps: int = 10000 + max_episodes: Optional[int] = None + steps_per_task: Optional[int] = None + episodes_per_task: Optional[int] = None + test_steps_per_task: int = 1000 + test_steps: Optional[int] = None + smooth_task_boundaries: bool = True + train_task_schedule: dict + val_task_schedule: dict + test_task_schedule: dict + task_noise_std: float + train_wrappers: List[gym.Wrapper] + valid_wrappers: List[gym.Wrapper] + test_wrappers: List[gym.Wrapper] + add_done_to_observations: bool = False } class RLResults implements IncrementalResults package incremental as settings.active.continual.incremental { class IncrementalRLSetting extends ContinualRLSetting { + nb_tasks: int = 10 {field} + smooth_task_boundaries: bool = False (constant) + task_labels_at_train_time: bool = True + task_labels_at_test_time: bool = False } package task_incremental_rl as settings.active.incremental.task_incremental_rl { class TaskIncrementalRLSetting extends IncrementalRLSetting { {field} + task_labels_at_train_time: bool = True (constant) {field} + task_labels_at_test_time: bool = True (constant) } package stationary as settings.active.incremental.task_incremental_rl.stationary { class RLSetting extends TaskIncrementalRLSetting { {field} + nb_tasks: int = 1 (constant) } } } } } } } IncrementalAssumption -left-> IncrementalResults : produces IncrementalAssumption -down-> IncrementalObservations : envs yield ClassIncrementalSetting -left-> IncrementalSLResults : produces TaskIncrementalSLSetting -left-> TaskIncrementalResults : produces TraditionalSLSetting -left-> IIDResults : produces SLSetting --> PassiveEnvironment : uses RLSetting -right-> ActiveEnvironment : uses ContinualRLSetting -> RLResults : produces @enduml ================================================ FILE: sequoia/utils/__init__.py ================================================ """ Miscelaneous utility functions. """ import sys # from .generic_functions import * from .generic_functions.singledispatchmethod import singledispatchmethod from .logging_utils import get_logger from .parseable import Parseable from .serialization import Serializable from .encode import encode # from .utils import ================================================ FILE: sequoia/utils/categorical.py ================================================ from typing import Any, Iterable, Optional, Union import torch from torch import Tensor from torch.distributions import Categorical as Categorical_ class Categorical(Categorical_): """Simple little addition to the `torch.distributions.Categorical`, allowing it to be 'split' into a sequence of distributions (to help with the splitting in the output heads) """ def __init__( self, probs: Optional[Tensor] = None, logits: Optional[Tensor] = None, validate_args: bool = None, ): super().__init__(probs=probs, logits=logits, validate_args=validate_args) self._device: torch.device = probs.device if probs is not None else logits.device def __getitem__(self, index: Optional[int]) -> "Categorical": return Categorical(logits=self.logits[index]) # return Categorical(probs=self.probs[index]) def __iter__(self) -> Iterable["Categorical"]: for index in range(self.logits.shape[0]): yield self[index] def __add__(self, other: Union["Categorical_", Any]) -> "Categorical": # Idea:, how about we return a wrapped version of `self` whose # 'sample' returns self.sample() + `other`? return NotImplemented def __mul__(self, other: Union["Categorical_", Any]) -> "Categorical": # Idea: Idea, how about we return a wrapped version of `self` whose # 'sample' returns self.sample() * `other`? return NotImplemented @property def device(self) -> torch.device: """The device of the tensors of this distribution. @lebrice: Not sure why this isn't already part of torch.Distribution base-class. """ return self._device def to(self, device: Union[str, torch.device]) -> "Categorical": """Moves this distribution to another device. @lebrice: Not sure why this isn't already part of torch.Distribution base-class. """ return type(self)(logits=self.logits.to(device=device)) ================================================ FILE: sequoia/utils/data_utils.py ================================================ import os from pathlib import Path from typing import Dict, Iterable, Iterator, Sized, Tuple import numpy as np import torch from torch import Tensor, nn from torch.utils.data import DataLoader, Subset from torchvision.datasets import CIFAR100, VisionDataset from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) def train_valid_split( train_dataset: VisionDataset, valid_fraction: float = 0.2 ) -> Tuple[VisionDataset, VisionDataset]: n = len(train_dataset) valid_len: int = int((n * valid_fraction)) train_len: int = n - valid_len indices = np.arange(n, dtype=int) np.random.shuffle(indices) valid_indices = indices[:valid_len] train_indices = indices[valid_len:] train = Subset(train_dataset, train_indices) valid = Subset(train_dataset, valid_indices) logger.info(f"Training samples: {len(train)}, Valid samples: {len(valid)}") return train, valid def unbatch(dataloader: Iterable[Tuple[Tensor, Tensor]]) -> Iterable[Tuple[Tensor, Tensor]]: """Unbatches a dataloader. NOTE: this is a generator for a single pass through the dataloader, not multiple. """ for batch in dataloader: if isinstance(batch, tuple): yield from zip(*batch) else: yield from batch class unlabeled(Iterable[Tuple[Tensor]], Sized): """Given a DataLoader, returns an Iterable that drops the labels.""" def __init__(self, labeled_dataloader: DataLoader): self.loader = labeled_dataloader def __iter__(self) -> Iterator[Tuple[Tensor]]: for batch in self.loader: assert isinstance(batch, tuple) x = batch[0] yield x, def __len__(self) -> int: return len(self.loader) def keep_in_memory(dataset: VisionDataset) -> None: """Converts the dataset's `data` and `targets` attributes to Tensors. This has the consequence of keeping the entire dataset in memory. """ if hasattr(dataset, "data") and not isinstance(dataset.data, (np.ndarray, Tensor)): dataset.data = torch.as_tensor(dataset.data) if not isinstance(dataset.targets, (np.ndarray, Tensor)): dataset.targets = torch.as_tensor(dataset.targets) if isinstance(dataset, CIFAR100): # TODO: Cifar100 seems to want its 'data' to a numpy ndarray. dataset.data = np.asarray(dataset.data) class FixChannels(nn.Module): """Transform that fixes the number of channels in input images. For instance, if the input shape is: [28, 28] -> [3, 28, 28] (copy the image three times) [1, 28, 28] -> [3, 28, 28] (same idea) [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.) """ def __call__(self, x: Tensor) -> Tensor: if x.ndim == 2: x = x.reshape([1, *x.shape]) x = x.repeat(3, 1, 1) if x.ndim == 3 and x.shape[0] == 1: x = x.repeat(3, 1, 1) if x.ndim == 4 and x.shape[1] == 1: x = x.repeat(1, 3, 1, 1) return x def get_imagenet_location() -> Path: from socket import gethostname hostname = gethostname() # For each hostname prefix, the location where the torchvision ImageNet dataset can be found. # TODO: Add the location for your own machine. imagenet_locations: Dict[str, Path] = { "mila": Path("/network/datasets/imagenet.var/imagenet_torchvision"), "": Path("/network/datasets/imagenet.var/imagenet_torchvision"), } for prefix, v in imagenet_locations.items(): if hostname.startswith(prefix): return v if "IMAGENET_DIR" in os.environ: return Path(os.environ["IMAGENET_DIR"]) raise RuntimeError( f"Could not find the ImageNet dataset on this machine with hostname " f"{hostname}. Known location> pairs: {imagenet_locations}" ) ================================================ FILE: sequoia/utils/encode.py ================================================ """ Registers more datatypes to be used by the 'encode' function from simple-parsing when serializing objects to json or yaml. """ import enum import inspect from pathlib import Path from typing import Any, List, Type, Union import numpy as np import torch from simple_parsing.helpers.serialization import encode, register_decoding_fn from torch import Tensor, nn, optim # Register functions for decoding Tensor and ndarray fields from json/yaml. register_decoding_fn(Tensor, torch.as_tensor) register_decoding_fn(np.ndarray, np.asarray) register_decoding_fn(Type[nn.Module], lambda v: v) register_decoding_fn(Type[optim.Optimizer], lambda v: v) # NOTE: Uncomment this to enable logging tensors as-is when calling to_dict on a # Serializable dataclass @encode.register(Tensor) def no_op_encode(value: Any): return value # TODO: Look deeper into how things are pickled and moved by pytorch-lightning. # Right now there is a warning by pytorch-lightning saying that some metrics # will not be included in a checkpoint because they are lists instead of Tensors. # This is because they got encoded with the function below when they shouldn't # have. # @encode.register(Tensor) @encode.register(np.ndarray) def encode_tensor(obj: Union[Tensor, np.ndarray]) -> List: return obj.tolist() @encode.register def encode_type(obj: type) -> List: if inspect.isclass(obj): return str(obj.__qualname__) elif inspect.isfunction(obj): return str(obj.__name__) return str(obj) @encode.register def encode_path(obj: Path) -> str: return str(obj) @encode.register def encode_device(obj: torch.device) -> str: return str(obj) @encode.register def encode_enum(value: enum.Enum): return value.value ================================================ FILE: sequoia/utils/generic_functions/__init__.py ================================================ """ Defines a bunch of single-dispatch generic functions, that are applicable on structured objects, numpy arrays, tensors, spaces, etc. """ from ._namedtuple import NamedTuple, is_namedtuple from .concatenate import concatenate from .detach import detach from .move import move from .replace import replace from .singledispatchmethod import singledispatchmethod from .slicing import get_slice, set_slice from .stack import stack from .to_from_tensor import from_tensor, to_tensor ================================================ FILE: sequoia/utils/generic_functions/_namedtuple.py ================================================ """ Small 'patch' for the NamedTuple type, just so we can use isinstance(obj, NamedTuple) and issubclass(some_class, NamedTuple) work correctly. """ from inspect import isclass from typing import Any, NamedTuple, Type def is_namedtuple(obj: Any) -> bool: """Taken from https://stackoverflow.com/a/62692640/6388696""" return isinstance(obj, tuple) and hasattr(obj, "_asdict") and hasattr(obj, "_fields") def is_namedtuple_type(obj: Type) -> bool: """Taken from https://stackoverflow.com/a/62692640/6388696""" return obj is NamedTuple or ( isclass(obj) and issubclass(obj, tuple) and hasattr(obj, "_asdict") and hasattr(obj, "_fields") ) ================================================ FILE: sequoia/utils/generic_functions/_namedtuple_test.py ================================================ from typing import NamedTuple import pytest from sequoia.utils.generic_functions._namedtuple import is_namedtuple, is_namedtuple_type class DummyTuple(NamedTuple): a: int b: str def test_is_namedtuple(): bob = DummyTuple(1, "bob") assert is_namedtuple(bob) def test_is_namedtuple_type(): assert is_namedtuple_type(DummyTuple) assert is_namedtuple_type(NamedTuple) assert not is_namedtuple_type(tuple) assert not is_namedtuple_type(list) assert not is_namedtuple_type(dict) @pytest.mark.xfail(reason="Not sure this is actually a good idea.") def test_instance_check(): bob = DummyTuple(1, "bob") assert isinstance(bob, DummyTuple) assert isinstance(bob, NamedTuple) assert isinstance(bob, tuple) @pytest.mark.xfail(reason="Not sure this is actually a good idea.") def test_instance_check(): assert issubclass(DummyTuple, NamedTuple) assert issubclass(DummyTuple, tuple) assert issubclass(DummyTuple, DummyTuple) assert not issubclass(list, DummyTuple) assert not issubclass(tuple, DummyTuple) assert not issubclass(NamedTuple, DummyTuple) ================================================ FILE: sequoia/utils/generic_functions/concatenate.py ================================================ """ Generic function for concatenating ndarrays/tensors/distributions/Mappings etc. Extremely similar to `stack.py`, but concatenates along the described axis. """ from collections.abc import Mapping from functools import singledispatch from typing import Any, Dict, List, Sequence, TypeVar, Union import numpy as np import torch from continuum import TaskSet from continuum.tasks import concat as _continuum_concat from torch import Tensor from torch.utils.data import ChainDataset, ConcatDataset, Dataset, IterableDataset from sequoia.utils.categorical import Categorical T = TypeVar("T") # @overload # def concatenate(first_item: List[T], **kwargs) -> Sequence[T]: # ... # @overload # def concatenate(first_item: T, *others: T, **kwargs) -> Sequence[T]: # ... @singledispatch def concatenate(first_item: Union[T, List[T]], *others: T, **kwargs) -> Union[Sequence[T], Any]: # By default, if we don't know how to handle the item type, just # returns an ndarray with with all the items. if not others: # If this was called like concatenate(tensor_list), then we just split off # the list of items. assert isinstance(first_item, (list, tuple)) if len(first_item) == 1: # Called like `concatenate([some_tensor])` -> returns `some_tensor`. return first_item[0] assert len(first_item) > 1 items = first_item return concatenate(items[0], *items[1:], **kwargs) return np.asarray([first_item, *others], **kwargs) @concatenate.register(type(None)) def _concatenate_ndarrays(first_item: None, *others: None, **kwargs) -> None: # NOTE: Concatenating a list of 'None' values will produce a single None output rather # than an ndarray of Nones. assert not any(other is not None for other in others) return None @concatenate.register(np.ndarray) def _concatenate_ndarrays(first_item: np.ndarray, *others: np.ndarray, **kwargs) -> np.ndarray: if not first_item.shape: # can't concatenate 0-dimensional arrays, so we stack them instead: return np.stack([first_item, *others], **kwargs) return np.concatenate([first_item, *others], **kwargs) @concatenate.register(Tensor) def _concatenate_tensors(first_item: Tensor, *others: Tensor, **kwargs) -> Tensor: if not first_item.shape: # can't concatenate 0-dimensional tensors, so we stack them instead. return torch.stack([first_item, *others], **kwargs) return torch.cat([first_item, *others], **kwargs) @concatenate.register(Mapping) def _concatenate_dicts(first_item: Dict, *others: Dict, **kwargs) -> Dict: return type(first_item)( **{ key: concatenate(first_item[key], *(other[key] for other in others), **kwargs) for key in first_item.keys() } ) @concatenate.register(Categorical) def _concatenate_distributions( first_item: Categorical, *others: Categorical, **kwargs ) -> Categorical: return Categorical( logits=torch.cat([first_item.logits, *(other.logits for other in others)], *kwargs) ) @concatenate.register def _concatenate_tasksets(first_item: TaskSet, *others: TaskSet) -> TaskSet: return _continuum_concat([first_item, *others]) @concatenate.register(Dataset) def _concatenate_datasets(first_item: Dataset[T], *others: Dataset[T]) -> ConcatDataset[T]: return ConcatDataset([first_item, *others]) @concatenate.register def _concatenate_iterable_datasets( first_item: IterableDataset, *others: IterableDataset ) -> ChainDataset: return ChainDataset([first_item, *others]) ================================================ FILE: sequoia/utils/generic_functions/detach.py ================================================ from collections.abc import Mapping from functools import singledispatch from typing import Any, Dict, Sequence, TypeVar import numpy as np from sequoia.utils.generic_functions._namedtuple import is_namedtuple from ..categorical import Categorical T = TypeVar("T") @singledispatch def detach(value: T) -> T: """Detaches a value when possible, else returns the value unchanged.""" if hasattr(value, "detach") and callable(value.detach): return value.detach() raise NotImplementedError(f"Don't know how to detach value {value}!") # else: # return value @detach.register(np.ndarray) @detach.register(type(None)) @detach.register(str) @detach.register(int) @detach.register(bool) @detach.register(float) def no_op_detach(v: Any) -> Any: return v @detach.register(list) @detach.register(tuple) @detach.register(set) def _detach_sequence(x: Sequence[T]) -> Sequence[T]: if is_namedtuple(x): return type(x)(*[detach(v) for v in x]) return type(x)(detach(v) for v in x) @detach.register(Mapping) def _detach_dict(d: Dict[str, Any]) -> Dict[str, Any]: """Detaches all the keys and tensors in a dict, as well as all nested dicts.""" return type(d)(**{detach(k): detach(v) for k, v in d.items()}) @detach.register def _detach_categorical(v: Categorical) -> Categorical: return type(v)(logits=v.logits.detach()) ================================================ FILE: sequoia/utils/generic_functions/move.py ================================================ """Defines a singledispatch function to move objects to a given device. """ from functools import singledispatch from typing import Dict, Sequence, TypeVar, Union import torch from sequoia.utils.generic_functions._namedtuple import is_namedtuple T = TypeVar("T") K = TypeVar("K") V = TypeVar("V") @singledispatch def move(x: T, device: Union[str, torch.device]) -> T: """Moves x to the specified device if possible, else returns x unchanged. NOTE: This works for Tensors or any collection of Tensors. """ if hasattr(x, "to") and callable(x.to) and device: return x.to(device=device) return x @move.register(dict) def move_dict(x: Dict[K, V], device: Union[str, torch.device]) -> Dict[K, V]: return type(x)(**{move(k, device): move(v, device) for k, v in x.items()}) @move.register(list) @move.register(tuple) @move.register(set) def move_sequence(x: Sequence[T], device: Union[str, torch.device]) -> Sequence[T]: if is_namedtuple(x): return type(x)(*[move(v, device) for v in x]) return type(x)(move(v, device) for v in x) ================================================ FILE: sequoia/utils/generic_functions/replace.py ================================================ """ Generic function for replacing items in an object. """ import dataclasses from collections.abc import Sequence from functools import singledispatch from typing import Dict, Tuple, TypeVar from gym import spaces from sequoia.utils.generic_functions._namedtuple import is_namedtuple T = TypeVar("T") class Dataclass(type): """Used so we can do `isinstance(obj, Dataclass)`, or maybe even register dataclass handlers for singledispatch generic functions. """ def __instancecheck__(self, instance) -> bool: # Return true if instance should be considered a (direct or indirect) # instance of class. If defined, called to implement # isinstance(instance, class). return dataclasses.is_dataclass(instance) def __subclasscheck__(self, subclass) -> bool: # Return true if subclass should be considered a (direct or indirect) # subclass of class. If defined, called to implement # issubclass(subclass, class). return dataclasses.is_dataclass(subclass) @singledispatch def replace(obj: T, **items) -> T: """Replaces the value at `key` in `obj` with `new_value`. Returns the modified object, either in-place (same instance as obj) or new. """ raise NotImplementedError( f"TODO: Don't know how to set items '{items}' in obj {obj}, " f"(no handler registered for objects of type {obj}." ) @replace.register(Dataclass) def _replace_dataclass_attribute(obj: Dataclass, **items) -> Dataclass: assert dataclasses.is_dataclass(obj) return dataclasses.replace(obj, **items) @replace.register(dict) def _replace_dict_item(obj: Dict, **items) -> Dict: assert isinstance(obj, dict) assert all( key in obj for key in items ), "replace should only be used to replace items, not to add new ones." new_obj = obj.copy() new_obj.update(items) return new_obj @replace.register(list) @replace.register(tuple) def _replace_sequence_items(obj: Sequence, **items) -> Tuple: if is_namedtuple(obj): return obj._replace(**items) return type(obj)(items[i] if i in items else val for i, val in enumerate(obj)) @replace.register def _replace_dict_items(obj: spaces.Dict, **items) -> Dict: """Handler for Dict spaces.""" return type(obj)(replace(obj.spaces, **items)) ================================================ FILE: sequoia/utils/generic_functions/replace_test.py ================================================ """ Tests for the `replace` generic function. """ ================================================ FILE: sequoia/utils/generic_functions/singledispatchmethod.py ================================================ """ Little 'patch' that imports a backport of 'singledispatchmethod', if the python version is < 3.8. """ import sys if sys.version_info >= (3, 8): from functools import singledispatchmethod # type: ignore else: try: pass except ImportError as e: print(f"Couldn't import singledispatchmethod: {e}") print( "Since you're running python version below 3.8, you need to " "install the backport for singledispatchmethod (which was added " "to functools in python 3.8), using the following command:\n" "> pip install singledispatchmethod" ) exit() ================================================ FILE: sequoia/utils/generic_functions/slicing.py ================================================ """ Extendable utility functions for getting and settings slices of arbitrarily nested objects. """ from functools import singledispatch from typing import Any, Dict, Sequence, Tuple, TypeVar import numpy as np from torch import Tensor from ._namedtuple import is_namedtuple K = TypeVar("K") V = TypeVar("V") T = TypeVar("T") @singledispatch def get_slice(value: T, indices: Sequence[int]) -> T: """Returns a slices of `value` at the given indices.""" if value is None: return None return value[indices] @get_slice.register(dict) def _get_dict_slice(value: Dict[K, V], indices: Sequence[int]) -> Dict[K, V]: return type(value)((k, get_slice(v, indices)) for k, v in value.items()) @get_slice.register(tuple) def _get_tuple_slice(value: Tuple[T, ...], indices: Sequence[int]) -> Tuple[T, ...]: # NOTE: we use type(value)( ... ) to create the output dicts or tuples, in # case a subclass of tuple or dict is being used (e.g. NamedTuples). if is_namedtuple(value): return type(value)(*[get_slice(v, indices) for v in value]) return type(value)([get_slice(v, indices) for v in value]) @singledispatch def set_slice(target: Any, indices: Sequence[int], values: Sequence[Any]) -> None: """Sets `values` at positions `indices` in `target`. Modifies the `target` in-place. """ target[indices] = values from sequoia.utils.categorical import Categorical @set_slice.register def _set_slice_categorical( target: Categorical, indices: Sequence[int], values: Sequence[Any] ) -> None: target.logits[indices] = values.logits @set_slice.register(np.ndarray) def _set_slice_ndarray(target: np.ndarray, indices: Sequence[int], values: Sequence[Any]) -> None: if isinstance(indices, Tensor): indices = indices.cpu().numpy() if isinstance(values, Tensor): values = values.cpu().numpy() target[indices] = values @set_slice.register(Tensor) def _set_slice_ndarray(target: Tensor, indices: Sequence[int], values: Sequence[Any]) -> None: target[indices] = values @set_slice.register(dict) def _set_dict_slice( target: Dict[K, Sequence[V]], indices: Sequence[int], values: Dict[K, Sequence[V]] ) -> None: for key, target_values in target.items(): set_slice(target_values, indices, values[key]) @set_slice.register(tuple) def _set_tuple_slice(target: Tuple[T, ...], indices: Sequence[int], values: Tuple[T, ...]) -> None: assert isinstance(values, tuple) assert len(target) == len(values) for target_item, values_item in zip(target, values): set_slice(target_item, indices, values_item) ================================================ FILE: sequoia/utils/generic_functions/slicing_test.py ================================================ from typing import NamedTuple import numpy as np import pytest from .slicing import get_slice, set_slice class DummyTuple(NamedTuple): a: np.ndarray b: np.ndarray @pytest.mark.parametrize( "source, indices, expected", [ (np.arange(10), np.arange(5), np.arange(5)), ( {"a": np.arange(10), "b": np.arange(10)}, np.arange(5), {"a": np.arange(5), "b": np.arange(5)}, ), (({"a": np.arange(10)}, np.arange(10) + 5), 3, ({"a": 3}, 8)), ( # Test with namedtuples. { "a": np.array([0, 1, 2]), "b": DummyTuple(a=np.zeros([3, 4]), b=np.ones([5, 4])), }, np.arange(2), {"a": np.array([0, 1]), "b": DummyTuple(a=np.zeros([2, 4]), b=np.ones([2, 4]))}, ), ], ) def test_get_slice(source, indices, expected): assert str(get_slice(source, indices)) == str(expected) @pytest.mark.parametrize( "target, indices, values, result", [ ( np.arange(10, dtype=float), np.arange(5), np.zeros(5), np.concatenate([np.zeros(5), np.arange(5) + 5.0]), ), ( {"a": np.arange(10, dtype=float), "b": np.zeros(10)}, np.arange(10), {"a": np.ones(10), "b": np.ones(10)}, {"a": np.ones(10), "b": np.ones(10)}, ), ( ({"a": np.arange(10)}, np.arange(10) + 5), 0, ({"a": 3}, 8), ( {"a": np.concatenate([np.array([3]), 1 + np.arange(9)])}, np.concatenate([np.array([8]), 6 + np.arange(9)]), ), ), ( # Test with NamedTuples. { "a": np.array([0, 1, 2]), "b": DummyTuple(a=np.zeros(5), b=np.ones(5)), }, np.arange(2), {"a": np.array([5, 7]), "b": DummyTuple(a=np.ones(2), b=np.zeros(2))}, { "a": np.array([5, 7, 2]), "b": DummyTuple( a=np.array([1.0, 1.0, 0.0, 0.0, 0.0]), b=np.array([0.0, 0.0, 1.0, 1.0, 1.0]) ), }, ), ], ) def test_set_slice(target, indices, values, result): set_slice(target, indices, values) assert str(target) == str(result) @pytest.mark.xfail( reason="Removed the 'concatenate' generic function, since " "there wasn't really a use for it anywhere." ) @pytest.mark.parametrize( "a, b, kwargs, expected", [ (np.array([0, 1, 2]), np.array([3, 4, 5, 6]), {}, np.arange(7)), ( { "a": np.array([0, 1, 2]), "b": DummyTuple(a=np.zeros(3), b=np.ones(3)), }, { "a": np.array([3, 4, 5]), "b": DummyTuple(a=np.zeros(4), b=np.ones(4)), }, {}, { "a": np.array([0, 1, 2, 3, 4, 5]), "b": DummyTuple(a=np.zeros(7), b=np.ones(7)), }, ), ( { "a": np.array([[0], [1], [2]]), # [3, 1] "b": DummyTuple(a=np.zeros([1, 4]), b=np.ones([1, 4])), }, { "a": np.array([[3], [4], [5], [6]]), # shape [4, 1] "b": DummyTuple(a=np.zeros([2, 4]), b=np.ones([3, 4])), }, {"axis": 0}, { "a": np.array([[0], [1], [2], [3], [4], [5], [6]]), "b": DummyTuple(a=np.zeros([3, 4]), b=np.ones([4, 4])), }, ), ], ) def test_concat(a, b, kwargs, expected): from .slicing import concatenate assert str(concatenate(a, b, **kwargs)) == str(expected) ================================================ FILE: sequoia/utils/generic_functions/stack.py ================================================ """ Generic function for concatenating ndarrays/tensors/distributions/Mappings etc. """ from collections.abc import Mapping from functools import singledispatch from typing import Any, Dict, List, TypeVar, Union import numpy as np import torch from torch import Tensor from sequoia.utils.categorical import Categorical T = TypeVar("T") # @overload # def stack(first_item: List[T]) -> Sequence[T]: # ... # @overload # def stack(first_item: T, *others: T) -> Sequence[T]: # ... @singledispatch def stack(first_item: Union[T, List[T]], *others: T, **kwargs) -> Any: # By default, if we don't know how to handle the item type, just # return an ndarray with with all the items. # note: We could also try to return a tensor, rather than an ndarray # but I'd rather keep it simple for now. if not others: # If this was called like stack(tensor_list), then we just split off # the list of items. if first_item is None: # Stacking a list of 'None' items returns None. return None assert isinstance(first_item, (list, tuple)), first_item # assert len(first_item) > 1, first_item items = first_item return stack(items[0], *items[1:], **kwargs) np_stack_kwargs = kwargs.copy() if "dim" in np_stack_kwargs: np_stack_kwargs["axis"] = np_stack_kwargs.pop("dim") return np.stack([first_item, *others], **np_stack_kwargs) @stack.register(type(None)) def _stack_none(first_item: None, *others: None, **kwargs) -> Union[None, np.ndarray]: # TODO: Should we return an ndarray with 'None' entries, of dtype np.object_? or # just a single None? # Opting for a single None for now, as it's easier to work with. (`v is None` works) if all(v is None for v in others): return None return np.array([first_item, *others]) # if not others: # return None # return np.array([None, *others]) @stack.register(np.ndarray) def _stack_ndarrays(first_item: np.ndarray, *others: np.ndarray, **kwargs) -> np.ndarray: return np.stack([first_item, *others], **kwargs) @stack.register(Tensor) def _stack_tensors(first_item: Tensor, *others: Tensor, **kwargs) -> Tensor: return torch.stack([first_item, *others], **kwargs) @stack.register(Mapping) def _stack_dicts(first_item: Dict, *others: Dict, **kwargs) -> Dict: return type(first_item)( **{ key: stack(first_item[key], *(other[key] for other in others), **kwargs) for key in first_item.keys() } ) @stack.register(Categorical) def _stack_distributions(first_item: Categorical, *others: Categorical, **kwargs) -> Categorical: return Categorical( logits=torch.stack([first_item.logits, *(other.logits for other in others)], **kwargs) ) ================================================ FILE: sequoia/utils/generic_functions/to_from_tensor.py ================================================ from functools import singledispatch from typing import Any, Dict, Mapping, Optional, Tuple, TypeVar, Union import numpy as np import torch from gym import Space, spaces from torch import Tensor T = TypeVar("T") @singledispatch def from_tensor(space: Space, sample: Union[Tensor, Any]) -> Union[np.ndarray, Any]: """Converts a Tensor into a sample from the given space.""" if isinstance(sample, Tensor): return sample.cpu().numpy() return sample @from_tensor.register def _(space: spaces.Discrete, sample: Tensor) -> int: if isinstance(sample, Tensor): v = sample.item() int_v = int(v) if int_v != v: raise ValueError(f"Value {sample} isn't an integer, so it can't be from space {space}!") return int_v elif isinstance(sample, np.ndarray): assert sample.size == 1, sample return int(sample) return sample @from_tensor.register def _( space: spaces.Dict, sample: Dict[str, Union[Tensor, Any]] ) -> Dict[str, Union[np.ndarray, Any]]: return {key: from_tensor(space[key], value) for key, value in sample.items()} from sequoia.utils.generic_functions._namedtuple import is_namedtuple @from_tensor.register def _(space: spaces.Tuple, sample: Tuple[Union[Tensor, Any]]) -> Tuple[Union[np.ndarray, Any]]: if not isinstance(sample, tuple): # BUG: Sometimes instead of having a sample of Tuple(Discrete(2)) # be `(1,)`, its `array([1])` instead. sample = tuple(sample) values_gen = (from_tensor(space[i], value) for i, value in enumerate(sample)) if is_namedtuple(sample): return type(sample)(*values_gen) return tuple(values_gen) @singledispatch def to_tensor( space: Space, sample: Union[np.ndarray, Any], device: torch.device = None ) -> Union[np.ndarray, Any]: """Converts a sample from the given space into a Tensor.""" if sample is None: return sample return torch.as_tensor(sample, device=device) @to_tensor.register def _( space: spaces.MultiBinary, sample: np.ndarray, device: torch.device = None ) -> Dict[str, Union[Tensor, Any]]: return torch.as_tensor(sample, device=device, dtype=torch.bool) @to_tensor.register def _( space: spaces.Tuple, sample: Tuple[Union[np.ndarray, Any], ...], device: torch.device = None, ) -> Tuple[Union[Tensor, Any], ...]: if sample is None: assert all(isinstance(item_space, Sparse) for item_space in space.spaces) assert all(item_space.sparsity == 1.0 for item_space in space.spaces) # todo: What to do in this context? return None return np.full( [ len(space.spaces), ], fill_value=None, dtype=np.object_, ) if any(v is None for v in sample): assert False, (space, sample, device) return tuple(to_tensor(subspace, sample[i], device) for i, subspace in enumerate(space.spaces)) from typing import NamedTuple from sequoia.common.spaces.named_tuple import NamedTupleSpace @to_tensor.register def _(space: NamedTupleSpace, sample: NamedTuple, device: torch.device = None): return space.dtype( **{ key: to_tensor(space[i], sample[i], device=device) for i, key in enumerate(space._spaces.keys()) } ) from sequoia.common.spaces.sparse import Sparse @to_tensor.register(Sparse) def sparse_sample_to_tensor( space: Sparse, sample: Union[Optional[Any], np.ndarray], device: torch.device = None ) -> Optional[Union[Tensor, np.ndarray]]: if space.sparsity == 1.0: if isinstance(space.base, spaces.MultiDiscrete): assert all(v == None for v in sample) return np.array([None if v == None else v for v in sample]) if sample is not None: assert isinstance(sample, np.ndarray) and sample.dtype == np.object assert not sample.shape return None if space.sparsity == 0.0: # Do we need to convert dtypes here though? return to_tensor(space.base, sample, device) # 0 < sparsity < 1 if isinstance(sample, np.ndarray) and sample.dtype == np.object: return np.array([None if v == None else v for v in sample]) assert False, (space, sample) ================================================ FILE: sequoia/utils/logging_utils.py ================================================ import inspect import logging from functools import wraps from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, TypeVar, Union import torch.multiprocessing as mp import tqdm from torch import Tensor from sequoia.utils.utils import unique_consecutive logging.basicConfig( format="%(asctime)s,%(msecs)d %(levelname)-8s [%(name)s:%(lineno)d] %(message)s", datefmt="%Y-%m-%d:%H:%M:%S", level=logging.INFO, ) logging.getLogger("simple_parsing").setLevel(logging.ERROR) root_logger = logging.getLogger("") T = TypeVar("T") def pbar(dataloader: Iterable[T], description: str = "", *args, **kwargs) -> Iterable[T]: kwargs.setdefault("dynamic_ncols", True) pbar = tqdm.tqdm(dataloader, *args, **kwargs) if description: pbar.set_description(description) return pbar def get_logger(name: str, level: int = None) -> logging.Logger: """Gets a logger for the given file. Sets a nice default format. TODO: figure out if we should add handlers, etc. """ name_is_path: bool = False try: p = Path(name) if p.exists(): name = str(p.absolute().relative_to(Path.cwd()).as_posix()) name_is_path = True except: pass from sys import argv logger = root_logger.getChild(name) debug_flags: List[str] = ["-d", "--debug", "-vv", "-vvv" "--verbose"] if level is None and any(v in argv for v in debug_flags): level = logging.DEBUG if level is None: level = logging.INFO logger.setLevel(level) # if the name is already something like foo.py:256 # if not name_is_path and name[-1].isdigit(): # formatter = logging.Formatter('%(asctime)s, %(levelname)-8s log [%(name)s] %(message)s') # sh = logging.StreamHandler(sys.stdout) # sh.setFormatter(formatter) # sh.setLevel(level) # logger.addHandler(sh) # logger = logging.getLogger(name) # tqdm_handler = TqdmLoggingHandler() # tqdm_handler.setLevel(level) # logger.addHandler(tqdm_handler) return logger def log_calls(function: Callable, level=logging.INFO) -> Callable: """Decorates a function and logs the calls to it and the passed args.""" callerframerecord = inspect.stack()[1] # 0 represents this line # 1 represents line at caller frame = callerframerecord[0] info = inspect.getframeinfo(frame) p = Path(info.filename) name = str(p.absolute().relative_to(Path.cwd()).as_posix()) logger = get_logger(f"{name}:{info.lineno}") @wraps(function) def _wrapped(*args, **kwargs): process_name = mp.current_process().name logger.log( level, ( f"Process {process_name} called {function.__name__} with " f"args={args} and kwargs={kwargs}." ), ) return function(*args, **kwargs) return _wrapped def get_new_file(file: Path) -> Path: """Creates a new file, adding _{i} suffixes until the file doesn't exist. Args: file (Path): A path. Returns: Path: a path that is new. Might have a new _{i} suffix. """ if not file.exists(): return file else: i = 0 file_i = file.with_name(file.stem + f"_{i}" + file.suffix) while file_i.exists(): i += 1 file_i = file.with_name(file.stem + f"_{i}" + file.suffix) file = file_i return file def cleanup( message: Dict[str, Union[Dict, str, float, Any]], sep: str = "/", keys_to_remove: List[str] = None, ) -> Dict[str, Union[float, Tensor]]: """Cleanup a message dict before it is logged to wandb. TODO: Describe what this does in more detail. Args: message (Dict[str, Union[Dict, str, float, Any]]): [description] sep (str, optional): [description]. Defaults to "/". Returns: Dict[str, Union[float, Tensor]]: Cleaned up dict. """ # Flatten the log dictionary from sequoia.utils.utils import flatten_dict message = flatten_dict(message, separator=sep) keys_to_remove = keys_to_remove or [] for k in list(message.keys()): if any(flag in k for flag in keys_to_remove): message.pop(k) continue v = message.pop(k) # Example input: # "Task_losses/Task1/losses/Test/losses/rotate/losses/270/metrics/270/accuracy" # Simplify the key, by getting rid of all the '/losses/' and '/metrics/' etc. things_to_remove: List[str] = [f"{sep}losses{sep}", f"{sep}metrics{sep}"] for thing in things_to_remove: while thing in k: k = k.replace(thing, sep) # --> "Task_losses/Task1/Test/rotate/270/270/accuracy" # Get rid of repetitive modifiers (ex: "/270/270" above) parts = k.split(sep) parts = [s for s in parts if not s.isspace()] k = sep.join(unique_consecutive(parts)) # Will become: # "Task_losses/Task1/Test/rotate/270/accuracy" message[k] = v return message class TqdmLoggingHandler(logging.Handler): def __init__(self, level=logging.NOTSET): super().__init__(level) def emit(self, record): try: msg = self.format(record) tqdm.tqdm.write(msg) self.flush() except (KeyboardInterrupt, SystemExit): raise except: self.handleError(record) ================================================ FILE: sequoia/utils/module_dict.py ================================================ """ Typed wrapper around `nn.ModuleDict`, just that just adds a get method. """ from typing import Any, MutableMapping, TypeVar, Union from torch import nn M = TypeVar("M", bound=nn.Module) T = TypeVar("T") class ModuleDict(nn.ModuleDict, MutableMapping[str, M]): def get(self, key: str, default: Any = None) -> Union[M, Any]: """Returns the module at `self[key]` if present, else `default`. Args: key (str): a key. default (Union[M, nn.Module], optional): Default value to return. Defaults to None. Returns: Union[Optional[nn.Module], Optional[M]]: The nn.Module at that key. """ return self[key] if key in self else default ================================================ FILE: sequoia/utils/parseable.py ================================================ import dataclasses import shlex import sys from argparse import Namespace from dataclasses import is_dataclass from typing import List, Optional, Tuple, Type, TypeVar, Union from pytorch_lightning import LightningDataModule from simple_parsing import ArgumentParser from sequoia.utils.utils import camel_case from .logging_utils import get_logger logger = get_logger(__name__) P = TypeVar("P", bound="Parseable") class Parseable: _argv: Optional[List[str]] = None @classmethod def add_argparse_args(cls, parser: ArgumentParser) -> None: """Add the command-line arguments for this class to the given parser. Override this if you don't use simple-parsing to add the args. Parameters ---------- parser : ArgumentParser The ArgumentParser. """ if is_dataclass(cls): dest = camel_case(cls.__qualname__) parser.add_arguments(cls, dest=dest) elif issubclass(cls, LightningDataModule): # TODO: Test this case out (using a LightningDataModule as a Setting). super().add_argparse_args(parser) # type: ignore else: raise NotImplementedError( f"Don't know how to add command-line arguments for class " f"{cls}, since it isn't a dataclass and doesn't override the " f"`add_argparse_args` method!\n" f"Either make class {cls} a dataclass and add command-line " f"arguments as fields, or add an implementation for the " f"`add_argparse_args` and `from_argparse_args` classmethods." ) @classmethod def from_argparse_args(cls: Type[P], args: Namespace) -> P: """Extract the parsed command-line arguments from the namespace and return an instance of class `cls`. Override this if you don't use simple-parsing. Parameters ---------- args : Namespace The namespace containing all the parsed command-line arguments. dest : str, optional The , by default None Returns ------- cls An instance of the class `cls`. """ if is_dataclass(cls): dest = camel_case(cls.__qualname__) return getattr(args, dest) # if issubclass(cls, LightningDataModule): # # TODO: Test this case out (using a LightningDataModule as a Setting). # return super()._from_argparse_args(args) # type: ignore raise NotImplementedError( f"Don't know how to extract the command-line arguments for class " f"{cls} from the namespace, since {cls} isn't a dataclass and " f"doesn't override the `from_argparse_args` classmethod." ) @classmethod def from_args( cls: Type[P], argv: Union[str, List[str]] = None, reorder: bool = True, strict: bool = True ) -> P: """Parse an instance of this class from the command-line args. Parameters ---------- cls : Type[P] The class to instantiate. This only supports dataclasses by default. For other classes, you'll have to implement this method yourself. argv : Union[str, List[str]], optional The command-line string or list of string arguments in the style of sys.argv. Could also be the unused_args returned by .from_known_args(), for example. By default None reorder : bool, optional Wether to attempt to re-order positional arguments. Only really useful when using subparser actions. By default True. strict : bool, optional Wether to raise an error if there are extra arguments. By default False TODO: Might be a good idea to actually change this default to 'True' to avoid potential subtle bugs in various places. This would however make the code slightly more difficult to read, since we'd have to pass some unused_args around. Also might be a problem when the same argument e.g. batch_size (at some point) is in both the Setting and the Method, because then the arg would be 'consumed', and not passed to the second parser in the chain. Returns ------- P The parsed instance of this class. Raises ------ NotImplementedError [description] """ # if not is_dataclass(cls): # raise NotImplementedError( # f"Don't know how to create an instance of class {cls} from the " # f"command-line, as it isn't a dataclass. You'll have to " # f"override the `from_args` or `from_known_args` classmethods." # ) if isinstance(argv, str): argv = shlex.split(argv) instance, unused_args = cls.from_known_args( argv=argv, reorder=reorder, strict=strict, ) assert not (strict and unused_args), "an error should have been raised" return instance @classmethod def from_known_args( cls, argv: Union[str, List[str]] = None, reorder: bool = True, strict: bool = False ) -> Tuple[P, List[str]]: # if not is_dataclass(cls): # raise NotImplementedError( # f"Don't know how to parse an instance of class {cls} from the " # f"command-line, as it isn't a dataclass or doesn't have the " # f"`add_arpargse_args` and `from_argparse_args` classmethods. " # f"You'll have to override the `from_known_args` classmethod." # ) if argv is None: argv = sys.argv[1:] logger.debug(f"parsing an instance of class {cls} from argv {argv}") if isinstance(argv, str): argv = shlex.split(argv) parser = ArgumentParser(description=cls.__doc__, add_dest_to_option_strings=False) cls.add_argparse_args(parser) # TODO: Set temporarily on the class, so its accessible in the class constructor cls_argv = cls._argv cls._argv = argv instance: P if strict: args = parser.parse_args(argv) unused_args = [] else: args, unused_args = parser.parse_known_args(argv, attempt_to_reorder=reorder) if unused_args: logger.debug( RuntimeWarning(f"Unknown/unused args when parsing class {cls}: {unused_args}") ) instance = cls.from_argparse_args(args) # Save the argv that were used to create the instance on its `_argv` # attribute. instance._argv = argv cls._argv = cls_argv return instance, unused_args def upgrade(self, target_type: Type[P]) -> P: """Upgrades the hparams `self` to the given `target_type`, filling in any missing values by parsing them from the command-line. If `self` was created from the command-line, then the same argv that were used to create `self` will be used to create the new object. Returns ------- type(self).HParams Hparams of the type `self.HParams`, with the original values preserved and any new values parsed from the command-line. """ # NOTE: This (getting the wrong hparams class) could happen for # instance when parsing a BaseMethod from the command-line, the # default type of hparams on the method is BaseModel.HParams, # whose `output_head` field doesn't have the right type exactly. current_type = type(self) current_hparams = dataclasses.asdict(self) # NOTE: If a value is not at its current default, keep it. default_hparams = target_type() missing_fields = [ f.name for f in dataclasses.fields(target_type) if f.name not in current_hparams or current_hparams[f.name] == getattr(current_type(), f.name, None) or current_hparams[f.name] == getattr(default_hparams, f.name) ] logger.warning( RuntimeWarning( f"Upgrading the hparams from type {current_type} to " f"type {target_type}. This will try to fetch the values for " f"the missing fields {missing_fields} from the command-line. " ) ) # Get the missing values if self._argv: return target_type.from_args(argv=self._argv, strict=False) hparams = target_type.from_args(argv=self._argv, strict=False) for missing_field in missing_fields: current_hparams[missing_field] = getattr(hparams, missing_field) return target_type(**current_hparams) # @classmethod # def fields(cls) -> Dict[str, Field]: # return {f.name: f for f in dataclasses.fields(cls)} ================================================ FILE: sequoia/utils/plotting.py ================================================ from dataclasses import dataclass from typing import List import matplotlib.pyplot as plt def autolabel(axis, rects: List[plt.Rectangle], bar_height_scale: float = 1.0): """Attach a text label above each bar in *rects*, displaying its height. Taken from https://matplotlib.org/gallery/lines_bars_and_markers/barchart.html#sphx-glr-gallery-lines-bars-and-markers-barchart-py """ for rect in rects: height = rect.get_height() bottom = rect.get_y() value = height / bar_height_scale if value != 0.0: axis.annotate( f"{value:.0%}", xy=(rect.get_x() + rect.get_width() / 2, bottom + height), xytext=(0, 3), # 3 points vertical offset textcoords="offset points", ha="center", va="bottom", ) def maximize_figure(): fig_manager = plt.get_current_fig_manager() try: fig_manager.window.showMaximized() except: try: fig_manager.window.state("zoomed") # works fine on Windows! except: try: fig_manager.frame.Maximize(True) except: print("Couldn't maximize the figure.") @dataclass class PlotSectionLabel: """Used to label a section of a plot between `start_step` and `stop_step` with a label of `description`.""" start_step: int stop_step: int description: str = "" @property def middle(self) -> float: return (self.start_step + self.stop_step) / 2 @property def width(self) -> int: return self.stop_step - self.start_step def annotate(self, ax: plt.Axes, height: float = -0.1): """Annotate the corresponding region of the axis. Adds vertical lines at the `start_step` and `end_step` along with a text label for the description in between. Args: ax (plt.Axes): An Axis to annotate. height (float): The height at which to place the text. """ ax.axvline(self.start_step, linestyle=":", color="gray") ax.axvline(self.stop_step, linestyle=":", color="gray") ax.text(self.middle, height, self.description, ha="center") ================================================ FILE: sequoia/utils/pretrained_utils.py ================================================ from typing import Callable, Optional, Tuple, Union from torch import nn from sequoia.utils.logging_utils import get_logger logger = get_logger(__name__) def get_pretrained_encoder( encoder_model: Callable, pretrained: bool = True, freeze_pretrained_weights: bool = False, new_hidden_size: int = None, ) -> Tuple[nn.Module, int]: """Returns a pretrained encoder on ImageNet from `torchvision.models` If `new_hidden_size` is True, will try to replace the classification layer block with a `nn.Linear(, new_hidden_size)`, where corresponds to the hidden size of the model. This last layer will always be trainable, even if `freeze_pretrained_weights` is True. Args: encoder_model (Callable): Which encoder model to use. Should usually be one of the models in the `torchvision.models` module. pretrained (bool, optional): Wether to try and download the pretrained weights. Defaults to True. freeze_pretrained_weights (bool, optional): Wether the pretrained (downloaded) weights should be frozen. Has no effect when `pretrained` is False. Defaults to False. new_hidden_size (int): The hidden size of the resulting model. Returns: Tuple[nn.Module, int]: the pretrained encoder, with the classification head removed, and the resulting output size (hidden dims) """ logger.debug(f"Using encoder model {encoder_model.__name__}") logger.debug(f"pretrained: {pretrained}") logger.debug(f"freezing the pretrained weights: {freeze_pretrained_weights}") try: encoder = encoder_model(pretrained=pretrained) except TypeError as e: encoder = encoder_model() if pretrained and freeze_pretrained_weights: # Fix the parameters of the model. for param in encoder.parameters(): param.requires_grad = False replace_classifier = new_hidden_size is not None # We want to replace the last layer (the classification layer) with a # projection from their hidden space dimension to ours. new_classifier: Optional[nn.Linear] = None classifier = None if not replace_classifier: # We will create the 'new classifier' but then not add it. # this allows us to also get the 'hidden_size' of the resulting encoder. new_hidden_size = 1 for attr in ["classifier", "fc"]: if hasattr(encoder, attr): classifier: Union[nn.Sequential, nn.Linear] = getattr(encoder, attr) new_classifier: Optional[nn.Linear] = None # Get the number of input features. if isinstance(classifier, nn.Linear): new_classifier = nn.Linear( in_features=classifier.in_features, out_features=new_hidden_size ) elif isinstance(classifier, nn.Sequential): # if there is a classifier "block", get the number of # features from the first encountered dense layer. for layer in classifier.children(): if isinstance(layer, nn.Linear): new_classifier = nn.Linear(layer.in_features, new_hidden_size) break break if new_classifier is None: raise RuntimeError( f"Can't detect the hidden size of the model '{encoder_model.__name__}'!" f" (last layer is :{classifier}).\n" ) if not replace_classifier: new_hidden_size = new_classifier.in_features new_classifier = nn.Sequential() else: logger.debug( f"Replacing the attribute '{attr}' of the " f"{encoder_model.__name__} model with a new classifier: " f"{new_classifier}" ) setattr(encoder, attr, new_classifier) return encoder, new_hidden_size ================================================ FILE: sequoia/utils/readme.py ================================================ import os import textwrap from contextlib import redirect_stdout from inspect import getsourcefile from io import StringIO from pathlib import Path from typing import TYPE_CHECKING, List, Type from sequoia.settings import Setting if TYPE_CHECKING: from sequoia.settings import Setting # NOTE: Update this if we move this `readme.py` somewhere else. SEQUOIA_ROOT_DIR = Path(os.path.abspath(os.path.dirname(__file__))).parent.parent def get_relative_path_to(something: Type) -> Path: """Attempts to give the relative path from the current working directory to the file where somethign is defined. If that's not possible, returns an absolute path instead. """ # This isn't quite right: Should be a relative path to the source file: current_dir = Path.cwd() source_file = Path(getsourcefile(something)).relative_to(current_dir) return source_file def get_tree_string( root_setting: Type["Setting"] = Setting, with_methods: bool = False, with_assumptions: bool = False, with_docstrings: bool = False, ) -> str: """Get a string representation of the tree! I want to return something like this: ``` "Setting" ├── active │ └── rl ├── base └── passive └── cl └── task_incremental └── iid ``` """ if with_assumptions: raise NotImplementedError( f"TODO: display the assumptions for each setting into the tree string " f"somehow." ) setting: Type["Setting"] = root_setting # prefix: str = "" message: List[str] = [] source_file = get_relative_path_to(setting) message += [f"{setting.get_name()} found in [{setting.__name__}]({source_file})"] applicable_methods = setting.get_applicable_methods() n_children = len(setting.get_immediate_children()) bar = "│" if n_children else " " if with_docstrings: p = f"{bar} " docstring = setting.__doc__ # Note: why not use something like textwrap.indent? message.extend([p + line for line in docstring.splitlines()]) message += [p] if with_methods: p = f"{bar} " message += [f"{p} Applicable methods: "] for method in applicable_methods: source_file = get_relative_path_to(method) message += [f"{p} * [{method.__name__}]({source_file})"] message += [f"{p} "] # message = "\n".join(message) + "\n" # print(f"Children: {setting.get_children()}") # print(f"Children[0]'s children: {setting.get_children()[0].children}") for i, child_setting in enumerate(setting.get_immediate_children()): # Recurse! child_message = get_tree_string(child_setting) child_message_lines = child_message.splitlines() for j, line in enumerate(child_message_lines): first: str = "x " # just for debugging, shouldn't be an x left after. if j == 0: if i == n_children - 1: # Last child uses different graphic first = "└──" else: first = "├──" else: if i == n_children - 1: first = " " else: first = "│ " message += [first + line] first_line = f"─ {message[0]}\n" message_str = "\n".join(message[1:]) message_str = textwrap.indent(message_str, " ") return first_line + message_str def get_tree_string_markdown( root_setting: Type["Setting"] = Setting, with_methods: bool = False, with_docstring: bool = False, ): """Get a string representation of the tree! I want to return something like this: - "Setting" - active - rl - base - passive - cl - task_incremental * iid """ setting = root_setting message_lines: List[str] = [] source_file = get_relative_path_to(setting) message_lines += [f"- ## [{setting.__name__}]({source_file})"] applicable_methods = setting.get_applicable_methods() tab = " " if with_docstring: message_lines += [""] docstring: str = setting.__doc__ docstring_lines = docstring.splitlines() # The first line is always less indented than the rest, which looks weird: first_line = docstring_lines[0].lstrip() # Remove the common indent in the rest of the docstring lines: other_lines = textwrap.dedent("\n".join(docstring_lines[1:])) # re-indent the docstring, with all equal indentation now: docstring = first_line + "\n" + other_lines # docstring = textwrap.shorten(docstring, replace_whitespace=False, width=130) # docstring = textwrap.fill(docstring, max_lines=10) # print(setting) # print(docstring) # exit() docstring = textwrap.indent(docstring, tab) message_lines.extend(docstring.splitlines()) message_lines += [""] if with_methods: message_lines += [""] message_lines += ["Applicable methods: "] for method in applicable_methods: source_file = get_relative_path_to(method) message_lines += [f" * [{method.__name__}]({source_file})"] message_lines += [""] # message = "\n".join(message) + "\n" # print(f"Children: {setting.get_children()}") # print(f"Children[0]'s children: {setting.get_children()[0].children}") for child_setting in setting.get_immediate_children(): child_message = get_tree_string_markdown( child_setting, with_methods=with_methods, with_docstring=with_docstring ) child_message = textwrap.indent(child_message, tab) message_lines += [""] message_lines.extend(child_message.splitlines()) message_lines += [""] return "\n".join(message_lines) def print_methods(): from sequoia.methods import all_methods for method in all_methods: source_file = get_relative_path_to(method) target_setting: Type["Setting"] = method.target_setting setting_file = get_relative_path_to(target_setting) method_name = method.__name__ if method.get_family() != "methods": method_name = method.get_family() + "." + method_name print(f"- ## [{method_name}]({source_file}) ") print() print(f"\t - Target setting: [{target_setting.__name__}]({setting_file})") print() docstring: str = method.__doc__ docstring_lines = docstring.splitlines() # The first line is always less indented than the rest, which looks weird: first_line = docstring_lines[0].lstrip() # Remove the common indent in the rest of the docstring lines: other_lines = textwrap.dedent("\n".join(docstring_lines[1:])) # re-indent the docstring, with all equal indentation now: docstring = first_line + "\n" + other_lines print(textwrap.indent(docstring, "\t")) def add_stuff_to_readme(readme_path=Path("README.md"), settings: bool = True, methods: bool = True): token = "\n" assert settings or methods lines: List[str] = [] with open(readme_path) as f: with StringIO(f.read()) as f: lines = f.readlines() if token not in lines: print("didn't find token!") exit() tree_index = lines.index(token) + 1 # print(get_tree_string_markdown(with_methods=False, with_docstring=True)) # exit() with open(readme_path, "w") as f: # with nullcontext(): with redirect_stdout(f): # with nullcontext(): # reversed insert? # Print the existing lines back: print(*lines[: tree_index + 1], sep="") if settings: print("\n\n## Available Settings:\n") print() print(get_tree_string_markdown(with_methods=False, with_docstring=True)) print() # print("```") # print(get_tree_string()) # print("```") if methods: print("\n\n## Registered Methods (so far):\n") print_methods() print() if __name__ == "__main__": # print(get_tree_string()) # print(get_tree_string_markdown(with_methods=False, with_docstring=True)) add_stuff_to_readme(readme_path=Path("sequoia/settings/README.md"), methods=False) add_stuff_to_readme(readme_path=Path("sequoia/methods/README.md"), settings=False) ================================================ FILE: sequoia/utils/serialization.py ================================================ from dataclasses import dataclass, fields from inspect import isfunction from pathlib import Path from typing import Any, Dict, Iterable, Tuple, Type, TypeVar, Union, get_type_hints import torch from simple_parsing.helpers import Serializable as SerializableBase from simple_parsing.helpers.serialization import register_decoding_fn from sequoia.utils.generic_functions import detach from .generic_functions.detach import detach from .generic_functions.move import move from .logging_utils import get_logger from .utils import dict_union register_decoding_fn(torch.device, torch.device) T = TypeVar("T") logger = get_logger(__name__) def cpu(x: Any) -> Any: return move(x, "cpu") class Pickleable: """Helps make a class pickleable.""" def __getstate__(self): """We implement this to just make sure to detach the tensors if any before pickling. """ # We use `vars(self)` to get all the attributes, not just the fields. state_dict = vars(self) return cpu(detach(state_dict)) def __setstate__(self, state: Dict): # logger.debug(f"__setstate__ was called") self.__dict__.update(state) S = TypeVar("S", bound="Serializable") @dataclass class Serializable(SerializableBase, Pickleable, decode_into_subclasses=True): # type: ignore # NOTE: This currently doesn't add much compared to `Serializable` from simple-parsing apart # from not dropping the keys. def save(self, path: Union[str, Path], **kwargs) -> None: path = Path(path) path.parent.mkdir(parents=True, exist_ok=True) # Save to temp file, so we don't corrupt the save file. save_path_tmp = path.with_name(path.stem + "_temp" + path.suffix) # write out to the temp file. super().save(save_path_tmp, **kwargs) # Rename the temp file to the right path, overwriting it if it exists. save_path_tmp.replace(path) def detach(self: S) -> S: return type(self)( **detach( { field.name: getattr(self, field.name) for field in fields(self) if field.metadata.get("to_dict", True) } ) ) def to(self, device: Union[str, torch.device]): """Returns a new object with all the attributes 'moved' to `device`. NOTE: This doesn't implement anything related to the other args like memory format or dtype. TODO: Maybe add something to convert everything that is a Tensor or numpy array to a given dtype? """ return type(self)(**{name: move(item, device) for name, item in self.items()}) def items(self) -> Iterable[Tuple[str, Any]]: for field in fields(self): yield field.name, getattr(self, field.name) def cpu(self): return self.to("cpu") def cuda(self, device: Union[str, torch.device] = None): return self.to(device or "cuda") def merge(self, other: "Serializable") -> "Serializable": """Overwrite values in `self` present in 'other' with the values from `other`. Also merges child elements recursively. Returns a new object, i.e. this doesn't modify `self` in-place. """ self_dict = self.to_dict() if isinstance(other, SerializableBase): other = other.to_dict() elif not isinstance(other, dict): raise RuntimeError(f"Can't merge self with {other}.") return type(self).from_dict(dict_union(self_dict, other)) class decode: @staticmethod def register(fn_or_type: Type = None): """Decorator to be used to register a decoding function for a given type. This can be used in two different ways. The type annotation can either be explicit, like so: ```python @decode.register(SomeType) def decode_some_type(v: str): return SomeType(v) # return an instance of SomeType from a string. ``` or implicitly determined through the return type annotation, like so: ``` @decode.register def decode_some_type(v: str) -> SomeType: (...) ``` In the end, this just calls `register_decoding_fn(SomeType, decode_some_type)`. """ def _wrapper(fn): if fn_or_type is not None: type_ = fn_or_type else: type_hints = get_type_hints(fn) if "return" not in type_hints: raise RuntimeError( f"Need to either explicitly pass a type to `register`, or use " f"a return type annotation (e.g. `-> Foo:`) on the function!" ) type_ = type_hints["return"] register_decoding_fn(type_, fn) return fn if isfunction(fn_or_type): fn = fn_or_type fn_or_type = None return _wrapper(fn) return _wrapper ================================================ FILE: sequoia/utils/utils.py ================================================ """ Miscelaneous utility functions. """ import functools import hashlib import inspect import itertools import operator import re import warnings from collections import defaultdict from dataclasses import Field, fields from functools import reduce from inspect import getsourcefile, isclass from itertools import filterfalse, groupby from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union from simple_parsing import field from torch import Tensor, cuda cuda_available = cuda.is_available() gpus_available = cuda.device_count() T = TypeVar("T") K = TypeVar("K") V = TypeVar("V") Dataclass = TypeVar("Dataclass") def field_dict(dataclass: Dataclass) -> Dict[str, Field]: return {field.name: field for field in fields(dataclass)} def mean(values: Iterable[T]) -> T: values = list(values) return sum(values) / len(values) def pairwise(iterable: Iterable[T]) -> Iterable[Tuple[T, T]]: "s -> (s0,s1), (s1,s2), (s2, s3), ..." a, b = itertools.tee(iterable) next(b, None) return zip(a, b) def n_consecutive(items: Iterable[T], n: int = 2, yield_last_batch=True) -> Iterable[Tuple[T, ...]]: """Collect data into chunks of up to `n` elements. When `yield_last_batch` is True, the final chunk (which might have fewer than `n` items) will also be yielded. >>> list(n_consecutive("ABCDEFG", 3)) [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)] """ values: List[T] = [] for item in items: values.append(item) if len(values) == n: yield tuple(values) values.clear() if values and yield_last_batch: yield tuple(values) def fix_channels(x_batch: Tensor) -> Tensor: # TODO: Move this to data_utils.py if x_batch.dim() == 3: return x_batch.unsqueeze(1) else: if x_batch.shape[1] != min(x_batch.shape[1:]): return x_batch.transpose(1, -1) else: return x_batch def to_dict_of_lists(list_of_dicts: Iterable[Dict[str, Any]]) -> Dict[str, List[Tensor]]: """Returns a dict of lists given a list of dicts. Assumes that all dictionaries have the same keys as the first dictionary. Args: list_of_dicts (Iterable[Dict[str, Any]]): An iterable of dicts. Returns: Dict[str, List[Tensor]]: A Dict of lists. """ result: Dict[str, List[Any]] = defaultdict(list) for i, d in enumerate(list_of_dicts): for key, value in d.items(): result[key].append(value) assert d.keys() == result.keys(), f"Dict {d} at index {i} does not contain all the keys!" return result def add_prefix(some_dict: Dict[str, T], prefix: str = "", sep=" ") -> Dict[str, T]: """Adds the given prefix to all the keys in the dictionary that don't already start with it. Parameters ---------- - some_dict : Dict[str, T] Some dictionary. - prefix : str, optional, by default "" A string prefix to append. - sep : str, optional, by default " " A string separator to add between the `prefix` and the existing keys (which do no start by `prefix`). Returns ------- Dict[str, T] A new dictionary where all keys start with the prefix. Examples: ------- >>> add_prefix({"a": 1}, prefix="bob", sep="") {'boba': 1} >>> add_prefix({"a": 1}, prefix="bob") {'bob a': 1} >>> add_prefix({"a": 1}, prefix="a") {'a': 1} >>> add_prefix({"a": 1}, prefix="a ") {'a': 1} >>> add_prefix({"a": 1}, prefix="a", sep="/") {'a': 1} """ if not prefix: return some_dict result: Dict[str, T] = type(some_dict)() if sep and prefix.endswith(sep): prefix = prefix.rstrip(sep) for key, value in some_dict.items(): new_key = key if key.startswith(prefix) else (prefix + sep + key) result[new_key] = value return result def loss_str(loss_tensor: Tensor) -> str: loss = loss_tensor.item() if loss == 0: return "0" elif abs(loss) < 1e-3 or abs(loss) > 1e3: return f"{loss:.1e}" else: return f"{loss:.3f}" def set_seed(seed: int): """Set the pytorch/numpy random seed.""" import random import numpy as np import torch random.seed(seed) torch.manual_seed(seed) np.random.seed(seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed) def compute_identity(size: int = 16, **sample) -> str: """Compute a unique hash out of a dictionary Parameters ---------- size: int size of the unique hash **sample: Dictionary to compute the hash from """ sample_hash = hashlib.sha256() for k, v in sorted(sample.items()): sample_hash.update(k.encode("utf8")) if isinstance(v, dict): sample_hash.update(compute_identity(size, **v).encode("utf8")) else: sample_hash.update(str(v).encode("utf8")) return sample_hash.hexdigest()[:size] def prod(iterable: Iterable[T]) -> T: """Like sum() but returns the product of all numbers in the iterable. >>> prod(range(1, 5)) 24 """ return reduce(operator.mul, iterable, 1) def common_fields(a, b) -> Iterable[Tuple[str, Tuple[Field, Field]]]: # If any attributes are common to both the Experiment and the State, # copy them over to the Experiment. a_fields = fields(a) b_fields = fields(b) for field_a in a_fields: name_a: str = field_a.name value_a = getattr(a, field_a.name) for field_b in b_fields: name_b: str = field_b.name value_b = getattr(b, field_b.name) if name_a == name_b: yield name_a, (value_a, value_b) def add_dicts(d1: Dict, d2: Dict, add_values=True) -> Dict: result = d1.copy() for key, v2 in d2.items(): if key not in d1: result[key] = v2 elif isinstance(v2, dict): result[key] = add_dicts(d1[key], v2, add_values=add_values) elif not add_values: result[key] = v2 else: result[key] = d1[key] + v2 return result def rsetattr(obj: Any, attr: str, val: Any) -> None: """Taken from https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties""" pre, _, post = attr.rpartition(".") return setattr(rgetattr(obj, pre) if pre else obj, post, val) # using wonder's beautiful simplification: https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427 def rgetattr(obj: Any, attr: str, *args): """Taken from https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties""" def _getattr(obj, attr): return getattr(obj, attr, *args) return functools.reduce(_getattr, [obj] + attr.split(".")) def is_nonempty_dir(path: Path) -> bool: return path.is_dir() and len(list(path.iterdir())) > 0 D = TypeVar("D", bound=Dict) def flatten_dict(d: D, separator: str = "/") -> D: """Flattens the given nested dict, adding `separator` between keys at different nesting levels. Args: d (Dict): A nested dictionary separator (str, optional): Separator to use. Defaults to "/". Returns: Dict: A flattened dictionary. """ result = type(d)() for k, v in d.items(): if isinstance(v, dict): for ki, vi in flatten_dict(v, separator=separator).items(): key = f"{k}{separator}{ki}" result[key] = vi else: result[k] = v return result def unique_consecutive(iterable: Iterable[T], key: Callable[[T], Any] = None) -> Iterable[T]: """List unique elements, preserving order. Remember only the element just seen. NOTE: If `key` is passed, it is only used to test for equality, the outputs of `key` for each sample won't be returned. >>> list(unique_consecutive('AAAABBBCCDAABBB')) ['A', 'B', 'C', 'D', 'A', 'B'] >>> list(unique_consecutive('ABBCcAD', str.lower)) ['A', 'B', 'C', 'A', 'D'] Recipe taken from itertools docs: https://docs.python.org/3/library/itertools.html """ return map(next, map(operator.itemgetter(1), groupby(iterable, key))) def unique_consecutive_with_index( iterable: Iterable[T], key: Callable[[T], Any] = None ) -> Iterable[Tuple[int, T]]: """List unique elements, preserving order. Remember only the element just seen. Yields tuples of the index and the values. NOTE: If `key` is passed, it is only used to test for equality, the outputs of `key` for each sample won't be returned. If you want to save some compute, use a map as the input. >>> list(unique_consecutive_with_index('AAAABBBCCDAABBB')) [(0, 'A'), (4, 'B'), (7, 'C'), (9, 'D'), (10, 'A'), (12, 'B')] >>> list(unique_consecutive_with_index('ABBCcAD', str.lower)) [(0, 'A'), (1, 'B'), (3, 'C'), (5, 'A'), (6, 'D')] """ _key = lambda i_v: key(i_v[1]) if key is not None else i_v[1] for v, group_iterator in groupby(enumerate(iterable), _key): index, first_val = next(group_iterator) yield index, first_val def roundrobin(*iterables: Iterable[T]) -> Iterable[T]: """ roundrobin('ABC', 'D', 'EF') --> A D E B F C Recipe taken from itertools docs: https://docs.python.org/3/library/itertools.html """ # Recipe credited to George Sakkis num_active = len(iterables) nexts = itertools.cycle(iter(it).__next__ for it in iterables) while num_active: try: for next_ in nexts: yield next_() except StopIteration: # Remove the iterator we just exhausted from the cycle. num_active -= 1 nexts = itertools.cycle(itertools.islice(nexts, num_active)) def take(iterable: Iterable[T], n: Optional[int]) -> Iterable[T]: """Takes only the first `n` elements from `iterable`. if `n` is None, returns the entire iterable. """ return itertools.islice(iterable, n) if n is not None else iterable def camel_case(name): s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name) s2 = re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower() while "__" in s2: s2 = s2.replace("__", "_") return s2 def constant(v: T, **kwargs) -> T: metadata = kwargs.setdefault("metadata", {}) metadata["constant"] = v metadata["decoding_fn"] = lambda _: v metadata["to_dict"] = lambda _: v return field(default=v, init=False, **kwargs) def flag(default: bool, *args, **kwargs): return field(default=default, nargs="?", *args, **kwargs) def dict_union(*dicts: Dict[K, V], recurse: bool = True, dict_factory=dict) -> Dict[K, V]: """Simple dict union until we use python 3.9 If `recurse` is True, also does the union of nested dictionaries. NOTE: The returned dictionary has keys sorted alphabetically. >>> a = dict(a=1, b=2, c=3) >>> b = dict(c=5, d=6, e=7) >>> dict_union(a, b) {'a': 1, 'b': 2, 'c': 5, 'd': 6, 'e': 7} >>> a = dict(a=1, b=dict(c=2, d=3)) >>> b = dict(a=2, b=dict(c=3, e=6)) >>> dict_union(a, b) {'a': 2, 'b': {'c': 3, 'd': 3, 'e': 6}} """ result: Dict = dict_factory() if not dicts: return result assert len(dicts) >= 1 all_keys: Set[str] = set() all_keys.update(*dicts) all_keys = sorted(all_keys) # Create a neat generator of generators, to save some memory. all_values: Iterable[Tuple[V, Iterable[K]]] = ( (k, (d[k] for d in dicts if k in d)) for k in all_keys ) for k, values in all_values: sub_dicts: List[Dict] = [] new_value: V = None n_values = 0 for v in values: if isinstance(v, dict) and recurse: sub_dicts.append(v) else: # Overwrite the new value for that key. new_value = v n_values += 1 if len(sub_dicts) == n_values and recurse: # We only get here if all values for key `k` were dictionaries, # and if recurse was True. new_value = dict_union(*sub_dicts, recurse=True, dict_factory=dict_factory) result[k] = new_value return result K = TypeVar("K") V = TypeVar("V") M = TypeVar("M") def zip_dicts(*dicts: Dict[K, V], missing: M = None) -> Iterable[Tuple[K, Tuple[Union[M, V], ...]]]: """Iterator over the union of all keys, giving the value from each dict if present, else `missing`. """ # If any attributes are common to both the Experiment and the State, # copy them over to the Experiment. keys = set(itertools.chain(*dicts)) for key in keys: yield (key, tuple(d.get(key, missing) for d in dicts)) def dict_intersection(*dicts: Dict[K, V]) -> Iterable[Tuple[K, Tuple[V, ...]]]: """Gives back an iterator over the keys and values common to all dicts.""" dicts = [dict(d.items()) for d in dicts] common_keys = set(dicts[0]) for d in dicts: common_keys.intersection_update(d) for key in common_keys: yield (key, tuple(d[key] for d in dicts)) def try_get(d: Dict[K, V], *keys: K, default: V = None) -> Optional[V]: for k in keys: try: return d[k] except KeyError: pass return default def remove_suffix(s: str, suffix: str) -> str: """Remove the suffix from string s if present. Doing this manually until we start using python 3.9. >>> remove_suffix("bob.com", ".com") 'bob' >>> remove_suffix("Henrietta", "match") 'Henrietta' """ i = s.rfind(suffix) if i == -1: # return s if not found. return s return s[:i] def remove_prefix(s: str, prefix: str) -> str: """Remove the prefix from string s if present. Doing this manually until we start using python 3.9. >>> remove_prefix("bob.com", "bo") 'b.com' >>> remove_prefix("Henrietta", "match") 'Henrietta' """ if not s.startswith(prefix): return s return s[len(prefix) :] def get_all_subclasses_of(cls: Type[T]) -> Iterable[Type[T]]: scope_dict: Dict = globals() for name, var in scope_dict.items(): if isclass(var) and issubclass(var, cls): yield var def get_all_concrete_subclasses_of(cls: Type[T]) -> Iterable[Type[T]]: yield from filterfalse(inspect.isabstract, get_all_subclasses_of(cls)) def get_path_to_source_file(cls: Type) -> Path: """Attempts to give a relative path to the given source path. If not possible, then gives back an absolute path to the source file instead. """ cwd = Path.cwd() source_file = getsourcefile(cls) assert isinstance(source_file, str), f"can't locate source file for {cls}?" source_path = Path(source_file).absolute() try: return source_path.relative_to(cwd) except ValueError: # If we can't find the relative path, for instance when sequoia is # installed in site_packages (not with `pip install -e .``), give back # the absolute path instead. return source_path def constant_property(fixed_value: T) -> T: def constant_field(v: T, **kwargs) -> T: metadata = kwargs.setdefault("metadata", {}) metadata["constant"] = v metadata["decoding_fn"] = lambda _: v metadata["to_dict"] = lambda _: v return field(default=v, init=False, **kwargs) def setter(_, value: Any): if isinstance(value, property): # This happens in the __init__ that is generated by dataclasses, so we # do nothing here. pass elif value != fixed_value: raise RuntimeError(RuntimeWarning(f"This attribute is fixed at value {fixed_value}.")) def getter(_) -> T: return fixed_value return property(fget=getter, fset=setter) def deprecated_property(old_name: str, new_name: str): """Marks a property as being deprecated, redirectly any changes to its value to the property with name 'new_name'. """ def setter(self, value: Any): warnings.warn( DeprecationWarning(f"'{old_name}' property is deprecated, use '{new_name}' instead."), category=DeprecationWarning, stacklevel=2, ) if isinstance(value, property): # This happens in the __init__ that is generated by dataclasses, so we # do nothing here. pass else: setattr(self, new_name, value) # raise RuntimeError(f"'{old_name}' property is deprecated, use '{new_name}' instead.") def getter(self): warnings.warn( DeprecationWarning(f"'{old_name}' property is deprecated, use '{new_name}' instead."), category=DeprecationWarning, stacklevel=2, ) return getattr(self, new_name) doc = f"Deprecated property, Please use '{new_name}' instead." return property(fget=getter, fset=setter, doc=doc) if __name__ == "__main__": import doctest doctest.testmod() ================================================ FILE: setup.cfg ================================================ [versioneer] VCS=git style=pep440-post versionfile_source=sequoia/_version.py versionfile_build=sequoia/_version.py tag_prefix=v parentdir_prefix=sequoia- [metadata] license_file=LICENSE ================================================ FILE: setup.py ================================================ import os from typing import Dict, List, Union from setuptools import find_packages, setup import versioneer with open(os.path.join(os.path.dirname(__file__), "requirements.txt"), "r") as file: lines = [ln.strip() for ln in file.readlines()] packages_to_export = find_packages(where=".", exclude=["tests*", "examples*"], include="sequoia*") required_packages = [line for line in lines if line and not line.startswith("#")] extras_require: Dict[str, Union[str, List[str]]] = { "monsterkong": [ "meta_monsterkong @ git+https://github.com/lebrice/MetaMonsterkong.git#egg=meta_monsterkong" ], "atari": ["gym[atari] @ git+https://www.github.com/lebrice/gym@easier_custom_spaces#egg=gym"], "hpo": ["orion>=0.1.15", "orion.algo.skopt>=0.1.6"], "avalanche": [ "gdown", # BUG: Avalanche needs this to download cub200 dataset. "avalanche @ git+https://github.com/ContinualAI/avalanche.git@83b3cb9a92b75a59c1b9d31fc6f0dce9436e5fc5#egg=avalanche-lib", ], # NOTE: Removing this for now, because it has very strict requirements, and includes # a lot of copy-pasted code, and doesn't really add anything compared to metaworld. # This isn't right. # "mtenv": [ # "mtenv @ git+https://github.com/facebookresearch/mtenv.git@main#egg='mtenv[metaworld]'" # ], "ctrl": "ctrl-benchmark==0.0.4", "mujoco": [ "mujoco_py", ], "metaworld": [ "metaworld @ git+https://github.com/rlworkgroup/metaworld.git@29fe5d6d95cf9ad86f63eac38db8c0aef3837994#egg=metaworld" ], "sb3": "stable-baselines3==1.2.0", } # Add-up all the optional requirements, and then remove any duplicates. extras_require["all"] = sum( [ extra_requirements if isinstance(extra_requirements, list) else [extra_requirements] for extra_requirements in extras_require.values() ], [], ) extras_require["all"] = list(set(extras_require["all"])) extras_require["no_mujoco"] = sum( [ extra_dependencies if isinstance(extra_dependencies, list) else [extra_dependencies] for extra_name, extra_dependencies in extras_require.items() if extra_name not in ["all", "mujoco", "metaworld"] ], [], ) extras_require["no_mujoco"] = list(set(extras_require["no_mujoco"])) setup( name="sequoia", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description="The Research Tree - A playground for research at the intersection of Continual, Reinforcement, and Self-Supervised Learning.", url="https://github.com/lebrice/Sequoia", author="Fabrice Normandin", author_email="fabrice.normandin@gmail.com", license="GPLv3", packages=packages_to_export, extras_require=extras_require, install_requires=required_packages, python_requires=">=3.7", tests_require=["pytest"], classifiers=[ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", ], entry_points={ "console_scripts": [ "sequoia = sequoia.main:main", # TODO: This entry-point is added temporarily while we redesign the # command-line API (See https://github.com/lebrice/Sequoia/issues/47) # "sequoia_sweep = sequoia.experiments.hpo_sweep:main", ], }, ) ================================================ FILE: versioneer.py ================================================ # Version: 0.19 """The Versioneer - like a rocketeer, but for versions. The Versioneer ============== * like a rocketeer, but for versions! * https://github.com/python-versioneer/python-versioneer * Brian Warner * License: Public Domain * Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 * [![Latest Version][pypi-image]][pypi-url] * [![Build Status][travis-image]][travis-url] This is a tool for managing a recorded version number in distutils-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control system, and maybe making new tarballs. ## Quick Install * `pip install versioneer` to somewhere in your $PATH * add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) * run `versioneer install` in your source tree, commit the results * Verify version information with `python setup.py version` ## Version Identifiers Source trees come from a variety of places: * a version-control system checkout (mostly used by developers) * a nightly tarball, produced by build automation * a snapshot tarball, produced by a web-based VCS browser, like github's "tarball from tag" feature * a release tarball, produced by "setup.py sdist", distributed through PyPI Within each source tree, the version identifier (either a string or a number, this tool is format-agnostic) can come from a variety of places: * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows about recent "tags" and an absolute revision-id * the name of the directory into which the tarball was unpacked * an expanded VCS keyword ($Id$, etc) * a `_version.py` created by some earlier build step For released software, the version identifier is closely related to a VCS tag. Some projects use tag names that include more than just the version string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool needs to strip the tag prefix to extract the version identifier. For unreleased software (between tags), the version identifier should provide enough information to help developers recreate the same tree, while also giving them an idea of roughly how old the tree is (after version 1.2, before version 1.3). Many VCS systems can report a description that captures this, for example `git describe --tags --dirty --always` reports things like "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has uncommitted changes). The version identifier is used for multiple purposes: * to allow the module to self-identify its version: `myproject.__version__` * to choose a name and prefix for a 'setup.py sdist' tarball ## Theory of Operation Versioneer works by adding a special `_version.py` file into your source tree, where your `__init__.py` can import it. This `_version.py` knows how to dynamically ask the VCS tool for version information at import time. `_version.py` also contains `$Revision$` markers, and the installation process marks `_version.py` to have this marker rewritten with a tag name during the `git archive` command. As a result, generated tarballs will contain enough information to get the proper version. To allow `setup.py` to compute a version too, a `versioneer.py` is added to the top level of your source tree, next to `setup.py` and the `setup.cfg` that configures it. This overrides several distutils/setuptools commands to compute the version when invoked, and changes `setup.py build` and `setup.py sdist` to replace `_version.py` with a small static file that contains just the generated version data. ## Installation See [INSTALL.md](./INSTALL.md) for detailed installation instructions. ## Version-String Flavors Code which uses Versioneer can learn about its version string at runtime by importing `_version` from your main `__init__.py` file and running the `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can import the top-level `versioneer.py` and run `get_versions()`. Both functions return a dictionary with different flavors of version information: * `['version']`: A condensed version string, rendered using the selected style. This is the most commonly used value for the project's version string. The default "pep440" style yields strings like `0.11`, `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section below for alternative styles. * `['full-revisionid']`: detailed revision identifier. For Git, this is the full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the commit date in ISO 8601 format. This will be None if the date is not available. * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that this is only accurate if run in a VCS checkout, otherwise it is likely to be False or None * `['error']`: if the version string could not be computed, this will be set to a string describing the problem, otherwise it will be None. It may be useful to throw an exception in setup.py if this is set, to avoid e.g. creating tarballs with a version string of "unknown". Some variants are more useful than others. Including `full-revisionid` in a bug report should allow developers to reconstruct the exact code being tested (or indicate the presence of local changes that should be shared with the developers). `version` is suitable for display in an "about" box or a CLI `--version` output: it can be easily compared against release notes and lists of bugs fixed in various releases. The installer adds the following text to your `__init__.py` to place a basic version in `YOURPROJECT.__version__`: from ._version import get_versions __version__ = get_versions()['version'] del get_versions ## Styles The setup.cfg `style=` configuration controls how the VCS information is rendered into a version string. The default style, "pep440", produces a PEP440-compliant string, equal to the un-prefixed tag name for actual releases, and containing an additional "local version" section with more detail for in-between builds. For Git, this is TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and that this commit is two revisions ("+2") beyond the "0.11" tag. For released software (exactly equal to a known tag), the identifier will only contain the stripped tag, e.g. "0.11". Other styles are available. See [details.md](details.md) in the Versioneer source tree for descriptions. ## Debugging Versioneer tries to avoid fatal errors: if something goes wrong, it will tend to return a version of "0+unknown". To investigate the problem, run `setup.py version`, which will run the version-lookup code in a verbose mode, and will display the full contents of `get_versions()` (including the `error` string, which may help identify what went wrong). ## Known Limitations Some situations are known to cause problems for Versioneer. This details the most significant ones. More can be found on Github [issues page](https://github.com/python-versioneer/python-versioneer/issues). ### Subprojects Versioneer has limited support for source trees in which `setup.py` is not in the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are two common reasons why `setup.py` might not be in the root: * Source trees which contain multiple subprojects, such as [Buildbot](https://github.com/buildbot/buildbot), which contains both "master" and "slave" subprojects, each with their own `setup.py`, `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI distributions (and upload multiple independently-installable tarballs). * Source trees whose main purpose is to contain a C library, but which also provide bindings to Python (and perhaps other languages) in subdirectories. Versioneer will look for `.git` in parent directories, and most operations should get the right version string. However `pip` and `setuptools` have bugs and implementation details which frequently cause `pip install .` from a subproject directory to fail to find a correct version string (so it usually defaults to `0+unknown`). `pip install --editable .` should work correctly. `setup.py install` might work too. Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in some later version. [Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking this issue. The discussion in [PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the issue from the Versioneer side in more detail. [pip PR#3176](https://github.com/pypa/pip/pull/3176) and [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve pip to let Versioneer work correctly. Versioneer-0.16 and earlier only looked for a `.git` directory next to the `setup.cfg`, so subprojects were completely unsupported with those releases. ### Editable installs with setuptools <= 18.5 `setup.py develop` and `pip install --editable .` allow you to install a project into a virtualenv once, then continue editing the source code (and test) without re-installing after every change. "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a convenient way to specify executable scripts that should be installed along with the python package. These both work as expected when using modern setuptools. When using setuptools-18.5 or earlier, however, certain operations will cause `pkg_resources.DistributionNotFound` errors when running the entrypoint script, which must be resolved by re-installing the package. This happens when the install happens with one version, then the egg_info data is regenerated while a different version is checked out. Many setup.py commands cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into a different virtualenv), so this can be surprising. [Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes this one, but upgrading to a newer version of setuptools should probably resolve it. ## Updating Versioneer To upgrade your project to a new release of Versioneer, do the following: * install the new Versioneer (`pip install -U versioneer` or equivalent) * edit `setup.cfg`, if necessary, to include any new configuration settings indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. * re-run `versioneer install` in your source tree, to replace `SRC/_version.py` * commit any changed files ## Future Directions This tool is designed to make it easily extended to other version-control systems: all VCS-specific components are in separate directories like src/git/ . The top-level `versioneer.py` script is assembled from these components by running make-versioneer.py . In the future, make-versioneer.py will take a VCS name as an argument, and will construct a version of `versioneer.py` that is specific to the given VCS. It might also take the configuration arguments that are currently provided manually during installation by editing setup.py . Alternatively, it might go the other direction and include code from all supported VCS systems, reducing the number of intermediate scripts. ## Similar projects * [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time dependency * [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of versioneer ## License To make Versioneer easier to embed, all its code is dedicated to the public domain. The `_version.py` that it creates is also in the public domain. Specifically, both are released under the Creative Commons "Public Domain Dedication" license (CC0-1.0), as described in https://creativecommons.org/publicdomain/zero/1.0/ . [pypi-image]: https://img.shields.io/pypi/v/versioneer.svg [pypi-url]: https://pypi.python.org/pypi/versioneer/ [travis-image]: https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg [travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer """ import configparser import errno import json import os import re import subprocess import sys class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_root(): """Get the project root directory. We require that all commands are run from the project root, i.e. the directory that contains setup.py, setup.cfg, and versioneer.py . """ root = os.path.realpath(os.path.abspath(os.getcwd())) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): # allow 'python path/to/setup.py COMMAND' root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): err = ( "Versioneer was unable to run the project root directory. " "Versioneer requires setup.py to be executed from " "its immediate directory (like 'python setup.py COMMAND'), " "or in a way that lets it use sys.argv[0] to find the root " "(like 'python path/to/setup.py COMMAND')." ) raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools # tree) execute all dependencies in a single python process, so # "versioneer" may be imported multiple times, and python's shared # module-import table will cache the first one. So we can't use # os.path.dirname(__file__), as that will find whichever # versioneer.py was first imported, even in later projects. me = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: print( "Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(me), versioneer_py) ) except NameError: pass return root def get_config_from_root(root): """Read the project setup.cfg file to determine Versioneer config.""" # This might raise EnvironmentError (if setup.cfg is missing), or # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . setup_cfg = os.path.join(root, "setup.cfg") parser = configparser.ConfigParser() with open(setup_cfg, "r") as f: parser.read_file(f) VCS = parser.get("versioneer", "VCS") # mandatory def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" cfg.versionfile_source = get(parser, "versionfile_source") cfg.versionfile_build = get(parser, "versionfile_build") cfg.tag_prefix = get(parser, "tag_prefix") if cfg.tag_prefix in ("''", '""'): cfg.tag_prefix = "" cfg.parentdir_prefix = get(parser, "parentdir_prefix") cfg.verbose = get(parser, "verbose") return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" # these dictionaries contain VCS-specific tools LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen( [c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), ) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = p.communicate()[0].strip().decode() if p.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, p.returncode return stdout, p.returncode LONG_VERSION_PY[ "git" ] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.19 (https://github.com/python-versioneer/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "%(STYLE)s" cfg.tag_prefix = "%(TAG_PREFIX)s" cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY = {} HANDLERS = {} def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None for c in commands: try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git p = subprocess.Popen([c] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except EnvironmentError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %%s" %% dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %%s" %% (commands,)) return None, None stdout = p.communicate()[0].strip().decode() if p.returncode != 0: if verbose: print("unable to run %%s (error)" %% dispcmd) print("stdout was %%s" %% stdout) return None, p.returncode return stdout, p.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %%s but none started with prefix %%s" %% (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %%d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: print("discarding '%%s', no digits" %% ",".join(refs - tags)) if verbose: print("likely tags: %%s" %% ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: print("picking %%s" %% r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%%s*" %% tag_prefix], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%%s'" %% describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%%s' doesn't start with prefix '%%s'" print(fmt %% (full_tag, tag_prefix)) pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" %% (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post0.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post0.dev%%d" %% pieces["distance"] else: # exception #1 rendered = "0.post0.dev%%d" %% pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%%s" %% pieces["short"] else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%%s" %% pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%%d" %% pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%%s'" %% style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for i in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ''' @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords = {} try: f = open(versionfile_abs, "r") for line in f.readlines(): if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) f.close() except EnvironmentError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if not keywords: raise NotThisMethod("no keywords at all, weird") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = set([r.strip() for r in refnames.strip("()").split(",")]) # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) return { "version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date, } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return { "version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None, } @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = run_command( GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix], cwd=root, ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def do_vcs_install(manifest_in, versionfile_source, ipy): """Git-specific installation logic for Versioneer. For Git, this means creating/changing .gitattributes to mark _version.py for export-subst keyword substitution. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] files = [manifest_in, versionfile_source] if ipy: files.append(ipy) try: me = __file__ if me.endswith(".pyc") or me.endswith(".pyo"): me = os.path.splitext(me)[0] + ".py" versioneer_file = os.path.relpath(me) except NameError: versioneer_file = "versioneer.py" files.append(versioneer_file) present = False try: f = open(".gitattributes", "r") for line in f.readlines(): if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: present = True f.close() except EnvironmentError: pass if not present: f = open(".gitattributes", "a+") f.write("%s export-subst\n" % versionfile_source) f.close() files.append(".gitattributes") run_command(GITS, ["add", "--"] + files) def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return { "version": dirname[len(parentdir_prefix) :], "full-revisionid": None, "dirty": False, "error": None, "date": None, } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print( "Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix) ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") SHORT_VERSION_PY = """ # This file was generated by 'versioneer.py' (0.19) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. import json version_json = ''' %s ''' # END VERSION_JSON def get_versions(): return json.loads(version_json) """ def versions_from_file(filename): """Try to determine the version from _version.py if present.""" try: with open(filename) as f: contents = f.read() except EnvironmentError: raise NotThisMethod("unable to read _version.py") mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) def write_to_version_file(filename, versions): """Write the given version number to the given _version.py file.""" os.unlink(filename) contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) print("set %s to '%s'" % (filename, versions["version"])) def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_pre(pieces): """TAG[.post0.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += ".post0.dev%d" % pieces["distance"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return { "version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None, } if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return { "version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date"), } class VersioneerBadRootError(Exception): """The project root directory is unknown or missing key files.""" def get_versions(verbose=False): """Get the project version from whatever source is available. Returns dict with two keys: 'version' and 'full'. """ if "versioneer" in sys.modules: # see the discussion in cmdclass.py:get_cmdclass() del sys.modules["versioneer"] root = get_root() cfg = get_config_from_root(root) assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or cfg.verbose assert cfg.versionfile_source is not None, "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) # extract version from first of: _version.py, VCS command (e.g. 'git # describe'), parentdir. This is meant to work for developers using a # source checkout, for users of a tarball created by 'setup.py sdist', # and for users of a tarball/zipball created by 'git archive' or github's # download-from-tag feature or the equivalent in other VCSes. get_keywords_f = handlers.get("get_keywords") from_keywords_f = handlers.get("keywords") if get_keywords_f and from_keywords_f: try: keywords = get_keywords_f(versionfile_abs) ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) if verbose: print("got version from expanded keyword %s" % ver) return ver except NotThisMethod: pass try: ver = versions_from_file(versionfile_abs) if verbose: print("got version from file %s %s" % (versionfile_abs, ver)) return ver except NotThisMethod: pass from_vcs_f = handlers.get("pieces_from_vcs") if from_vcs_f: try: pieces = from_vcs_f(cfg.tag_prefix, root, verbose) ver = render(pieces, cfg.style) if verbose: print("got version from VCS %s" % ver) return ver except NotThisMethod: pass try: if cfg.parentdir_prefix: ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) if verbose: print("got version from parentdir %s" % ver) return ver except NotThisMethod: pass if verbose: print("unable to compute version") return { "version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None, } def get_version(): """Get the short version string for this project.""" return get_versions()["version"] def get_cmdclass(cmdclass=None): """Get the custom setuptools/distutils subclasses used by Versioneer. If the package uses a different cmdclass (e.g. one from numpy), it should be provide as an argument. """ if "versioneer" in sys.modules: del sys.modules["versioneer"] # this fixes the "python setup.py develop" case (also 'install' and # 'easy_install .'), in which subdependencies of the main project are # built (using setup.py bdist_egg) in the same python process. Assume # a main project A and a dependency B, which use different versions # of Versioneer. A's setup.py imports A's Versioneer, leaving it in # sys.modules by the time B's setup.py is executed, causing B to run # with the wrong versioneer. Setuptools wraps the sub-dep builds in a # sandbox that restores sys.modules to it's pre-build state, so the # parent is protected against the child's "import versioneer". By # removing ourselves from sys.modules here, before the child build # happens, we protect the child from the parent's versioneer too. # Also see https://github.com/python-versioneer/python-versioneer/issues/52 cmds = {} if cmdclass is None else cmdclass.copy() # we add "version" to both distutils and setuptools from distutils.core import Command class cmd_version(Command): description = "report generated version string" user_options = [] boolean_options = [] def initialize_options(self): pass def finalize_options(self): pass def run(self): vers = get_versions(verbose=True) print("Version: %s" % vers["version"]) print(" full-revisionid: %s" % vers.get("full-revisionid")) print(" dirty: %s" % vers.get("dirty")) print(" date: %s" % vers.get("date")) if vers["error"]: print(" error: %s" % vers["error"]) cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py # distutils/install -> distutils/build ->.. # setuptools/bdist_wheel -> distutils/install ->.. # setuptools/bdist_egg -> distutils/install_lib -> build_py # setuptools/install -> bdist_egg ->.. # setuptools/develop -> ? # pip install: # copies source tree to a tempdir before running egg_info/etc # if .git isn't copied too, 'git describe' will fail # then does setup.py bdist_wheel, or sometimes setup.py install # setup.py egg_info -> ? # we override different "build_py" commands for both environments if "build_py" in cmds: _build_py = cmds["build_py"] elif "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: from distutils.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_py"] = cmd_build_py if "setuptools" in sys.modules: from setuptools.command.build_ext import build_ext as _build_ext else: from distutils.command.build_ext import build_ext as _build_ext class cmd_build_ext(_build_ext): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() _build_ext.run(self) if self.inplace: # build_ext --inplace will only build extensions in # build/lib<..> dir with no _version.py to write to. # As in place builds will already have a _version.py # in the module dir, we do not need to write one. return # now locate _version.py in the new build/ directory and replace # it with an updated value target_versionfile = os.path.join(self.build_lib, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) cmds["build_ext"] = cmd_build_ext if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION # "product_version": versioneer.get_version(), # ... class cmd_build_exe(_build_exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _build_exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write( LONG % { "DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, } ) cmds["build_exe"] = cmd_build_exe del cmds["build_py"] if "py2exe" in sys.modules: # py2exe enabled? from py2exe.distutils_buildexe import py2exe as _py2exe class cmd_py2exe(_py2exe): def run(self): root = get_root() cfg = get_config_from_root(root) versions = get_versions() target_versionfile = cfg.versionfile_source print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) _py2exe.run(self) os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write( LONG % { "DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, } ) cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments if "sdist" in cmds: _sdist = cmds["sdist"] elif "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: from distutils.command.sdist import sdist as _sdist class cmd_sdist(_sdist): def run(self): versions = get_versions() self._versioneer_generated_versions = versions # unless we update this, the command will keep using the old # version self.distribution.metadata.version = versions["version"] return _sdist.run(self) def make_release_tree(self, base_dir, files): root = get_root() cfg = get_config_from_root(root) _sdist.make_release_tree(self, base_dir, files) # now locate _version.py in the new base_dir directory # (remembering that it may be a hardlink) and replace it with an # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, self._versioneer_generated_versions) cmds["sdist"] = cmd_sdist return cmds CONFIG_ERROR = """ setup.cfg is missing the necessary Versioneer configuration. You need a section like: [versioneer] VCS = git style = pep440 versionfile_source = src/myproject/_version.py versionfile_build = myproject/_version.py tag_prefix = parentdir_prefix = myproject- You will also need to edit your setup.py to use the results: import versioneer setup(version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), ...) Please read the docstring in ./versioneer.py for configuration instructions, edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. """ SAMPLE_CONFIG = """ # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. [versioneer] #VCS = git #style = pep440 #versionfile_source = #versionfile_build = #tag_prefix = #parentdir_prefix = """ INIT_PY_SNIPPET = """ from ._version import get_versions __version__ = get_versions()['version'] del get_versions """ def do_setup(): """Do main VCS-independent setup function for installing Versioneer.""" root = get_root() try: cfg = get_config_from_root(root) except (EnvironmentError, configparser.NoSectionError, configparser.NoOptionError) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) return 1 print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] f.write( LONG % { "DOLLAR": "$", "STYLE": cfg.style, "TAG_PREFIX": cfg.tag_prefix, "PARENTDIR_PREFIX": cfg.parentdir_prefix, "VERSIONFILE_SOURCE": cfg.versionfile_source, } ) ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: old = f.read() except EnvironmentError: old = "" if INIT_PY_SNIPPET not in old: print(" appending to %s" % ipy) with open(ipy, "a") as f: f.write(INIT_PY_SNIPPET) else: print(" %s unmodified" % ipy) else: print(" %s doesn't exist, ok" % ipy) ipy = None # Make sure both the top-level "versioneer.py" and versionfile_source # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so # they'll be copied into source distributions. Pip won't be able to # install the package without this. manifest_in = os.path.join(root, "MANIFEST.in") simple_includes = set() try: with open(manifest_in, "r") as f: for line in f: if line.startswith("include "): for include in line.split()[1:]: simple_includes.add(include) except EnvironmentError: pass # That doesn't cover everything MANIFEST.in can do # (http://docs.python.org/2/distutils/sourcedist.html#commands), so # it might give some false negatives. Appending redundant 'include' # lines is safe, though. if "versioneer.py" not in simple_includes: print(" appending 'versioneer.py' to MANIFEST.in") with open(manifest_in, "a") as f: f.write("include versioneer.py\n") else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: print(" versionfile_source already in MANIFEST.in") # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. do_vcs_install(manifest_in, cfg.versionfile_source, ipy) return 0 def scan_setup_py(): """Validate the contents of setup.py against Versioneer's expectations.""" found = set() setters = False errors = 0 with open("setup.py", "r") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") if "versioneer.get_cmdclass()" in line: found.add("cmdclass") if "versioneer.get_version()" in line: found.add("get_version") if "versioneer.VCS" in line: setters = True if "versioneer.versionfile_source" in line: setters = True if len(found) != 3: print("") print("Your setup.py appears to be missing some important items") print("(but I might be wrong). Please make sure it has something") print("roughly like the following:") print("") print(" import versioneer") print(" setup( version=versioneer.get_version(),") print(" cmdclass=versioneer.get_cmdclass(), ...)") print("") errors += 1 if setters: print("You should remove lines like 'versioneer.VCS = ' and") print("'versioneer.versionfile_source = ' . This configuration") print("now lives in setup.cfg, and should be removed from setup.py") print("") errors += 1 return errors if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": errors = do_setup() errors += scan_setup_py() if errors: sys.exit(1)