Repository: lebrice/Sequoia
Branch: master
Commit: 7e12ff8ed67f
Files: 460
Total size: 2.6 MB
Directory structure:
gitextract_c6gc35b2/
├── .dockerignore
├── .gitattributes
├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── dockers/
│ ├── .gitignore
│ ├── base/
│ │ ├── Dockerfile
│ │ └── build.sh
│ └── branch/
│ ├── Dockerfile
│ └── build.sh
├── docs/
│ └── diagrams/
│ └── src/
│ ├── gym.puml
│ ├── pytorch_lightning.puml
│ └── seq_diagram.puml
├── examples/
│ ├── README.md
│ ├── __init__.py
│ ├── advanced/
│ │ ├── RL_and_SL_demo.py
│ │ ├── continual_rl_demo.py
│ │ ├── ewc_in_rl.py
│ │ ├── hat_demo.py
│ │ ├── hparam_tuning.py
│ │ ├── pnn/
│ │ │ ├── __init__.py
│ │ │ ├── layers.py
│ │ │ ├── model_rl.py
│ │ │ ├── model_sl.py
│ │ │ └── pnn_method.py
│ │ └── procgen_example.py
│ ├── basic/
│ │ ├── __init__.py
│ │ ├── base_method_demo.py
│ │ ├── pl_example.py
│ │ ├── pl_example_packnet.py
│ │ ├── pl_example_test.py
│ │ ├── quick_demo.ipynb
│ │ ├── quick_demo.py
│ │ ├── quick_demo_ewc.py
│ │ ├── quick_demo_packnet.py
│ │ └── quick_demo_test.py
│ ├── clcomp21/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── a2c_example.py
│ │ ├── a2c_example_test.py
│ │ ├── classifier.py
│ │ ├── classifier_test.py
│ │ ├── conftest.py
│ │ ├── dummy_method.py
│ │ ├── dummy_method_test.py
│ │ ├── multihead_classifier.py
│ │ ├── multihead_classifier_test.py
│ │ ├── regularization_example.py
│ │ ├── regularization_example_test.py
│ │ ├── sb3_example.py
│ │ └── sb3_example_test.py
│ ├── demo_utils.py
│ └── prerequisites/
│ └── dataclasses_example.py
├── mypy.ini
├── pytest.ini
├── requirements.txt
├── scripts/
│ ├── eai/
│ │ ├── cancel_all_queuing.sh
│ │ ├── cancel_all_running.sh
│ │ ├── job.sh
│ │ ├── rl_sweep.sh
│ │ ├── shell_job.sh
│ │ └── sl_sweep.sh
│ └── slurm/
│ ├── launch_many_sweeps.sh
│ ├── run.sh
│ └── sweep.sh
├── sequoia/
│ ├── README.md
│ ├── __init__.py
│ ├── _version.py
│ ├── client/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── env.proto
│ │ ├── env_proxy.py
│ │ ├── env_proxy_test.py
│ │ ├── server.py
│ │ ├── setting_proxy.py
│ │ └── setting_proxy_test.py
│ ├── common/
│ │ ├── __init__.py
│ │ ├── batch.py
│ │ ├── batch_test.py
│ │ ├── callbacks/
│ │ │ ├── __init__.py
│ │ │ ├── knn_callback.py
│ │ │ └── vae_callback.py
│ │ ├── config/
│ │ │ ├── __init__.py
│ │ │ ├── config.py
│ │ │ └── wandb_config.py
│ │ ├── gym_wrappers/
│ │ │ ├── __init__.py
│ │ │ ├── action_limit.py
│ │ │ ├── action_limit_test.py
│ │ │ ├── add_done.py
│ │ │ ├── add_info.py
│ │ │ ├── convert_tensors.py
│ │ │ ├── convert_tensors_test.py
│ │ │ ├── env_dataset.py
│ │ │ ├── env_dataset_test.py
│ │ │ ├── episode_limit.py
│ │ │ ├── episode_limit_test.py
│ │ │ ├── measure_performance.py
│ │ │ ├── multi_task_environment.py
│ │ │ ├── multi_task_environment_test.py
│ │ │ ├── observation_limit.py
│ │ │ ├── observation_limit_test.py
│ │ │ ├── pixel_observation.py
│ │ │ ├── pixel_observation_test.py
│ │ │ ├── policy_env.py
│ │ │ ├── policy_env_test.py
│ │ │ ├── smooth_environment.py
│ │ │ ├── smooth_environment_test.py
│ │ │ ├── step_callback_wrapper.py
│ │ │ ├── step_callback_wrapper_test.py
│ │ │ ├── transform_wrappers.py
│ │ │ ├── transform_wrappers_test.py
│ │ │ ├── utils.py
│ │ │ └── utils_test.py
│ │ ├── hparams/
│ │ │ └── __init__.py
│ │ ├── layers.py
│ │ ├── loss.py
│ │ ├── loss_test.py
│ │ ├── metrics/
│ │ │ ├── __init__.py
│ │ │ ├── classification.py
│ │ │ ├── classification_test.py
│ │ │ ├── get_metrics.py
│ │ │ ├── metrics.py
│ │ │ ├── metrics_utils.py
│ │ │ ├── metrics_utils_test.py
│ │ │ ├── regression.py
│ │ │ └── rl_metrics.py
│ │ ├── replay.py
│ │ ├── spaces/
│ │ │ ├── __init__.py
│ │ │ ├── image.py
│ │ │ ├── named_tuple.py
│ │ │ ├── named_tuple_test.py
│ │ │ ├── space.py
│ │ │ ├── sparse.py
│ │ │ ├── sparse_test.py
│ │ │ ├── tensor_spaces.py
│ │ │ ├── tensor_spaces_test.py
│ │ │ ├── typed_dict.py
│ │ │ └── typed_dict_test.py
│ │ ├── task.py
│ │ └── transforms/
│ │ ├── __init__.py
│ │ ├── channels.py
│ │ ├── compose.py
│ │ ├── resize.py
│ │ ├── split_batch.py
│ │ ├── to_tensor.py
│ │ ├── transform.py
│ │ ├── transform_enum.py
│ │ ├── transforms_test.py
│ │ └── utils.py
│ ├── common.puml
│ ├── conftest.py
│ ├── experiments/
│ │ ├── __init__.py
│ │ ├── experiment.py
│ │ ├── experiment_test.py
│ │ ├── hpo_sweep.py
│ │ └── hpo_sweep_test.py
│ ├── main.py
│ ├── methods/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── aux_tasks/
│ │ │ ├── __init__.py
│ │ │ ├── auxiliary_task.py
│ │ │ ├── ewc.py
│ │ │ ├── reconstruction/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ae.py
│ │ │ │ ├── decoder_for_dataset.py
│ │ │ │ ├── decoders.py
│ │ │ │ └── vae.py
│ │ │ └── transformation_based/
│ │ │ ├── __init__.py
│ │ │ ├── bases.py
│ │ │ └── rotation.py
│ │ ├── avalanche_methods/
│ │ │ ├── __init__.py
│ │ │ ├── agem.py
│ │ │ ├── agem_test.py
│ │ │ ├── ar1.py
│ │ │ ├── ar1_test.py
│ │ │ ├── base.py
│ │ │ ├── base_test.py
│ │ │ ├── conftest.py
│ │ │ ├── cwr_star.py
│ │ │ ├── cwr_star_test.py
│ │ │ ├── ewc.py
│ │ │ ├── ewc_test.py
│ │ │ ├── experience.py
│ │ │ ├── gdumb.py
│ │ │ ├── gdumb_test.py
│ │ │ ├── gem.py
│ │ │ ├── gem_test.py
│ │ │ ├── lwf.py
│ │ │ ├── lwf_test.py
│ │ │ ├── naive.py
│ │ │ ├── naive_test.py
│ │ │ ├── patched_models.py
│ │ │ ├── plugins.py
│ │ │ ├── replay.py
│ │ │ ├── replay_test.py
│ │ │ ├── synaptic_intelligence.py
│ │ │ └── synaptic_intelligence_test.py
│ │ ├── base_method.py
│ │ ├── base_method_test.py
│ │ ├── conftest.py
│ │ ├── d3rlpy_methods/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ └── base_test.py
│ │ ├── ewc_method.py
│ │ ├── ewc_method_test.py
│ │ ├── experience_replay.py
│ │ ├── experience_replay_test.py
│ │ ├── hat.py
│ │ ├── method_test.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── base_model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base_model.py
│ │ │ │ ├── model.py
│ │ │ │ ├── multihead_model.py
│ │ │ │ ├── multihead_model_test.py
│ │ │ │ ├── self_supervised_model.py
│ │ │ │ ├── self_supervised_model_test.py
│ │ │ │ └── semi_supervised_model.py
│ │ │ ├── baseline_model.puml
│ │ │ ├── fcnet.py
│ │ │ ├── forward_pass.py
│ │ │ ├── output_heads/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── classification_head.py
│ │ │ │ ├── output_head.py
│ │ │ │ ├── regression_head.py
│ │ │ │ └── rl/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── actor_critic_head.py
│ │ │ │ ├── episodic_a2c.py
│ │ │ │ ├── episodic_a2c_test.py
│ │ │ │ ├── policy_head.py
│ │ │ │ ├── policy_head_test.py
│ │ │ │ └── wasted_steps_calc.py
│ │ │ ├── output_heads.puml
│ │ │ └── simple_convnet.py
│ │ ├── models.puml
│ │ ├── packnet_method.py
│ │ ├── packnet_method_test.py
│ │ ├── pl_bolts_methods/
│ │ │ └── __init__.py
│ │ ├── pl_dqn.py
│ │ ├── pnn/
│ │ │ ├── __init__.py
│ │ │ ├── layers.py
│ │ │ ├── model_rl.py
│ │ │ ├── model_sl.py
│ │ │ └── pnn_method.py
│ │ ├── random_baseline.py
│ │ ├── random_baseline_test.py
│ │ ├── stable_baselines3_methods/
│ │ │ ├── __init__.py
│ │ │ ├── a2c.py
│ │ │ ├── a2c_test.py
│ │ │ ├── base.py
│ │ │ ├── base_test.py
│ │ │ ├── ddpg.py
│ │ │ ├── ddpg_test.py
│ │ │ ├── dqn.py
│ │ │ ├── dqn_test.py
│ │ │ ├── off_policy_method.py
│ │ │ ├── off_policy_method_test.py
│ │ │ ├── on_policy_method.py
│ │ │ ├── policy_wrapper.py
│ │ │ ├── ppo.py
│ │ │ ├── ppo_test.py
│ │ │ ├── sac.py
│ │ │ ├── sac_test.py
│ │ │ ├── td3.py
│ │ │ └── td3_test.py
│ │ └── trainer.py
│ ├── methods.puml
│ ├── sequoia.puml
│ ├── settings/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── assumptions/
│ │ │ ├── __init__.py
│ │ │ ├── assumptions.puml
│ │ │ ├── base.py
│ │ │ ├── classification.py
│ │ │ ├── context_discreteness.py
│ │ │ ├── context_visibility.py
│ │ │ ├── continual.py
│ │ │ ├── discrete_results.py
│ │ │ ├── iid.py
│ │ │ ├── iid_results.py
│ │ │ ├── incremental.py
│ │ │ ├── incremental_results.py
│ │ │ ├── incremental_test.py
│ │ │ ├── task_incremental.py
│ │ │ └── task_type.py
│ │ ├── base/
│ │ │ ├── __init__.py
│ │ │ ├── base.puml
│ │ │ ├── bases.py
│ │ │ ├── environment.py
│ │ │ ├── objects.py
│ │ │ ├── results.py
│ │ │ ├── setting.py
│ │ │ ├── setting_meta.py
│ │ │ └── setting_test.py
│ │ ├── offline_rl/
│ │ │ └── setting.py
│ │ ├── presets/
│ │ │ ├── __init__.py
│ │ │ ├── cartpole_pixels.yaml
│ │ │ ├── cartpole_state.yaml
│ │ │ ├── cifar10.yaml
│ │ │ ├── cifar100.yaml
│ │ │ ├── classic_control/
│ │ │ │ ├── cartpole.yaml
│ │ │ │ └── mountaincar_continuous.yaml
│ │ │ ├── fashion_mnist.yaml
│ │ │ ├── mnist.yaml
│ │ │ ├── monsterkong/
│ │ │ │ ├── monsterkong_3each.yaml
│ │ │ │ ├── monsterkong_4each.yaml
│ │ │ │ ├── monsterkong_5each.yaml
│ │ │ │ ├── monsterkong_all.yaml
│ │ │ │ ├── monsterkong_jumps.yaml
│ │ │ │ ├── monsterkong_jumps_and_ladders.yaml
│ │ │ │ ├── monsterkong_ladders.yaml
│ │ │ │ └── monsterkong_mix.yaml
│ │ │ ├── mujoco/
│ │ │ │ └── half_cheetah.yaml
│ │ │ ├── rl_track.yaml
│ │ │ └── sl_track.yaml
│ │ ├── rl/
│ │ │ ├── __init__.py
│ │ │ ├── continual/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── environment.py
│ │ │ │ ├── environment_test.py
│ │ │ │ ├── make_env.py
│ │ │ │ ├── make_env_test.py
│ │ │ │ ├── objects.py
│ │ │ │ ├── results.py
│ │ │ │ ├── setting.py
│ │ │ │ ├── setting_test.py
│ │ │ │ ├── tasks.py
│ │ │ │ ├── tasks_test.py
│ │ │ │ └── test_environment.py
│ │ │ ├── discrete/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── multienv_wrappers.py
│ │ │ │ ├── multienv_wrappers_test.py
│ │ │ │ ├── results.py
│ │ │ │ ├── setting.py
│ │ │ │ ├── setting_test.py
│ │ │ │ ├── tasks.py
│ │ │ │ ├── tasks_test.py
│ │ │ │ └── test_environment.py
│ │ │ ├── environment.py
│ │ │ ├── environment_test.py
│ │ │ ├── envs/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── classic_control.py
│ │ │ │ ├── monsterkong.py
│ │ │ │ ├── mujoco/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── half_cheetah.py
│ │ │ │ │ ├── half_cheetah_test.py
│ │ │ │ │ ├── hopper.py
│ │ │ │ │ ├── hopper_test.py
│ │ │ │ │ ├── modified_friction.py
│ │ │ │ │ ├── modified_friction_test.py
│ │ │ │ │ ├── modified_gravity.py
│ │ │ │ │ ├── modified_gravity_test.py
│ │ │ │ │ ├── modified_mass.py
│ │ │ │ │ ├── modified_mass_test.py
│ │ │ │ │ ├── modified_size.py
│ │ │ │ │ ├── modified_size_test.py
│ │ │ │ │ ├── modified_wall.py
│ │ │ │ │ ├── mujoco_model_utils.py
│ │ │ │ │ ├── walker2d.py
│ │ │ │ │ └── walker2d_test.py
│ │ │ │ └── variant_spec.py
│ │ │ ├── incremental/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── objects.py
│ │ │ │ ├── results.py
│ │ │ │ ├── setting.py
│ │ │ │ ├── setting_test.py
│ │ │ │ └── tasks.py
│ │ │ ├── multi_task/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setting.py
│ │ │ │ └── setting_test.py
│ │ │ ├── objects.py
│ │ │ ├── setting.py
│ │ │ ├── setting_test.py
│ │ │ ├── task_incremental/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setting.py
│ │ │ │ ├── setting_test.py
│ │ │ │ └── tasks.py
│ │ │ ├── traditional/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── setting.py
│ │ │ │ └── setting_test.py
│ │ │ └── wrappers/
│ │ │ ├── __init__.py
│ │ │ ├── measure_performance.py
│ │ │ ├── measure_performance_test.py
│ │ │ ├── no_typed_objects.py
│ │ │ ├── task_labels.py
│ │ │ └── typed_objects.py
│ │ ├── settings.puml
│ │ └── sl/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── continual/
│ │ │ ├── __init__.py
│ │ │ ├── environment.py
│ │ │ ├── environment_test.py
│ │ │ ├── envs.py
│ │ │ ├── objects.py
│ │ │ ├── results.py
│ │ │ ├── setting.py
│ │ │ ├── setting_test.py
│ │ │ └── wrappers.py
│ │ ├── discrete/
│ │ │ ├── __init__.py
│ │ │ ├── setting.py
│ │ │ └── setting_test.py
│ │ ├── domain_incremental/
│ │ │ ├── __init__.py
│ │ │ ├── setting.py
│ │ │ └── setting_test.py
│ │ ├── environment.py
│ │ ├── environment_test.py
│ │ ├── incremental/
│ │ │ ├── __init__.py
│ │ │ ├── environment.py
│ │ │ ├── environment_test.py
│ │ │ ├── objects.py
│ │ │ ├── results.py
│ │ │ ├── setting.py
│ │ │ ├── setting_test.py
│ │ │ └── unused_batch_transforms.py
│ │ ├── multi_task/
│ │ │ ├── __init__.py
│ │ │ ├── setting.py
│ │ │ └── setting_test.py
│ │ ├── setting.py
│ │ ├── task_incremental/
│ │ │ ├── __init__.py
│ │ │ ├── setting.py
│ │ │ └── setting_test.py
│ │ ├── traditional/
│ │ │ ├── __init__.py
│ │ │ ├── results.py
│ │ │ ├── setting.py
│ │ │ └── setting_test.py
│ │ └── wrappers/
│ │ ├── __init__.py
│ │ ├── measure_performance.py
│ │ └── measure_performance_test.py
│ ├── settings.puml
│ └── utils/
│ ├── __init__.py
│ ├── categorical.py
│ ├── data_utils.py
│ ├── encode.py
│ ├── generic_functions/
│ │ ├── __init__.py
│ │ ├── _namedtuple.py
│ │ ├── _namedtuple_test.py
│ │ ├── concatenate.py
│ │ ├── detach.py
│ │ ├── move.py
│ │ ├── replace.py
│ │ ├── replace_test.py
│ │ ├── singledispatchmethod.py
│ │ ├── slicing.py
│ │ ├── slicing_test.py
│ │ ├── stack.py
│ │ └── to_from_tensor.py
│ ├── logging_utils.py
│ ├── module_dict.py
│ ├── parseable.py
│ ├── plotting.py
│ ├── pretrained_utils.py
│ ├── readme.py
│ ├── serialization.py
│ └── utils.py
├── setup.cfg
├── setup.py
└── versioneer.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
data
lightning_logs
checkpoints
results
================================================
FILE: .gitattributes
================================================
sequoia/_version.py export-subst
================================================
FILE: .gitignore
================================================
**/__pycache__/
.vscode
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
examples/results/*
results/*
!results/**/*.csv
data/*
*/data/*
!data/**/*.py
scripts/*.png
wandb
.idea
.ipynb_checkpoints
checkpoints
lightning_logs
.pylintrc
**.png
*.gz
*.pt
build
dist
*.egg-info
sequoia/results
mjkey.txt
================================================
FILE: .gitmodules
================================================
[submodule "sequoia/methods/cn_dpm"]
path = sequoia/methods/cn_dpm
url = https://github.com/ryanlindeborg/CN-DPM.git
[submodule "examples/clcomp21/Real_DEEL"]
path = examples/clcomp21/Real_DEEL
url = https://github.com/mostafaelaraby/Real-DEEL-Dark-Experience.git
[submodule "sequoia/methods/continual_world"]
path = sequoia/methods/continual_world
url = https://www.github.com/lebrice/continual_world.git
================================================
FILE: .travis.yml
================================================
language: python
python:
- "3.7"
install:
- pip install gym[atari]
- pip install -r requirements.txt
script:
- pytest
after_success:
coveralls
================================================
FILE: LICENSE
================================================
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users. We, the Free Software Foundation, use the
GNU General Public License for most of our software; it applies also to
any other work released this way by its authors. You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you have
certain responsibilities if you distribute copies of the software, or if
you modify it: responsibilities to respect the freedom of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the manufacturer
can do so. This is fundamentally incompatible with the aim of
protecting users' freedom to change the software. The systematic
pattern of such abuse occurs in the area of products for individuals to
use, which is precisely where it is most unacceptable. Therefore, we
have designed this version of the GPL to prohibit the practice for those
products. If such problems arise substantially in other domains, we
stand ready to extend this provision to those domains in future versions
of the GPL, as needed to protect the freedom of users.
Finally, every program is threatened constantly by software patents.
States should not allow patents to restrict development and use of
software on general-purpose computers, but in those that do, we wish to
avoid the special danger that patents applied to a free program could
make it effectively proprietary. To prevent this, the GPL assures that
patents cannot be used to render the program non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year>  <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If the program does terminal interaction, make it output a short
notice like this when it starts in an interactive mode:
<program>  Copyright (C) <year>  <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, your program's commands
might be different; for a GUI interface, you would use an "about box".
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU GPL, see
<https://www.gnu.org/licenses/>.
The GNU General Public License does not permit incorporating your program
into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
================================================
FILE: MANIFEST.in
================================================
# Include the versioneer script and Sequoia's version module in source distributions.
include versioneer.py
include sequoia/_version.py
================================================
FILE: README.md
================================================
# Sequoia - The Research Tree
A Playground for research at the intersection of Continual, Reinforcement, and Self-Supervised Learning.
- 5 minute intro: https://www.youtube.com/watch?v=0u48vr96zRQ
- Paper link: https://arxiv.org/abs/2108.01005
- [Continual Supervised Learning Study](https://wandb.ai/sequoia/csl_study) (~6K runs)
- [Continual Reinforcement Learning Study](https://wandb.ai/sequoia/crl_study) (~2300 runs)
## Note: This project is not being actively developed at the moment. If you encounter any difficulties, please create an issue and I'll help you out.
If you have any questions or comments, please make an issue!
## Motivation:
Most applied ML research generally either proposes new Settings (research problems), new Methods (solutions to such problems), or both.
- When proposing new Settings, researchers almost always have to reimplement or heavily modify existing solutions before they can be applied onto their new problem.
- Likewise, when creating new Methods, it's often necessary to first re-create the experimental setting of other baseline papers, or even the baseline methods themselves, as experimental conditions may be *slightly* different between papers!
The goal of this repo is to:
- Organize various research Settings into an inheritance hierarchy (a tree!), with more *general*, challenging settings with few assumptions at the top, and more constrained problems at the bottom.
- Provide a mechanism for easily reusing existing solutions (Methods) onto new Settings through **Polymorphism**!
- Allow researchers to easily create new, general Methods and quickly gather results on a multitude of Settings, ranging from Supervised to Reinforcement Learning!
## Installation
Requires python >= 3.7
### Basic installation:
```console
$ git clone https://www.github.com/lebrice/Sequoia.git
$ pip install -e Sequoia
```
### Optional Addons
You can also install optional "addons" for Sequoia, each of which either adds new Methods, new environments/datasets, or both.
Addons are installed either through the usual `extras_require` feature of setuptools, or by pip-installing other repositories which register Methods for Sequoia using an `entry_point` in their `setup.py` file.
```console
$ pip install -e Sequoia[all|<addon name>]
```
Here are some of the optional addons:
- `avalanche`:
Continual Supervised Learning methods, provided by the [Avalanche](https://github.com/ContinualAI/avalanche) library:
```console
$ pip install -e Sequoia[avalanche]
```
- `CN-DPM`: Continual Neural Dirichlet Process Mixture model:
```console
$ cd Sequoia
$ git submodule init # to setup the submodules
$ pip install -e sequoia/methods/cn_dpm
```
- `orion`:
Hyper-parameter optimization using [Orion](https://github.com/epistimio/orion)
```console
$ pip install -e Sequoia[orion]
```
- `metaworld`:
Continual / Multi-Task Reinforcement Learning environments, thanks to the [metaworld](https://github.com/rlworkgroup/metaworld) package. The usual setup for mujoco needs to be done, Sequoia unfortunately can't do it for you ;(
```console
$ pip install -e Sequoia[metaworld]
```
- `monsterkong`:
Continual Reinforcement Learning environment from [the Meta-MonsterKong repo](https://github.com/lebrice/MetaMonsterkong).
```console
$ pip install -e Sequoia[monsterkong]
```
- `continual_world`: The Continual World benchmark for Continual Reinforcement learning. Adds 6 different Continual RL Methods to Sequoia.
```console
$ cd Sequoia
$ git submodule init # to setup the submodules
$ pip install -e sequoia/methods/continual_world
```
See the `setup.py` file for all the optional extras.
### Additional Installation Steps for Mac
Install the latest XQuartz app from here: https://www.xquartz.org/releases/index.html
Then run the following commands on the terminal:
```console
mkdir /tmp/.X11-unix
sudo chmod 1777 /tmp/.X11-unix
sudo chown root /tmp/.X11-unix/
```
## Documentation overview:
- ### **[Getting Started / Examples (take a look at this first)](examples/)**
- ### Running Experiments (below)
- ### [Settings overview](sequoia/settings/)
- ### [Methods overview](sequoia/methods/)
### Current Settings & Assumptions:
| Setting | RL vs SL | clear task boundaries? | Task boundaries given? | Task labels at training time? | task labels at test time | Stationary context? | Fixed action space |
| -------------------------------------------------------------------------- | ------------------------------------------------------------------------ | ---------------------- | ---------------------- | ----------------------------- | ------------------------ | ------------------- | ------------------ |
| [Continual RL](sequoia/settings/rl/continual/setting.py) | RL | no | no | no | no | no | no(?) |
| [Discrete Task-Agnostic RL](sequoia/settings/rl/discrete/setting.py) | RL | **yes** | **yes** | no | no | no | no(?) |
| [Incremental RL](sequoia/settings/rl/incremental/setting.py) | RL | **yes** | **yes** | **yes** | no | no | no(?) |
| [Task-Incremental RL](sequoia/settings/rl/task_incremental/setting.py) | RL | **yes** | **yes** | **yes** | **yes** | no | no(?) |
| [Traditional RL](sequoia/settings/rl/task_incremental/setting.py) | RL | **yes** | **yes** | **yes** | no | **yes** | no(?) |
| [Multi-Task RL](sequoia/settings/rl/task_incremental/setting.py) | RL | **yes** | **yes** | **yes** | **yes** | **yes** | no(?) |
| [Continual SL](sequoia/settings/sl/continual/setting.py) | SL | no | no | no | no | no | no |
| [Discrete Task-Agnostic SL](sequoia/settings/sl/discrete/setting.py) | SL | **yes** | no | no | no | no | no |
| [(Class) Incremental SL](sequoia/settings/sl/incremental/setting.py) | SL | **yes** | **yes** | no | no | no | no |
| [Domain-Incremental SL](sequoia/settings/sl/domain_incremental/setting.py) | SL | **yes** | **yes** | **yes** | no | no | **yes** |
| [Task-Incremental SL](sequoia/settings/sl/task_incremental/setting.py) | SL | **yes** | **yes** | **yes** | **yes** | no | no |
| [Traditional SL](sequoia/settings/sl/traditional/setting.py) | SL | **yes** | **yes** | **yes** | no | **yes** | no |
| [Multi-Task SL](sequoia/settings/sl/multi_task/setting.py) | SL | **yes** | **yes** | **yes** | **yes** | **yes** | no |
#### Notes
- **Active / Passive**:
Active settings are Settings where the next observation depends on the current action, i.e. where actions influence future observations, e.g. Reinforcement Learning.
Passive settings are Settings where the current actions don't influence the next observations (e.g. Supervised Learning.)
- **Bold entries** in the table mark constant attributes which cannot be
changed from their default value.
- \*: The environment is changing constantly over time in `ContinualRLSetting`, so
there aren't really "tasks" to speak of.
## Running experiments
--> **(Reminder) First, take a look at the [Examples](/examples)** <--
#### Directly in code:
```python
from sequoia.settings import TaskIncrementalSLSetting
from sequoia.methods import BaseMethod
# Create the setting
setting = TaskIncrementalSLSetting(dataset="mnist")
# Create the method
method = BaseMethod(max_epochs=1)
# Apply the setting to the method to generate results.
results = setting.apply(method)
print(results.summary())
```
### Command-line:
```console
$ sequoia --help
usage: sequoia [-h] [--version] {run,sweep,info} ...
Sequoia - The Research Tree
Used to run experiments, which consist in applying a Method to a Setting.
optional arguments:
-h, --help show this help message and exit
--version Displays the installed version of Sequoia and exits.
command:
Command to execute
{run,sweep,info}
run Run an experiment on a given setting.
sweep Run a hyper-parameter optimization sweep.
info Displays some information about a Setting or Method.
```
For example:
```console
$ sequoia run [--debug] (setting arguments) (method arguments)
$ sequoia sweep [--debug] (setting arguments) (method arguments)
$ sequoia info [setting or method]
```
For a detailed description of all the arguments, use the `--help` command for any of the actions:
```console
$ sequoia --help
$ sequoia run --help
$ sequoia run <some_setting> --help
$ sequoia run <some_setting> <some_method> --help
$ sequoia sweep --help
$ sequoia sweep <some_setting> --help
$ sequoia sweep <some_setting> <some_method> --help
```
For example:
```console
$ sequoia run --debug task_incremental_sl --dataset mnist random_baseline
```
For example:
- Run the BaseMethod on task-incremental MNIST, with one epoch per task, and without wandb:
```console
$ sequoia run task_incremental_sl --dataset mnist base --max_epochs 1
```
- Run the PPO Method from stable-baselines3 on an incremental RL setting, with the default dataset (CartPole) and 5 tasks:
```console
$ sequoia --setting incremental_rl --nb_tasks 5 --method sb3.ppo --steps_per_task 10_000
```
More questions? Please let us know by creating an issue or posting in the discussions!
================================================
FILE: dockers/.gitignore
================================================
# Hiding the 'eai' dockerfile
eai
================================================
FILE: dockers/base/Dockerfile
================================================
# syntax=docker/dockerfile:1
# Base image for Sequoia: PyTorch runtime + system packages, a set of
# password-less users that all share UID 13011, and an editable install of
# Sequoia (with the `no_mujoco` extra) cloned into /workspace/Sequoia.
FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime
USER root

EXPOSE 2222
EXPOSE 6000
EXPOSE 8088

ENV LANG=en_US.UTF-8

# System packages, locale generation and user creation, done in a single layer
# so that the apt caches removed at the end never end up in the image.
# Every apt/conda command is given `-y`: image builds are non-interactive, so
# any confirmation prompt would abort the build.
RUN apt update && \
    apt install -y \
        git wget zsh unzip rsync build-essential \
        ca-certificates supervisor openssh-server ssh \
        curl vim procps htop locales nano man net-tools iputils-ping \
        libosmesa6-dev libgl1-mesa-glx libgl1-mesa-dev libglu1-mesa-dev libglfw3 \
        libglfw3-dev freeglut3 xvfb ffmpeg patchelf cmake zlib1g zlib1g-dev \
        swig libopenmpi-dev aptitude screen xz-utils locate && \
    sed -i "s/# en_US.UTF-8/en_US.UTF-8/" /etc/locale.gen && locale-gen && \
    useradd -m -u 13011 -s /bin/zsh toolkit && passwd -d toolkit && \
    useradd -m -u 13011 -s /bin/zsh --non-unique console && passwd -d console && \
    useradd -m -u 13011 -s /bin/zsh --non-unique _toolchain && passwd -d _toolchain && \
    useradd -m -u 13011 -s /bin/bash --non-unique coder && passwd -d coder && \
    chown -R toolkit:toolkit /run /etc/shadow /etc/profile && \
    apt autoremove --purge -y && apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    echo ssh >> /etc/securetty && \
    rm -f /etc/legal /etc/motd

# RUN conda install -c conda-forge opencv
RUN conda install -y matplotlib numpy scipy hdf5 h5py cython
# RUN pip install \
# # Needed to build atari_py: (WHY don't they put it in a build_requires?)
# lockfile
# fasteners \
# pybullet \
# wandb \
# tqdm \
# # tensorflow \
# bs4 \
# pandas notebook plotly tqdm pyamg lxml numba pyyaml torchmeta

# Removing this `torchtext` package, seems to be causing an import issue in pytorch!
RUN pip uninstall -y torchtext

# Open up the workspace, conda and /mnt directories.
RUN chown -R toolkit:root /workspace
RUN chmod -R 777 /workspace
# this doesn't do anything
RUN adduser toolkit sudo
RUN chown -R toolkit:root /mnt/
# RUN mkdir -p /mnt/home
RUN chmod 777 /opt/conda
RUN chmod 777 /mnt

# From here on, RUN instructions are executed inside the conda `base` environment.
SHELL [ "conda", "run", "-n", "base", "/bin/bash", "-c"]

## Unused zshell and oh-my-zsh stuff:
# RUN sh -c "$(wget -O- https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
# RUN sed -i 's/robbyrussell/clean/' ~/.zshrc
# RUN sed -i 's/plugins=(git)/plugins=(git debian history-substring-search)/' ~/.zshrc

# MuJoCo-related stuff:
# RUN curl -o ~/mujoco200_linux.zip -L -C - https://www.roboti.us/download/mujoco200_linux.zip
# RUN curl -o ~/mjpro150_linux.zip -L -C - https://www.roboti.us/download/mjpro150_linux.zip
# RUN cd ~ && unzip mujoco200_linux.zip && rm mujoco200_linux.zip
# RUN cd ~ && unzip mjpro150_linux.zip && rm mjpro150_linux.zip
# RUN mkdir ~/.mujoco
# RUN mv ~/mujoco200_linux ~/.mujoco/mujoco200
# RUN mv ~/mjpro150 ~/.mujoco
# RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:~/.mujoco/mujoco200/bin" >> ~/.bashrc
# RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:~/.mujoco/mjpro150/bin" >> ~/.bashrc
# COPY mjkey.txt /home/toolkit/.mujoco/
# ENV LD_LIBRARY_PATH /home/toolkit/.mujoco/mujoco200/bin:${LD_LIBRARY_PATH}
# ENV LD_LIBRARY_PATH /home/toolkit/.mujoco/mjpro150/bin:${LD_LIBRARY_PATH}
# RUN mkdir /workspace/tools
# RUN cd /workspace/tools && git clone https://github.com/openai/mujoco-py.git && pip install -e mujoco-py

# For Wandb (TODO: Doesn't appear to work, using env variable with WANDB_API_KEY
# instead.)
# COPY .netrc /home/toolkit/.netrc
# COPY .netrc /root/.netrc
# COPY .netrc /tmp/.netrc

# Mount points for datasets and results; the environment variables below point at them.
VOLUME /mnt/data
VOLUME /mnt/results
# USER toolkit
ENV DATA_DIR=/mnt/data
ENV RESULTS_DIR=/mnt/results
ENV WANDB_DIR=/mnt/results
# VOLUME /mnt/home
# WORKDIR /mnt/home
ENV PATH=/home/toolkit/.local/bin:${PATH}

# RUN cd /workspace/tools && git clone https://github.com/openai/gym.git && cd gym && pip install -e '.[all]'
# RUN cd /workspace/tools && git clone https://github.com/openai/baselines.git && cd baselines && pip install -e .

# Clone Sequoia and install it in editable mode.
RUN cd /workspace/ && git clone https://github.com/lebrice/Sequoia.git
RUN pip install -e /workspace/Sequoia[no_mujoco]

# Container commands are passed as a single string to bash, inside the conda `base` env.
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "base", "/bin/bash", "-c"]
================================================
FILE: dockers/base/build.sh
================================================
#!/bin/bash
# Builds the 'base' Sequoia docker image and pushes it to a registry.
#
# Must be run from the root of the repository (the build context is `.` and
# the Dockerfile path is relative), with a clean git working tree.
#
# Environment:
#   REGISTRY  Registry / user to push to. Defaults to the "Username" reported
#             by `docker info` (i.e. the account used with `docker login`).
set -o errexit # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
set -o nounset # Exposes unset variables

if git diff-index --quiet HEAD --; then
    # No changes
    echo "All good, no uncommitted changes."
else
    # Changes
    echo "Can't build dockers when there are uncommited changes!"
    exit 1
fi

# Resolve the registry *before* the (long) build, so a missing value fails fast
# instead of producing an invalid "/sequoia:base" tag after the image is built.
# The sed expression keeps only the "Username:" line and strips everything up
# to the last space, leaving just the user name.
REGISTRY=${REGISTRY:-$(docker info | sed '/Username:/!d;s/.* //')}
if [ -z "$REGISTRY" ]; then
    echo "Unable to determine the registry: set REGISTRY or run 'docker login' first." >&2
    exit 1
fi
echo "Using registry $REGISTRY"

echo "Building the 'base' dockerfile"
docker build . --file dockers/base/Dockerfile --tag sequoia:base

docker tag sequoia:base "$REGISTRY/sequoia:base"
docker push "$REGISTRY/sequoia:base"
================================================
FILE: dockers/branch/Dockerfile
================================================
# syntax=docker/dockerfile:1
# Image for a given branch of Sequoia, built on top of the 'base' image.
# Build with `--build-arg BRANCH=<name>` (defaults to master).
FROM lebrice/sequoia:base
USER root
SHELL [ "conda", "run", "-n", "base", "/bin/bash", "-c"]
ARG BRANCH=master
RUN conda install -y cudatoolkit
# The clone inside the base image may be older than the remote. `git checkout`
# of a branch that already exists locally (e.g. master) does NOT move it to the
# fetched commit, so fast-forward explicitly whenever HEAD is on a branch
# (`git symbolic-ref` fails on a detached HEAD, e.g. when a tag was given).
RUN cd /workspace/Sequoia && \
    git fetch -p && \
    git checkout ${BRANCH} && \
    if git symbolic-ref -q HEAD > /dev/null; then git pull --ff-only; fi && \
    pip install -e .[no_mujoco]
# Container commands are passed as a single string to bash, inside the conda `base` env.
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "base", "/bin/bash", "-c"]
================================================
FILE: dockers/branch/build.sh
================================================
#!/bin/bash
# Builds the Sequoia docker image for a given git branch and pushes it.
#
# Must be run from the root of the repository (the build context is `.` and
# the Dockerfile path is relative), with a clean git working tree. The current
# branch is pushed first, since the image is built from the remote repository.
#
# Environment:
#   BRANCH    Branch to build. Defaults to the currently checked-out branch.
#   REGISTRY  Registry / user to push to. Defaults to the "Username" reported
#             by `docker info` (i.e. the account used with `docker login`).
set -o errexit # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
set -o nounset # Exposes unset variables

# Assign first and export afterwards: `export VAR="$(cmd)"` would hide a
# failure of `cmd` from errexit.
CURRENT_BRANCH="$(git branch --show-current)"
export CURRENT_BRANCH
BRANCH=${BRANCH:-$CURRENT_BRANCH}
export BRANCH
if [ -z "$BRANCH" ]; then
    # `git branch --show-current` prints nothing on a detached HEAD.
    echo "Unable to determine the branch to build: set BRANCH or check out a branch." >&2
    exit 1
fi
echo "Using branch $BRANCH"

# The sed expression keeps only the "Username:" line and strips everything up
# to the last space, leaving just the user name.
REGISTRY=${REGISTRY:-$(docker info | sed '/Username:/!d;s/.* //')}
export REGISTRY
if [ -z "$REGISTRY" ]; then
    echo "Unable to determine the registry: set REGISTRY or run 'docker login' first." >&2
    exit 1
fi
echo "Using registry $REGISTRY"

if git diff-index --quiet HEAD --; then
    # No changes
    echo "all good."
else
    # Changes
    echo "Can't build dockers when you have uncommited changes!"
    exit 1
fi

git push

echo "Building the container for branch $BRANCH (no cache)"
docker build . --file dockers/branch/Dockerfile \
    --no-cache \
    --build-arg BRANCH="$BRANCH" \
    --tag "sequoia:$BRANCH"
docker tag "sequoia:$BRANCH" "$REGISTRY/sequoia:$BRANCH"
docker push "$REGISTRY/sequoia:$BRANCH"
================================================
FILE: docs/diagrams/src/gym.puml
================================================
@startuml gym
package gym {
package spaces as gym.spaces {
abstract class Space {
+ contains(T sample) -> bool
+ sample() -> T
}
class Box extends Space {
+ low: np.ndarray
+ high: np.ndarray
+ shape: Tuple[int, ...]
+ dtype: np.dtype
+ contains(np.ndarray sample) -> bool
+ sample() -> np.ndarray
}
class Discrete extends Space {
+ n: int
+ contains(int sample) -> bool
+ sample() -> int
}
class Tuple extends Space {
+ spaces: Tuple[Space]
+ contains(Tuple sample) -> bool
+ sample() -> Tuple
}
' Tuple spaces contain other spaces.
Tuple *-- Space
class Dict extends Space {
+ spaces: dict[str, Space]
+ contains(dict sample) -> bool
+ sample() -> dict
}
' Same for Dicts.
Dict *-- Space
}
abstract class gym.Env {
+ observation_space: Space
+ action_space: Space
+ step(Actions) -> Tuple[Obs, Rew, bool, dict]
+ reset() -> Obs
}
gym.Env .. Space
abstract class Wrapper extends gym.Env{
+ env: gym.Env
}
}
@enduml
================================================
FILE: docs/diagrams/src/pytorch_lightning.puml
================================================
@startuml pytorch_lightning
package pytorch_lightning {
abstract class LightningDataModule {
{abstract} + prepare_data()
{abstract} + setup()
{abstract} + train_dataloader(): torch.DataLoader
{abstract} + val_dataloader(): torch.DataLoader
{abstract} + test_dataloader(): torch.DataLoader
}
abstract class LightningModule {
{abstract} + training_step(batch)
+ validation_step()
+ test_step()
}
}
@enduml
================================================
FILE: docs/diagrams/src/seq_diagram.puml
================================================
@startuml ContinualRLSetting
header Page Header
footer Page %page% of %lastpage%
title Overall Evaluation loop - Sequoia
note over User, Setting
Even though this diagram is somewhat large,
keep in mind that there are but a few key methods:
1. Method.configure()
2. Method.fit()
3. Method.get_actions()
4. Method.on_task_switch()
end note
actor User
participant Setting << (A,#2121FF) Setting >>
collections TrainEnv
collections ValidEnv
collections TestEnv
' autoactivate on
participant Method << (C,#ADD1B2) Method >>
participant Model << (C,#ADD1B2) nn.Module >>
' activate Setting
' autoactivate on
User -> Setting: Create the Setting
Setting -> TrainEnv: Create temp env
return observation / action / reward spaces
User <-- Setting
User -> Method: Create the Method
User <-- Method
User -> Setting: setting.apply(method)
Setting -> Method: **method.configure(setting)**
Method -> Method: create model, optimizer, etc.
' deactivate Method
Method -> Model: Create
' activate Model
Setting <-- Method
autoactivate off
== training ==
group train_loop [for each task `i`]
alt task_labels_at_train_time?
else True
Setting -> Method: **on_task_switch(i)**
Method -> Method: consolidate knowledge, \n switch output heads, etc.
Setting <-- Method
else False
Setting -> Method: **on_task_switch(None)**
Method -> Method: consolidate knowledge etc.
Setting <-- Method
end
Setting -> TrainEnv: Create train env for task i
Setting -> ValidEnv: Create valid env for task i
' activate ValidEnv
Setting -> Method: **Method.fit(train_env, valid_env)**
' loop
' alt loop
group loop
note right
The Method is free to do whatever
it wants with the Train and Valid envs
of the current task.
end note
Method -> Model: train()
return
' group training
Model <--> TrainEnv: train with the env
...
Method -> Model: eval()
return
Model <--> ValidEnv: Evaluate performance
...
' autoactivate on
' Model -> TrainEnv: reset
' return Observations
' Model -> TrainEnv: step(actions)
' return Observations, Rewards, done, info
end
end
== testing ==
note over Setting, Method
We currently only perform the test loop after training is complete on all tasks,
however, in the future we will run this test loop after the end of training on
each task. See issue#46 on GitHub for more info.
end note
group test_loop
Setting --> Setting: Concatenate datasets for all tasks, \n create test wrappers, etc.
Setting --> TestEnv: Create test environment (all tasks)
autoactivate on
Setting -> TestEnv: reset
return observations
' loop
alt
else normal step
Setting -> Method: **get_actions(observations)**
Method -> Model: predict(x)
return y_pred
return actions
Setting -> TestEnv: step(actions)
return observations, rewards, done, info
else end of episode reached
Setting -> TestEnv: reset
return observations
else task boundary is reached
' TestEnv --> Method: **on_task_switch(i)**
alt known_task_boundaries?
else False: do nothing
note over Method
When known_task_boundaries=False, the Method doesn't get informed
of task boundaries (it might have to perform some kind of change-point
detection, for instance).
end note
else True
note over TestEnv
Minor note: here it's the TestEnv
that calls the Method when a
task boundary is reached.
end note
alt task_labels_at_test_time?
else true
' note right of Setting: If task labels are given
TestEnv -> Method: **on_task_switch(i)**
autoactivate off
Method -> Method
autoactivate on
return
else false
TestEnv -> Method: **on_task_switch(None)**
autoactivate off
Method -> Method
autoactivate on
return
end
end
end
autoactivate off
note over TestEnv
The test environment uses a `Monitor` wrapper, which gathers
statistics of interest like the mean reward, accuracy, etc.
end note
TestEnv -> Setting: report performance of the Method
end
Setting -> Setting: Weigh performance of each task \n depending on the Setting
User <-- Setting: Results
' return Results
@enduml
================================================
FILE: examples/README.md
================================================
# Examples
Here's a brief description of the examples in this folder:
## Prerequisites:
- [Intro to dataclasses & simple-parsing](prerequisites/dataclasses_example.py)
- [Basics of openai gym](https://github.com/openai/gym#basics)
## Basic examples:
- [pl_example.py](basic/pl_example.py):
**Recommended entry-point for ML Practitioners**. Shows an example method and model
using [PyTorch Lightning](https://github.com/PyTorchLightning/pytorch-lightning).
This is the best way to get started if you don't mind some level of abstraction in your code
(a good thing in general!)
- [quick_demo.ipynb](basic/quick_demo.ipynb):
**Recommended entry-point for new users**. Simple demo showing how to create a `Method`
from scratch that targets a Supervised CL `Setting`, as well as how to
improve this simple Method using a simple regularization loss.
- [quick_demo.py](basic/quick_demo.py): First part of the above
notebook: shows how to create a Method from scratch that
targets a Supervised CL Setting.
- [quick_demo_ewc.py](basic/quick_demo_ewc.py): Second part of the
above notebook: shows how to improve upon an existing Method by adding a
CL regularization loss.
- [base_method_demo.py](basic/base_method_demo.py): Shows how the
BaseMethod can be applied to get results in both RL and SL Settings.
## CLVision Workshop Submission Examples:
Examples in this folder are aimed at solving the supervised learning track of the competition.
Each example builds on top of the previous, in a manner that improves the overall performance you can expect on any given CL setting.
As such, it is recommended that you take a look at the examples in the following order:
0. [DummyMethod](clcomp21/dummy_method.py)
Non-parametric method that simply returns a random prediction for each observation.
1. [Simple Classifier](clcomp21/classifier.py):
Standard neural net classifier without any CL-related mechanism. Works in the SL track, but has very poor performance.
2. [Multi-Head / Task Inference Classifier](clcomp21/multihead_classifier.py):
Performs multi-head prediction, and a simple form of task inference. Gets better results than the previous example.
3. [CL Regularized Classifier](clcomp21/regularization_example.py):
Adds a simple CL regularization loss to the multihead classifier above.
## Advanced examples:
- [RL_and_SL_demo.py](advanced/RL_and_SL_demo.py):
Example that shows how the BaseMethod can easily be extended by adding
AuxiliaryTasks to it, which allows you to get results in both RL and SL.
- [continual_rl_demo.py](advanced/continual_rl_demo.py):
Demonstrates how to create Reinforcement Learning (RL) Settings, as well as
how methods from [stable-baselines3](https://github.com/DLR-RM/stable-baselines3)
can be applied to these settings.
- [Extending Stable-Baselines3 (RL Settings only)](advanced/ewc_in_rl.py):
(Not recommended for new users!)
Very specific example which shows how, if you really wanted to, you could
extend one or more of the Methods from SB3 with some kind of regularization
loss hooking into the internal optimization loop of SB3.
================================================
FILE: examples/__init__.py
================================================
================================================
FILE: examples/advanced/RL_and_SL_demo.py
================================================
""" Demo where we add the same regularization loss from the other examples, but
this time as an `AuxiliaryTask` on top of the BaseMethod.
This makes it easy to create CL methods that apply to both RL and SL Settings!
"""
import copy
import random
import sys
from argparse import Namespace
from dataclasses import dataclass
from typing import ClassVar, List
import torch
from simple_parsing import ArgumentParser, field
from torch import Tensor
# This "hack" is required so we can run `python examples/advanced/RL_and_SL_demo.py`
sys.path.extend([".", ".."])
from sequoia.common.config import Config
from sequoia.common.loss import Loss
from sequoia.methods import BaseMethod
from sequoia.methods.aux_tasks import AuxiliaryTask
from sequoia.methods.models import BaseModel, ForwardPass
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Environment, RLSetting, Setting
from sequoia.utils.utils import camel_case, dict_intersection
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
class SimpleRegularizationAuxTask(AuxiliaryTask):
    """Same regularization loss as in the previous examples, this time
    implemented as an `AuxiliaryTask`, which gets added to the BaseModel,
    making it applicable to both RL and SL.

    This adds a CL regularization loss to the BaseModel.

    The most important method of `AuxiliaryTask` is `get_loss`, which should
    return a `Loss` for the given forward pass and resulting rewards/labels.
    Take a look at the `AuxiliaryTask` class for more info.
    """

    name: ClassVar[str] = "simple_regularization"

    @dataclass
    class Options(AuxiliaryTask.Options):
        """Hyper-parameters / configuration options of this auxiliary task."""

        # Coefficient used to scale this regularization loss before it gets
        # added to the 'base' loss of the model.
        coefficient: float = 0.01
        # Whether to use the absolute difference of the weights or the difference
        # in the `regularize` method below.
        # NOTE(review): not read anywhere in this class.
        use_abs_diff: bool = False
        # The norm term for the 'distance' between the current and old weights.
        distance_norm: int = 2

    def __init__(
        self,
        *args,
        name: str = None,
        options: "SimpleRegularizationAuxTask.Options" = None,
        **kwargs,
    ):
        """Create the auxiliary task; all arguments are forwarded to `AuxiliaryTask`."""
        super().__init__(*args, options=options, name=name, **kwargs)
        self.options: SimpleRegularizationAuxTask.Options
        # Id of the task given at the last update of the anchor weights (None
        # before the first update, or when task labels aren't available).
        self.previous_task: int = None
        # 'Anchor' weights: parameter name -> detached copy of the parameter as
        # it was at the last task switch. Empty until the first update.
        # TODO: Figure out a clean way to persist this dict into the state_dict.
        self.previous_model_weights = {}
        # Number of times `on_task_switch` ran while the task was enabled.
        self.n_switches: int = 0

    def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
        """Get a `Loss` for the given forward pass and resulting rewards/labels.

        Take a look at the `AuxiliaryTask` class for more info,

        NOTE: This is the same simplified version of EWC used throughout the
        other examples: the loss is the P-norm between the current weights and
        the weights as they were on the beginning of the task.

        Also note, this particular example doesn't actually use the provided
        arguments.

        Returns an empty `Loss` as long as no anchor weights have been stored.
        """
        # Check the anchor weights rather than `previous_task`: when switches
        # arrive with `task_id=None`, `previous_task` stays None even though
        # anchors were saved, and the loss would otherwise never be applied.
        if not self.previous_model_weights:
            # We're in the first task: do nothing.
            return Loss(name=self.name)

        current_weights = dict(self.model.named_parameters())
        loss = 0.0
        # Only parameters present in both dicts contribute to the loss.
        for _, (new_w, old_w) in dict_intersection(
            current_weights, self.previous_model_weights
        ):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.options.distance_norm)
        return Loss(name=self.name, loss=loss)

    def on_task_switch(self, task_id: int) -> None:
        """Executed when the task switches (to either a new or known task).

        The very first call only counts the switch. Later calls store a copy of
        the current model weights as the new anchors whenever `task_id` is None
        or differs from the previously seen task.
        """
        if not self.enabled:
            return

        if self.previous_task is None and self.n_switches == 0:
            logger.debug("Starting the first task, no update.")
        elif task_id is None or task_id != self.previous_task:
            logger.debug(
                f"Switching tasks: {self.previous_task} -> {task_id}: "
                f"Updating the 'anchor' weights."
            )
            self.previous_task = task_id
            # Update in-place so that references to the dict stay valid.
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                copy.deepcopy({k: v.detach() for k, v in self.model.named_parameters()})
            )
        self.n_switches += 1
class CustomizedBaselineModel(BaseModel):
    """`BaseModel` extended with the simple regularization auxiliary task."""

    @dataclass
    class HParams(BaseModel.HParams):
        """Hyper-parameters of our customized baseline model."""

        # Options of the simple regularization auxiliary task added below.
        simple_reg: SimpleRegularizationAuxTask.Options = field(
            default_factory=SimpleRegularizationAuxTask.Options
        )

    def __init__(
        self,
        setting: Setting,
        hparams: "CustomizedBaselineModel.HParams",
        config: Config,
    ):
        """Build the base model, then register the extra auxiliary task on it."""
        super().__init__(setting=setting, hparams=hparams, config=config)
        self.hp: CustomizedBaselineModel.HParams

        # Create the new auxiliary task from its options and attach it:
        regularization_task = SimpleRegularizationAuxTask(options=self.hp.simple_reg)
        self.add_auxiliary_task(regularization_task)

        # Other components (e.g. some kind of replay buffer) could be added too:
        self.replay_buffer: List = []
        # (...)
@dataclass
class CustomMethod(BaseMethod, target_setting=Setting):
    """Example method which adds regularization to the baseline in RL and SL.

    This extends the `BaseMethod` by adding the simple regularization
    auxiliary task defined above to the `BaseModel`.

    NOTE: Since this class inherits from `BaseMethod`, which targets the
    `Setting` setting, i.e. the "root" node, it is applicable to all settings,
    both in RL and SL. However, you could customize the `target_setting`
    argument above to limit this to any particular subtree (only SL, only RL,
    only when task labels are present, etc).
    """

    # Hyper-parameters of the customized Baseline Model used by this method.
    hparams: CustomizedBaselineModel.HParams = field(
        default_factory=CustomizedBaselineModel.HParams
    )

    def __init__(
        self,
        hparams: CustomizedBaselineModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        """Create the method; every argument is forwarded to `BaseMethod`."""
        super().__init__(
            hparams=hparams,
            config=config,
            trainer_options=trainer_options,
            **kwargs,
        )

    def create_model(self, setting: Setting) -> CustomizedBaselineModel:
        """Creates the Model to be used for the given `Setting`."""
        return CustomizedBaselineModel(setting=setting, hparams=self.hparams, config=self.config)

    def configure(self, setting: Setting):
        """Configure this Method before being trained / tested on this Setting."""
        super().configure(setting)

        # For example, change the value of the coefficient of our
        # regularization loss when in RL vs SL:
        if isinstance(setting, RLSetting):
            self.hparams.simple_reg.coefficient = 0.01
        else:
            self.hparams.simple_reg.coefficient = 1.0

    def fit(self, train_env: Environment, valid_env: Environment):
        """Called by the Setting to let the Method train on a given task.

        You can do whatever you want with the train and valid
        environments. As it is currently, in most `Settings`, the valid
        environment will contain data from only the current task. (See issue at
        https://github.com/lebrice/Sequoia/issues/46 for more context).
        """
        return super().fit(train_env=train_env, valid_env=valid_env)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser, dest: str = None):
        """Adds command-line arguments for this Method to an argument parser.

        Parameters
        ----------
        parser : ArgumentParser
            The parser to add the arguments to.
        dest : str, optional
            Attribute of the parsed namespace where the method will be stored.
            Defaults to `camel_case(cls.__qualname__)`. `demo_command_line`
            passes `dest="method"`, which raised a TypeError before this
            parameter existed.
        """
        # 'dest' is where the arguments will be stored on the namespace.
        dest = dest or camel_case(cls.__qualname__)
        # Add all command-line arguments. This adds arguments for all fields of
        # this dataclass.
        parser.add_arguments(cls, dest=dest)
        # You could add arguments here if you wanted to:
        # parser.add_argument("--foo", default=1.23, help="example argument")

    @classmethod
    def from_argparse_args(cls, args: Namespace, dest: str = None):
        """Create an instance of this class from the parsed arguments.

        Parameters
        ----------
        args : Namespace
            The parsed command-line arguments.
        dest : str, optional
            Attribute of `args` holding the parsed method; must match the
            value given to `add_argparse_args`. Defaults to
            `camel_case(cls.__qualname__)`.
        """
        # Retrieve the parsed arguments:
        dest = dest or camel_case(cls.__qualname__)
        method: CustomMethod = getattr(args, dest)
        # You could retrieve other arguments like so:
        # foo: int = args.foo
        return method
def demo_manual():
    """Apply the BaseMethod and the CustomMethod to a Setting built in code.

    Both methods are applied to the same setting, one after the other, and the
    summaries of their results are printed for comparison.
    """
    # Any Setting from the tree can be used here:
    from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSLSetting

    # SL alternative:
    # setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=5)
    # RL setting: cartpole with a different pole length in each task.
    cartpole_schedule = {
        0: {"gravity": 10, "length": 0.5},
        5000: {"gravity": 10, "length": 1.0},
    }
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        train_task_schedule=cartpole_schedule,
        train_max_steps=10_000,
    )

    # --- Baseline: the plain BaseMethod ---
    base_config = Config(debug=True)
    base_trainer_options = TrainerConfig(max_epochs=1)
    base_hparams = BaseModel.HParams()
    base_method = BaseMethod(
        hparams=base_hparams, config=base_config, trainer_options=base_trainer_options
    )
    base_results = setting.apply(base_method, config=base_config)

    # --- The 'improved' CustomMethod ---
    custom_config = Config(debug=True)
    custom_trainer_options = TrainerConfig(max_epochs=1)
    custom_hparams = CustomizedBaselineModel.HParams()
    new_method = CustomMethod(
        hparams=custom_hparams, config=custom_config, trainer_options=custom_trainer_options
    )
    new_results = setting.apply(new_method, config=custom_config)

    # --- Side-by-side report ---
    print("\n\nComparison: BaseMethod vs CustomMethod")
    print("\n BaseMethod results: ")
    print(base_results.summary())
    print("\n CustomMethod results: ")
    print(new_results.summary())
def demo_command_line():
    """Run the same demo as above, but customizing the Setting and Method from
    the command-line.

    NOTE: Remember to uncomment the function call below to use this instead of
    demo_manual!
    """
    ## Create the `Setting` and the `Config` from the command-line, like in
    ## the other examples.
    parser = ArgumentParser(description=__doc__)

    ## Add command-line arguments for any Setting in the tree:
    from sequoia.settings import TaskIncrementalRLSetting, TaskIncrementalSLSetting

    # parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    parser.add_arguments(TaskIncrementalRLSetting, dest="setting")
    parser.add_arguments(Config, dest="config")

    # Add the command-line arguments for our CustomMethod (including the
    # arguments for our simple regularization aux task).
    # NOTE(review): `CustomMethod.add_argparse_args` / `from_argparse_args` need
    # to accept a `dest` argument for these calls to work - confirm.
    CustomMethod.add_argparse_args(parser, dest="method")

    args = parser.parse_args()

    # The parsed setting is the one registered above under dest="setting".
    setting: TaskIncrementalRLSetting = args.setting
    config: Config = args.config

    # Create the BaseMethod:
    # NOTE(review): only the CustomMethod arguments were registered under
    # "method" above; confirm this is the intended source for the BaseMethod.
    base_method = BaseMethod.from_argparse_args(args, dest="method")
    # Get the results of the BaseMethod:
    base_results = setting.apply(base_method, config=config)

    ## Create the CustomMethod:
    new_method = CustomMethod.from_argparse_args(args, dest="method")
    # Get the results for the CustomMethod:
    new_results = setting.apply(new_method, config=config)

    print(f"\n\nComparison: BaseMethod vs CustomMethod:")
    print(base_results.summary())
    print(new_results.summary())
if __name__ == "__main__":
demo_manual()
# demo_command_line()
================================================
FILE: examples/advanced/continual_rl_demo.py
================================================
import sys
# This "hack" is required so we can run `python examples/advanced/continual_rl_demo.py`
sys.path.extend([".", ".."])
from sequoia.methods.stable_baselines3_methods import A2CMethod, DQNMethod
from sequoia.settings import (
ContinualRLSetting,
IncrementalRLSetting,
RLSetting,
TaskIncrementalRLSetting,
)
if __name__ == "__main__":
    # Demo: apply an SB3-based method to a continual RL setting on CartPole.

    # Environment attributes ("gravity", "length") to use, keyed by an integer
    # (presumably the training step at which they take effect - confirm
    # against the setting's documentation).
    task_schedule = {
        0: {"gravity": 10, "length": 0.2},
        1000: {"gravity": 100, "length": 1.2},
        2000: {"gravity": 10, "length": 0.2},
    }
    # Swap in one of the commented-out classes below to try another RL setting.
    setting = ContinualRLSetting(
        # setting = IncrementalRLSetting(
        # setting = TaskIncrementalRLSetting(
        # setting = RLSetting(
        dataset="CartPole-v1",
        train_max_steps=2000,
        train_task_schedule=task_schedule,
    )

    # Create the method to use here:
    # NOTE: The DQN method doesn't seem to work nearly as well as A2C.
    # method = DQNMethod(train_steps_per_task=1_000)
    method = A2CMethod(train_steps_per_task=1_000)

    # You could change the hyper-parameters of the method too:
    # method.hparams.buffer_size = 100

    # Apply the method to the setting and print a summary of the results.
    results = setting.apply(method)
    print(results.summary())
================================================
FILE: examples/advanced/ewc_in_rl.py
================================================
""" Example of how to add a simplified regularization method to algos from
stable-baseline-3.
"""
from collections import deque
from copy import deepcopy
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Type, TypeVar, Union
import gym
import torch
from nngeometry.generator.jacobian import Jacobian
from nngeometry.layercollection import LayerCollection
from nngeometry.object.pspace import PMatAbstract, PMatDiag, PMatKFAC, PVector
from simple_parsing import choice
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.policies import BasePolicy
from torch import Tensor
from torch.utils.data import DataLoader, TensorDataset
from sequoia.methods import register_method
from sequoia.methods.stable_baselines3_methods import StableBaselines3Method
from sequoia.methods.stable_baselines3_methods.policy_wrapper import PolicyWrapper
from sequoia.settings import TaskIncrementalRLSetting
from sequoia.settings.base import Actions, Environment, Method, Observations
from sequoia.utils.utils import dict_intersection
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
Policy = TypeVar("Policy", bound=BasePolicy)
class NormRegularizer(PolicyWrapper[Policy]):
    """A Wrapper class that adds a `on_task_switch` and a `ewc_loss` method to
    an nn.Module (in this particular case, a Policy from SB3.)

    By subclassing PolicyWrapper, this is able to leverage some 'hooks' into the
    optimizer of the policy.
    """

    def __init__(self: Policy, *args, reg_coefficient: float = 1.0, ewc_p_norm: int = 2, **kwargs):
        """Create the wrapped policy.

        Parameters
        ----------
        reg_coefficient : float
            Multiplier applied to the regularization loss in `get_loss`.
        ewc_p_norm : int
            Order `p` of the norm used for the distance between weights.
        *args, **kwargs
            Forwarded to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        self.reg_coefficient = reg_coefficient
        self.ewc_p_norm = ewc_p_norm

        # 'Anchor' weights: parameter name -> detached copy of the parameter at
        # the last task switch. Empty until the first update.
        self.previous_model_weights: Dict[str, Tensor] = {}

        self._previous_task: Optional[int] = None
        self._n_switches: int = 0

    def on_task_switch(self: Policy, task_id: Optional[int], *args, **kwargs) -> None:
        """Executed when the task switches (to either a known or unknown task).

        The very first call (with a falsy `task_id`, i.e. 0 or None) only counts
        the switch. Otherwise, when `task_id` is None or differs from the
        previous task, the current weights are stored as the new anchors.
        """
        logger.info(f"On task switch called: task_id={task_id}")
        if self._previous_task is None and self._n_switches == 0 and not task_id:
            logger.info("Starting the first task, no EWC update.")
        elif task_id is None or task_id != self._previous_task:
            # NOTE: We also switch between unknown tasks.
            logger.info(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            # Update in-place so that references to the dict stay valid.
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                deepcopy({k: v.detach() for k, v in self.named_parameters()})
            )
        self._n_switches += 1

    def get_loss(self: Policy) -> Union[float, Tensor]:
        """This will get called before the call to `policy.optimizer.step()`
        from within the `train` method of the algos from stable-baselines3.

        You can use this to return some kind of loss tensor to use.
        """
        return self.reg_coefficient * self.ewc_loss()

    def after_zero_grad(self: Policy):
        """Called after `self.policy.optimizer.zero_grad()` in the training
        loop of the SB3 algos.
        """
        # Backpropagate the loss here, by default, so that any grad clipping
        # also affects the grads of the loss, for instance.
        wrapper_loss = self.get_loss()
        # Plain floats (e.g. the 0.0 returned before any anchor exists) and
        # tensors without grad have nothing to backpropagate.
        if isinstance(wrapper_loss, Tensor) and wrapper_loss != 0.0 and wrapper_loss.requires_grad:
            logger.info(f"{type(self).__name__} loss: {wrapper_loss.item()}")
            wrapper_loss.backward(retain_graph=True)

    def before_optimizer_step(self: Policy):
        """Called before `self.policy.optimizer.step()` in the training
        loop of the SB3 algos.
        """

    def ewc_loss(self: Policy) -> Union[float, Tensor]:
        """Gets an 'ewc-like' regularization loss.

        NOTE: This is a simplified version of EWC where the loss is the P-norm
        between the current weights and the weights as they were on the beginning
        of the task.

        Returns 0.0 as long as no anchor weights have been stored.
        """
        # Check the anchor weights rather than `_previous_task`: when switches
        # arrive with `task_id=None`, `_previous_task` stays None even though
        # anchors were saved, and the loss would otherwise never be applied.
        if not self.previous_model_weights:
            # We're in the first task: do nothing.
            return 0.0

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.named_parameters())

        loss = 0.0
        # Only parameters present in both dicts contribute to the loss.
        for _, (new_w, old_w) in dict_intersection(new_weights, old_weights):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)
        return loss
class EWCPolicy(NormRegularizer):
    """A Wrapper class that adds a `on_task_switch` and a `ewc_loss` method to
    an nn.Module (in this particular case, a Policy from SB3) and implements the EWC method.
    """

    def __init__(
        self: Policy,
        *args,
        reg_coefficient: float = 1.0,
        ewc_p_norm: int = 2,
        fim_representation: Type[PMatAbstract] = PMatDiag,
        **kwargs,
    ):
        """Create the wrapped policy.

        Parameters
        ----------
        reg_coefficient : float
            Multiplier applied to the EWC loss.
        ewc_p_norm : int
            Forwarded to `NormRegularizer`.
        fim_representation : Type[PMatAbstract]
            Class used to represent the Fisher Information Matrices.
        *args, **kwargs
            Forwarded to the parent constructor.
        """
        # `reg_coefficient` and `ewc_p_norm` are keyword-only in
        # `NormRegularizer.__init__`: passing them positionally would append
        # them to `*args` and leave the parent's defaults in place.
        super().__init__(
            *args, reg_coefficient=reg_coefficient, ewc_p_norm=ewc_p_norm, **kwargs
        )
        # One consolidated FIM per estimated function; None until the first
        # consolidation.
        self.FIMs: List[PMatAbstract] = None
        # Anchor weights, stored as a PVector here (replaces the parent's dict).
        self.previous_model_weights: PVector = None
        self.FIM_representation = fim_representation

    def consolidate(self, new_fims: List[PMatAbstract], task: int) -> None:
        """
        Consolidates the previous FIMs and the new ones.
        See online EWC in https://arxiv.org/pdf/1805.06370.pdf.

        The first call just stores `new_fims`. Later calls merge each new FIM
        into the stored one at the same index, weighting the stored one by `task`.
        """
        if self.FIMs is None:
            self.FIMs = new_fims
            return

        assert len(new_fims) == len(self.FIMs)
        for i, (fim_previous, fim_new) in enumerate(zip(self.FIMs, new_fims)):
            if fim_previous is None:
                self.FIMs[i] = fim_new
            else:
                # consolidate the FIMs
                self.FIMs[i] = EWCPolicy._consolidate_fims(fim_previous, fim_new, task)

    @staticmethod
    def _consolidate_fims(
        fim_previous: PMatAbstract, fim_new: PMatAbstract, task: int
    ) -> PMatAbstract:
        """Merge `fim_new` into `fim_previous` in place and return `fim_previous`.

        The result is `(previous * task + new) / (task + 1)`, applied either to
        the whole `data` (PMatDiag) or to each item of a dict-valued `data`.
        """
        # consolidate the fim_new into fim_previous in place
        if isinstance(fim_new, PMatDiag):
            fim_previous.data = ((deepcopy(fim_new.data)) + fim_previous.data * (task)) / (task + 1)
        elif isinstance(fim_new.data, dict):
            # Both dicts are walked in lockstep, so they are assumed to hold
            # the same entries in the same order.
            for (_, prev_items), (_, new_items) in zip(
                fim_previous.data.items(), fim_new.data.items()
            ):
                for item, item_ in zip(prev_items, new_items):
                    item.data = ((item.data * (task)) + deepcopy(item_.data)) / (task + 1)
        return fim_previous

    def on_task_switch(
        self: Policy, task_id: Optional[int], dataloader: DataLoader, method: str = "a2c"
    ) -> None:
        """Executed when the task switches (to either a known or unknown task).

        Parameters
        ----------
        task_id : Optional[int]
            Id of the new task, or None when unknown.
        dataloader : DataLoader
            Data passed as `loader` to the FIM computation.
        method : str
            Either "dqn" or "a2c"; selects which function(s) a FIM is computed
            for, and is passed as the `variant` of the FIM.
        """
        logger.info(f"On task switch called: task_id={task_id}")
        if self._previous_task is None and self._n_switches == 0 and not task_id:
            self._previous_task = task_id
            logger.info("Starting the first task, no EWC update.")
            self._n_switches += 1
        elif task_id is None or self._previous_task is None or task_id > self._previous_task:
            # we don't want to go here at test time
            # NOTE: We also switch between unknown tasks.
            logger.info(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            self.previous_model_weights = PVector.from_model(self).clone().detach()

            # TODO: keeping two FIMs might not be the optimal way of doing this
            new_fims = []
            if method == "dqn":
                function = self.q_net
                n_output = self.action_space.n
            else:
                function = self
                n_output = 1

            # TODO: Import this FIM function, from wherever it was defined.
            new_fim = FIM(
                model=self,
                loader=dataloader,
                representation=self.FIM_representation,
                n_output=n_output,
                variant=method,
                function=function,
                device=self.device.type,
            )
            new_fims.append(new_fim)

            if method == "a2c":
                # apply EWC also to the value net
                new_fim_critic = FIM(
                    model=self,
                    loader=dataloader,
                    representation=self.FIM_representation,
                    n_output=1,
                    variant="regression",
                    function=lambda *x: self(x[0])[1],
                    device=self.device.type,
                )
                new_fims.append(new_fim_critic)

            self.consolidate(new_fims, task=self._previous_task)
            self._n_switches += 1

    def ewc_loss(self: Policy) -> Union[float, Tensor]:
        """Gets an 'ewc-like' regularization loss.

        Sums, over the stored FIMs, `fim.vTMv` of the difference between the
        current weights and the anchor weights. Returns 0.0 when there is
        nothing to regularize against yet or when the coefficient is 0.
        """
        regularizer = 0.0
        if self._previous_task is None or self.reg_coefficient == 0 or self.FIMs is None:
            # We're in the first task: do nothing.
            return regularizer

        v_current = PVector.from_model(self)
        for fim in self.FIMs:
            regularizer += fim.vTMv(v_current - self.previous_model_weights)
        return regularizer
from sequoia.methods.stable_baselines3_methods import (
A2CModel,
DDPGModel,
DQNModel,
PPOModel,
SACModel,
TD3Model,
)
@register_method
@dataclass
class ExampleRegularizationMethod(StableBaselines3Method):
    """SB3-based method whose policy is wrapped with the `NormRegularizer`."""

    Model: ClassVar[Type[BaseAlgorithm]]

    # You could use any of these 'backbones' from SB3:
    Model = A2CModel  # Works great! (fastest)
    # Model = PPOModel # Works great! (somewhat fast)
    # Model = SACModel # Works (seems to be quite a bit slower).

    # These don't yet work, they have the same error, which seems to be
    # related to the action space being Discrete:
    # stable_baselines3/td3/td3.py", line 143, in train
    # noise = replay_data.actions.clone().data.normal_(0, self.target_policy_noise)
    # RuntimeError: "normal_kernel_cuda" not implemented for 'Long'
    # Model = TD3Model # TODO
    # Model = DDPGModel # TODO
    # Model = DQNModel # Doesn't work: predictions have more than one value?!

    # Coefficient for the EWC-like loss.
    reg_coefficient: float = 1.0
    # norm of the 'distance' used in the ewc-like loss above.
    ewc_p_norm: int = 2

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        """Create the SB3 algorithm as usual, then wrap its policy."""
        algorithm = super().create_model(train_env, valid_env)
        # The wrapper receives this method's regularization hyper-parameters.
        return NormRegularizer.wrap_algorithm(
            algorithm,
            reg_coefficient=self.reg_coefficient,
            ewc_p_norm=self.ewc_p_norm,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        if not self.model:
            # No model yet: nothing to notify.
            return
        self.model.policy.on_task_switch(task_id)
@register_method
@dataclass
class EWCExampleMethod(StableBaselines3Method):
    """SB3-based method whose policy is wrapped with the `EWCPolicy`.

    On each task switch (once a model exists), observations are collected from
    the model's environment and handed to the policy for its FIM computation.
    """

    Model: ClassVar[Type[BaseAlgorithm]]
    # Model = A2CModel # Works great! (fastest)
    Model = DQNModel # Works great! (fastest)

    # Coefficient for the EWC-like loss.
    reg_coefficient: float = 1.0
    # Number of observations to use for FIM calculation
    total_steps_fim: int = 1000
    # Fisher information type (diagonal or block diagonal)
    fim_representation: PMatAbstract = choice(
        {"diagonal": PMatDiag, "block_diagonal": PMatKFAC}, default=PMatKFAC
    )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
        """Create the SB3 algorithm as usual, then wrap its policy with `EWCPolicy`."""
        # Create the model, as usual:
        model = super().create_model(train_env, valid_env)
        # 'Wrap' the algorithm's policy with the EWC wrapper.
        model = EWCPolicy.wrap_algorithm(
            model,
            reg_coefficient=self.reg_coefficient,
            fim_representation=self.fim_representation,
        )
        return model

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        Raises NotImplementedError when the model class is neither an "a2c" nor
        a "dqn" model (decided from the string representation of its class).
        """
        if self.model:
            # create observation collection to use for FIM calculation
            observation_collection = []
            # Keep running episodes with the current model until enough
            # observations have been gathered.
            while len(observation_collection) < self.total_steps_fim:
                state = self.model.env.reset()
                # Each episode is capped at 1000 steps.
                for _ in range(1000):
                    action = self.get_actions(Observations(state), self.model.env.action_space)
                    state, _, done, _ = self.model.env.step(action)
                    observation_collection.append(torch.tensor(state).to(self.model.device))
                    if done:
                        break
            # Concatenated along the first dimension; assumes each observation
            # already has a leading batch dimension - TODO confirm.
            dataloader = DataLoader(
                TensorDataset(torch.cat(observation_collection)), batch_size=100, shuffle=False
            )
            # Pick the FIM variant from the name of the model's class.
            if "a2c" in str(self.model.__class__):
                rl_method = "a2c"
            elif "dqn" in str(self.model.__class__):
                rl_method = "dqn"
            else:
                raise NotImplementedError
            self.model.policy.on_task_switch(task_id, dataloader, method=rl_method)
if __name__ == "__main__":
    # Two CartPole tasks that differ only in pole length; the switch happens at
    # step 1000 of the 2000 training steps.
    task_schedule = {
        0: {"gravity": 10, "length": 0.3},
        1000: {"gravity": 10, "length": 0.5},  # second task is 'easier' than the first one.
    }
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_task_schedule=task_schedule,
        train_max_steps=2000,
    )

    # Baseline run: regularization disabled.
    method = EWCExampleMethod(reg_coefficient=0.0)
    results_without_reg = setting.apply(method)

    # Same setting, this time with the EWC penalty switched on.
    method = EWCExampleMethod(reg_coefficient=100)
    results_with_reg = setting.apply(method)

    # Side-by-side report of both runs.
    separator = "-" * 40
    print(separator)
    print("WITHOUT EWC ")
    print(results_without_reg.summary())
    print(f"With EWC (coefficient={method.reg_coefficient}):")
    print(results_with_reg.summary())
================================================
FILE: examples/advanced/hat_demo.py
================================================
import sys
from argparse import Namespace
from dataclasses import dataclass
from typing import Dict, NamedTuple, Optional, Tuple
import gym
import numpy as np
import torch
import tqdm
from gym import Space, spaces
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor
from sequoia.common import Config
from sequoia.common.spaces import Image
from sequoia.methods import register_method
from sequoia.settings import Environment, Method
from sequoia.settings.sl import TaskIncrementalSLSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental import Actions, Observations, Rewards
class Masks(NamedTuple):
    """Named tuple for the masked tensors created in the HATNet."""

    # Gates applied to the outputs of the three convolutional layers (c1, c2, c3).
    gc1: Tensor
    gc2: Tensor
    gc3: Tensor
    # Gates applied to the outputs of the two fully-connected layers (fc1, fc2).
    gfc1: Tensor
    gfc2: Tensor
class HatNet(torch.nn.Module):
    """
    @inproceedings{serra2018overcoming,
    title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
    author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
    booktitle={International Conference on Machine Learning},
    pages={4548--4557},
    year={2018}
    }

    The model is where the model weights are initialized.
    Just like a classic PyTorch, here the different layers and components of the model are defined
    """

    def __init__(self, image_space: Image, n_classes_per_task: Dict[int, int], s_hat: float = 50):
        """Create the layers, the per-task output heads and the task embeddings.

        Parameters
        ----------
        image_space : Image
            Space of the input images; its `channels` and `width` attributes size
            the convolutional layers.
        n_classes_per_task : Dict[int, int]
            Number of classes of each task. One output head is created per entry.
        s_hat : float
            Scaling factor applied to the task embeddings before the sigmoid gate.
        """
        super().__init__()

        ncha = image_space.channels
        size = image_space.width
        self.n_classes_per_task = n_classes_per_task
        self.s_hat = s_hat

        # `s` tracks the spatial size after each conv (+ 2x2 max-pool) stage, so
        # that the first fully-connected layer can be sized correctly.
        self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8)
        s = compute_conv_output_size(size, size // 8)
        s //= 2
        self.c2 = torch.nn.Conv2d(64, 128, kernel_size=size // 10)
        s = compute_conv_output_size(s, size // 10)
        s //= 2
        self.c3 = torch.nn.Conv2d(128, 256, kernel_size=2)
        s = compute_conv_output_size(s, 2)
        s //= 2
        self.smid = s
        self.maxpool = torch.nn.MaxPool2d(2)
        self.relu = torch.nn.ReLU()

        self.drop1 = torch.nn.Dropout(0.2)
        self.drop2 = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(256 * self.smid * self.smid, 2048)
        self.fc2 = torch.nn.Linear(2048, 2048)

        n_tasks = len(self.n_classes_per_task)
        # TODO: (@lebrice) Here I'm 'fixing' this, by making it so each output head has
        # as many outputs as there are classes in total. It's not super efficient, but
        # it should work.
        total_classes = sum(self.n_classes_per_task.values())
        self.output_layers = torch.nn.ModuleList(
            [torch.nn.Linear(2048, total_classes) for _ in self.n_classes_per_task]
        )

        self.gate = torch.nn.Sigmoid()
        # All embedding stuff should start with 'e'
        self.ec1 = torch.nn.Embedding(n_tasks, 64)
        self.ec2 = torch.nn.Embedding(n_tasks, 128)
        self.ec3 = torch.nn.Embedding(n_tasks, 256)
        self.efc1 = torch.nn.Embedding(n_tasks, 2048)
        self.efc2 = torch.nn.Embedding(n_tasks, 2048)

        self.flatten = torch.nn.Flatten()
        self.loss = torch.nn.CrossEntropyLoss()
        self.current_task: Optional[int] = 0

    def forward(self, observations: TaskIncrementalSLSetting.Observations) -> Tuple[Tensor, Masks]:
        """Compute the logits and the gating masks for a batch of observations.

        When the observations carry no task labels, `self.current_task` is used
        for every sample of the batch.

        Raises
        ------
        ValueError
            If neither the observations nor `self.current_task` provide a task id.
        """
        # NOTE(review): the return value is discarded; the call is kept in case it
        # has side effects -- confirm and remove if it is pure.
        observations.as_list_of_tuples()
        x = observations.x
        t = observations.task_labels
        if t is None:
            # Task labels can be missing (e.g. at test time in some settings): fall
            # back to the task set through `current_task` for the whole batch.
            if self.current_task is None:
                raise ValueError(
                    "HatNet needs task labels: the observations have none and "
                    "`current_task` is None."
                )
            t = torch.full((x.shape[0],), self.current_task, dtype=torch.long, device=x.device)

        masks = self.mask(t, s_hat=self.s_hat)
        gc1, gc2, gc3, gfc1, gfc2 = masks

        # Gated
        h = self.maxpool(self.drop1(self.relu(self.c1(x))))
        h = h * gc1.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop1(self.relu(self.c2(h))))
        h = h * gc2.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop2(self.relu(self.c3(h))))
        h = h * gc3.unsqueeze(2).unsqueeze(3)
        h = self.flatten(h)
        h = self.drop2(self.relu(self.fc1(h)))
        h = h * gfc1.expand_as(h)
        h = self.drop2(self.relu(self.fc2(h)))
        h = h * gfc2.expand_as(h)

        # Each batch can have elements of more than one Task (in test)
        # In Task Incremental Learning, each task have it own classification head.
        y: Optional[Tensor] = None
        for task_id in set(t.tolist()):
            task_mask = t == task_id
            y_pred_t = self.output_layers[task_id](h.clone())
            if y is None:
                # The first head's output is the base tensor; the rows of the
                # other tasks are overwritten below.
                y = y_pred_t
            else:
                y[task_mask] = y_pred_t[task_mask]
        assert y is not None
        return y, masks

    def mask(self, t: Tensor, s_hat: float) -> Masks:
        """Return the five gates for task ids `t`: sigmoid of `s_hat` times each embedding."""
        gc1 = self.gate(s_hat * self.ec1(t))
        gc2 = self.gate(s_hat * self.ec2(t))
        gc3 = self.gate(s_hat * self.ec3(t))
        gfc1 = self.gate(s_hat * self.efc1(t))
        gfc2 = self.gate(s_hat * self.efc2(t))
        return Masks(gc1, gc2, gc3, gfc1, gfc2)

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels) back.
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits, _ = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch were None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            # This happens when the Setting is monitoring our training performance.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y

        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict
def compute_conv_output_size(
    Lin: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1
) -> int:
    """Return the output length of a convolution along one spatial dimension.

    Evaluates ``floor((Lin + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1)``.
    """
    padded_length = Lin + 2 * padding
    kernel_extent = dilation * (kernel_size - 1) + 1
    # Floor division on integers matches the floor of the true division.
    return int((padded_length - kernel_extent) // stride + 1)
@register_method
class HatDemoMethod(Method, target_setting=TaskIncrementalSLSetting):
    """
    Here we implement the method according to the characteristics and methodology of the current proposal.
    It should be as much as possible agnostic to the model and setting we are going to use.

    The method proposed can be specific to a setting to make comparisons easier.
    Here what we control is the model's training process, given a setting that delivers data in a certain way.
    """

    @dataclass
    class HParams:
        """Hyper-parameters of the HAT demo method."""

        # Learning rate of the optimizer.
        learning_rate: float = 0.001
        # Batch size
        batch_size: int = 128
        # weight/importance of the task embedding to the gate function
        s_hat: float = 50.0
        # Maximum number of training epochs per task
        max_epochs_per_task: int = 2

    def __init__(self, hparams: Optional[HParams] = None):
        """Store the hyper-parameters, using the `HParams` defaults when none are given."""
        self.hparams: HatDemoMethod.HParams = hparams or self.HParams()

        # We will create those when `configure` will be called, before training.
        self.model: HatNet
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: TaskIncrementalSLSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        setting.batch_size = self.hparams.batch_size
        assert (
            setting.increment == setting.test_increment
        ), "Assuming same number of classes per task for training and testing."
        n_classes_per_task = {
            i: setting.num_classes_in_task(i, train=True) for i in range(setting.nb_tasks)
        }
        image_space: Image = setting.observation_space["x"]
        self.model = HatNet(
            image_space=image_space,
            n_classes_per_task=n_classes_per_task,
            s_hat=self.hparams.s_hat,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """
        Train loop

        Different Settings can return elements from tasks in an other way,
        be it class incremental, task incremental, etc.

        Batch can have information about en environment, rewards, input, task labels, etc.
        And we call the forward training function of our method, independent of the settings
        """
        # configure() will have been called by the setting before we get here,
        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")
            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                postfix = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for batch in train_pbar:
                    loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=train_env,
                    )
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            epoch_val_loss = 0.0
            # `no_grad` restores the previous grad mode on exit, even when an
            # exception is raised inside the loop.
            with torch.no_grad(), tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                for batch in val_pbar:
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=valid_env,
                    )
                    # Accumulate a plain float rather than a tensor.
                    epoch_val_loss += float(batch_val_loss)
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                # Record the epoch index (not the index of the last batch).
                best_epoch = epoch

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits, _ = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    def on_task_switch(self, task_id: Optional[int]):
        """Record the id of the new task on the model."""
        # This method gets called if task boundaries are known in the current
        # setting. Furthermore, if task labels are available, task_id will be
        # the index of the new task. If not, task_id will be None.
        # TODO: Does this method actually work when task_id is None?
        self.model.current_task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser, dest: Optional[str] = None) -> None:
        """Add the `HParams` arguments to `parser`.

        Parameters
        ----------
        parser : ArgumentParser
            Parser to add the arguments to.
        dest : Optional[str]
            Attribute of the parsed namespace under which the hyper-parameters are
            stored. Defaults to "hparams" when omitted or empty.
        """
        parser.add_arguments(cls.HParams, dest=dest or "hparams")
        # You can also add arguments as usual:
        # parser.add_argument("--foo", default=123)

    @classmethod
    def from_argparse_args(cls, args: Namespace, dest: Optional[str] = None) -> "HatDemoMethod":
        """Create the method from parsed arguments.

        `dest` must match the value given to `add_argparse_args` ("hparams" when
        omitted or empty).
        """
        hparams: HatDemoMethod.HParams = getattr(args, dest or "hparams")
        # foo: int = args.foo
        method = cls(hparams=hparams)
        return method
if __name__ == "__main__":
    # Example: Evaluate a Method on a single CL setting.
    #
    # We must define 3 main components:
    # 1.- Setting: It is the continual learning scenario that we are working, SL or RL,
    #     TI or CI. Each setting has its own parameters that can be customized.
    # 2.- Model: Is the parameters and layers of the model, just like in PyTorch.
    #     We can use a predefined model or create our own.
    # 3.- Method: It is how we are going to use what the settings give us to train our
    #     model. Same as before, we can define our own or use pre-defined Methods.
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # Register the command-line arguments of the Method, the Setting, and the Config
    # (the Config contains options like the log_dir, the data_dir, etc.)
    # NOTE(review): this relies on the method's classmethods accepting a `dest` keyword.
    HatDemoMethod.add_argparse_args(parser, dest="method")
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    parser.add_arguments(Config, "config")

    args = parser.parse_args()

    # Build the Method from the parsed args, then pull out the Setting and the Config.
    method: HatDemoMethod = HatDemoMethod.from_argparse_args(args, dest="method")
    setting: TaskIncrementalSLSetting = args.setting
    config: Config = args.config

    # Applying the method to the setting (optionally with a Config) yields the Results.
    results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")
================================================
FILE: examples/advanced/hparam_tuning.py
================================================
"""Runs a hyper-parameter tuning sweep, using Orion for HPO and wandb for visualization.
# PREREQUISITES:
1. (Optional): If you want to run the sweep on the monsterkong env:
At the time of writing, the monsterkong repo is private. Once the challenge is out,
it will most probably be made public. In the meantime, you'll need to ask
@mattriemer for access to the MonsterKong_examples repo.
```
pip install -e .[rl]
```
2. Install the repo, along with the optional dependencies for Hyper-Parameter
Optimization (HPO):
```console
pip install -e .[hpo]
```
NOTE: You can also fuse the two steps above with `pip install -e .[rl,hpo]`
3. (Optional) Setup a database to hold the hyper-parameter configurations, following
the [Orion database configuration documentation](https://orion.readthedocs.io/en/stable/install/database.html)
The quickest way to get this setup is to run the `orion db setup` wizard, entering
"pickleddb" as the database type:
```console
$ orion db setup
Enter the database type: (default: mongodb) pickleddb
Enter the database name: (default: test)
Enter the database host: (default: localhost)
Default configuration file will be saved at:
/home/<username>/.config/orion.core/orion_config.yaml
```
"""
import wandb
from sequoia.common import Config
from sequoia.methods.base_method import BaseMethod
from sequoia.settings import Results, Setting, TraditionalSLSetting
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
if __name__ == "__main__":
    # Script entry point: build a Setting and a BaseMethod, then run a
    # hyper-parameter sweep of the method on that setting and print the best result.
    from simple_parsing import ArgumentParser

    ## Create the Setting:
    from sequoia.settings import RLSetting

    setting = RLSetting(dataset="monsterkong")
    # from sequoia.settings import TaskIncrementalSLSetting
    # setting = TaskIncrementalSLSetting(dataset="cifar10")

    ## Create the BaseMethod:
    # Option 1: Create the method manually:
    # method = BaseMethod()
    # Option 2: From the command-line:
    method, unused_args = BaseMethod.from_known_args()  # allow unused args.
    # parser = ArgumentParser(description=__doc__)
    # BaseMethod.add_argparse_args(parser, dest="method")
    # args, unused_args = parser.parse_known_args()
    # method: BaseMethod = BaseMethod.from_argparse_args(args, dest="method")

    # Search space for the Hyper-Parameter optimization algorithm.
    # NOTE: This is just a copy of the spaces that are auto-generated from the fields of
    # the `BaseModel.HParams` class. You can change those as you wish though.
    # Each value is a prior written as a string; the nested "output_head" dict holds
    # the priors of the output head's own hyper-parameters.
    search_space = {
        "learning_rate": "loguniform(1e-06, 1e-02, default_value=0.001)",
        "weight_decay": "loguniform(1e-12, 1e-03, default_value=1e-06)",
        "optimizer": "choices(['sgd', 'adam', 'rmsprop'], default_value='adam')",
        "encoder": "choices({'resnet18': 0.5, 'simple_convnet': 0.5}, default_value='resnet18')",
        "output_head": {
            "activation": "choices(['relu', 'tanh', 'elu', 'gelu', 'relu6'], default_value='tanh')",
            "dropout_prob": "uniform(0, 0.8, default_value=0.2)",
            "gamma": "uniform(0.9, 0.999, default_value=0.99)",
            "normalize_advantages": "choices([True, False])",
            "actor_loss_coef": "uniform(0.1, 1, default_value=0.5)",
            "critic_loss_coef": "uniform(0.1, 1, default_value=0.5)",
            "entropy_loss_coef": "uniform(0, 1, discrete=True, default_value=0)",
        },
    }

    # Run the sweep; it returns the best hyper-parameters along with their results.
    best_hparams, best_results = method.hparam_sweep(
        setting, search_space=search_space, experiment_id="123"
    )
    print(f"Best hparams: {best_hparams}, best perf: {best_results}")
    # results = setting.apply(method, config=Config(debug=True))
================================================
FILE: examples/advanced/pnn/__init__.py
================================================
================================================
FILE: examples/advanced/pnn/layers.py
================================================
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
"""
Based on https://github.com/TomVeniat/ProgressiveNeuralNetworks.pytorch
"""
class PNNConvLayer(nn.Module):
    """Convolutional layer of a progressive-network column.

    Holds the column's own convolution (`layer`) and, when `depth > 0`, one
    lateral convolution (in `u`) per previous column.
    """

    def __init__(self, col, depth, n_in, n_out, kernel_size=3):
        super().__init__()
        self.col = col

        def make_conv():
            return nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1)

        self.layer = make_conv()
        # The first layer of a column (depth 0) has no lateral connections.
        n_lateral = col if depth > 0 else 0
        self.u = nn.ModuleList([make_conv() for _ in range(n_lateral)])

    def forward(self, inputs):
        """Apply the layer to `inputs` (one tensor, or a list with one tensor per column).

        The last element feeds this column's own convolution; the others are paired,
        in order, with the lateral convolutions. The ReLU of the sum is returned.
        """
        columns_in = inputs if isinstance(inputs, list) else [inputs]
        own_out = self.layer(columns_in[-1])
        lateral_out = sum(lateral(x) for lateral, x in zip(self.u, columns_in))
        return F.relu(own_out + lateral_out)
class PNNLinearBlock(nn.Module):
    """Fully-connected layer of a progressive-network column.

    Holds the column's own linear layer (`layer`) and, when `depth > 0`, one
    lateral linear layer (in `u`) per previous column.
    """

    def __init__(self, col: int, depth: int, n_in: int, n_out: int):
        super().__init__()
        self.layer = nn.Linear(n_in, n_out)
        # The first layer of a column (depth 0) has no lateral connections.
        n_lateral = col if depth > 0 else 0
        self.u = nn.ModuleList([nn.Linear(n_in, n_out) for _ in range(n_lateral)])

    def forward(self, inputs):
        """Apply the block to `inputs` (one tensor, or a list with one tensor per column).

        The last element feeds this column's own layer; the others are paired, in
        order, with the lateral layers. The ReLU of the sum is returned.
        """
        columns_in = inputs if isinstance(inputs, list) else [inputs]
        own_out = self.layer(columns_in[-1])
        lateral_out = sum(lateral(x) for lateral, x in zip(self.u, columns_in))
        return F.relu(own_out + lateral_out)
================================================
FILE: examples/advanced/pnn/model_rl.py
================================================
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from .layers import PNNConvLayer, PNNLinearBlock
class PnnA2CAgent(nn.Module):
    """
    @article{rusu2016progressive,
    title={Progressive neural networks},
    author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
    journal={arXiv preprint arXiv:1606.04671},
    year={2016}
    }

    Actor-critic agent with one column per task. Columns are added with `new_task`;
    with the "conv" architecture an additional convolutional column is shared by the
    actor and the critic.
    """

    def __init__(self, arch="mlp", hidden_size=256):
        """Create an agent without any column.

        Parameters
        ----------
        arch : str
            "mlp" for state inputs; any other value selects the convolutional path
            in `forward` ("conv" is the value `new_task` checks for).
        hidden_size : int
            Number of hidden units of the columns.
        """
        super().__init__()
        self.columns_actor = nn.ModuleList([])
        self.columns_critic = nn.ModuleList([])
        self.columns_conv = nn.ModuleList([])
        self.arch = arch
        self.hidden_size = hidden_size

        # Original size 3 x 400 x 600
        self.transformation = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
            ]
        )

    def forward(self, observations):
        """Return the critic and actor outputs of the column of the observation's task.

        Every column is evaluated; the outputs at index `observations.task_labels`
        are returned. The actor output goes through a softmax over dim 1.
        """
        assert (
            self.columns_actor
        ), "PNN should at least have one column (missing call to `new_task` ?)"
        t = observations.task_labels

        if self.arch == "mlp":
            x = torch.from_numpy(observations.x).unsqueeze(0).float()
            inputs_critic = [c[1](c[0](x)) for c in self.columns_critic]
            inputs_actor = [c[1](c[0](x)) for c in self.columns_actor]

            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                # Column i receives the activations of columns 0..i.
                outputs_critic.append(column[2](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][2](inputs_actor[: i + 1]))

            # Index of the final linear layer inside each actor/critic column.
            ind_depth = 3
        else:
            x = self.transfor_img(observations.x).unsqueeze(0).float()
            inputs = [c[1](c[0](x)) for c in self.columns_conv]

            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[3](column[2](inputs[: i + 1])))

            inputs = outputs
            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[5](column[4](inputs[: i + 1])))

            inputs_critic = [c[6](outputs[i]).view(1, -1) for i, c in enumerate(self.columns_conv)]
            inputs_actor = inputs_critic[:]

            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[0](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][0](inputs_actor[: i + 1]))

            # Index of the final linear layer inside each actor/critic column.
            ind_depth = 1

        critic = []
        for i, column in enumerate(self.columns_critic):
            critic.append(column[ind_depth](outputs_critic[i]))

        actor = []
        for i, column in enumerate(self.columns_actor):
            actor.append(F.softmax(column[ind_depth](outputs_actor[i]), dim=1))

        return critic[t], actor[t]

    def new_task(self, device, num_inputs, num_actions=5):
        """Add the columns of a new task.

        Parameters
        ----------
        device
            Accepted for API compatibility; not used by this method.
        num_inputs : int
            Input size of the first layer of the new columns.
        num_actions : int
            Number of outputs of the actor column.
        """
        task_id = len(self.columns_actor)

        if self.arch == "conv":
            sizes = [num_inputs, 32, 64, self.hidden_size]
            modules_conv = nn.Sequential()

            modules_conv.add_module("Conv1", PNNConvLayer(task_id, 0, sizes[0], sizes[1]))
            modules_conv.add_module("MaxPool1", nn.MaxPool2d(3))
            modules_conv.add_module("Conv2", PNNConvLayer(task_id, 1, sizes[1], sizes[2]))
            modules_conv.add_module("MaxPool2", nn.MaxPool2d(3))
            modules_conv.add_module("Conv3", PNNConvLayer(task_id, 2, sizes[2], sizes[3]))
            modules_conv.add_module("MaxPool3", nn.MaxPool2d(3))
            modules_conv.add_module("globavgpool2d", nn.AdaptiveAvgPool2d((1, 1)))
            self.columns_conv.append(modules_conv)

        modules_actor = nn.Sequential()
        modules_critic = nn.Sequential()

        # With the "mlp" architecture the columns start with their own input layer;
        # otherwise they start directly with the lateral block, as `forward` expects.
        if self.arch == "mlp":
            modules_actor.add_module("linAc1", nn.Linear(num_inputs, self.hidden_size))
            modules_actor.add_module("relAc", nn.ReLU(inplace=True))
        modules_actor.add_module(
            "linAc2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_actor.add_module("linAc3", nn.Linear(self.hidden_size, num_actions))

        if self.arch == "mlp":
            modules_critic.add_module("linCr1", nn.Linear(num_inputs, self.hidden_size))
            modules_critic.add_module("relCr", nn.ReLU(inplace=True))
        modules_critic.add_module(
            "linCr2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_critic.add_module("linCr3", nn.Linear(self.hidden_size, 1))

        self.columns_actor.append(modules_actor)
        self.columns_critic.append(modules_critic)

        print("Add column of the new task")

    def unfreeze_columns(self):
        """Make the parameters of every column trainable."""
        for i, c in enumerate(self.columns_actor):
            for params in c.parameters():
                params.requires_grad = True

            for params in self.columns_critic[i].parameters():
                params.requires_grad = True

        for c in self.columns_conv:
            for params in c.parameters():
                params.requires_grad = True

    def freeze_columns(self, skip=None):
        """Freeze every column except those whose index is in `skip`.

        All columns are unfrozen first, so the columns listed in `skip` end up
        trainable.
        """
        if skip is None:
            skip = []

        self.unfreeze_columns()

        for i, c in enumerate(self.columns_actor):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

                for params in self.columns_critic[i].parameters():
                    params.requires_grad = False

        for i, c in enumerate(self.columns_conv):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

        print("Freeze columns from previous tasks")

    def parameters(self, task_id=None, recurse: bool = True):
        """Return the parameters of the columns of one task, or of the whole module.

        Parameters
        ----------
        task_id : Optional[int]
            Index of the task whose critic, actor and (if any) conv column
            parameters are returned as a list. When None, the call is forwarded to
            `nn.Module.parameters`, so that `nn.Module` methods which call
            `self.parameters()` without arguments (e.g. `zero_grad`) keep working.
        recurse : bool
            Only used when `task_id` is None; forwarded to `nn.Module.parameters`.
        """
        if task_id is None:
            return super().parameters(recurse=recurse)

        param = list(self.columns_critic[task_id].parameters())
        param.extend(self.columns_actor[task_id].parameters())
        if len(self.columns_conv) > 0:
            param.extend(self.columns_conv[task_id].parameters())
        return param

    def transfor_img(self, img):
        """Apply the image transformation pipeline created in `__init__` to `img`."""
        return self.transformation(img)
        # return lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.
================================================
FILE: examples/advanced/pnn/model_sl.py
================================================
from typing import Dict, List, Optional, Tuple
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from sequoia.settings import Actions, PassiveEnvironment
from sequoia.settings.sl.incremental import Observations, Rewards
from .layers import PNNConvLayer, PNNLinearBlock
class PnnClassifier(nn.Module):
    """
    @article{rusu2016progressive,
    title={Progressive neural networks},
    author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
    journal={arXiv preprint arXiv:1606.04671},
    year={2016}
    }

    Classifier with one column of `n_layers` `PNNLinearBlock`s per task. Columns
    are added with `new_task`.
    """

    def __init__(self, n_layers):
        """Create a classifier without any column; each column will have `n_layers` blocks."""
        super().__init__()
        self.n_layers = n_layers
        self.columns = nn.ModuleList([])

        self.loss = torch.nn.CrossEntropyLoss()
        self.device = None
        self.n_tasks = 0
        self.n_classes_per_task: List[int] = []

    def forward(self, observations):
        """Return the logits of each sample, taken from the column of its task.

        Inputs are flattened from dim 1 onward, every column is evaluated, and
        each row of the result comes from the column selected by the sample's
        task label.
        """
        assert self.columns, "PNN should at least have one column (missing call to `new_task` ?)"
        x = observations.x
        x = torch.flatten(x, start_dim=1)
        labels = observations.task_labels
        # TODO: Debug this:
        # NOTE(review): the number of classes of the task is added to the first-layer
        # activations; this looks unintended but is kept as-is because removing it
        # changes the numerical behaviour of the model -- confirm.
        inputs = [
            c[0](x) + n_classes_in_task
            for n_classes_in_task, c in zip(self.n_classes_per_task, self.columns)
        ]
        for layer_index in range(1, self.n_layers):
            outputs = []
            for i, column in enumerate(self.columns):
                # Column i receives the activations of columns 0..i.
                outputs.append(column[layer_index](inputs[: i + 1]))

            inputs = outputs

        y: Optional[Tensor] = None
        for task_id in set(labels.tolist()):
            task_mask = labels == task_id

            if y is None:
                # The first column's output is the base tensor; the rows of the
                # other tasks are overwritten below.
                y = inputs[task_id]
            else:
                y[task_mask] = inputs[task_id][task_mask]

        assert y is not None, "Can't get prediction in model PNN"
        return y

    # def new_task(self, device, num_inputs, num_actions = 5):
    def new_task(self, device, sizes: List[int]):
        """Add the column of a new task and move it to `device`.

        Parameters
        ----------
        device
            Device the new column is moved to; also stored in `self.device`.
        sizes : List[int]
            Input size followed by the output size of each layer
            (`n_layers + 1` values).
        """
        assert len(sizes) == self.n_layers + 1, (
            f"Should have the out size for each layer + input size (got {len(sizes)} "
            f"sizes but {self.n_layers} layers)."
        )
        self.n_tasks += 1
        # TODO: Fix this to use the actual number of classes per task.
        self.n_classes_per_task.append(2)
        task_id = len(self.columns)
        modules = [
            PNNLinearBlock(col=task_id, depth=i, n_in=sizes[i], n_out=sizes[i + 1])
            for i in range(self.n_layers)
        ]

        new_column = nn.ModuleList(modules).to(device)
        self.columns.append(new_column)
        self.device = device

        print("Add column of the new task")

    def freeze_columns(self, skip=None):
        """Freeze every column except those whose index is in `skip`.

        All columns are made trainable first, so the columns listed in `skip` end
        up trainable.
        """
        if skip is None:
            skip = []

        for c in self.columns:
            for params in c.parameters():
                params.requires_grad = True

        for i, c in enumerate(self.columns):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False

        print("Freeze columns from previous tasks")

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        environment: PassiveEnvironment,
    ):
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0].to(self.device)
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)

        # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
        # is the same for each task.
        # Get the rewards, if necessary:
        if rewards is None:
            rewards = environment.send(Actions(y_pred))

        image_labels = rewards.y.to(self.device)
        # print(logits.size())
        loss = self.loss(logits, image_labels)

        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict

    def parameters(self, task_id=None, recurse: bool = True):
        """Return the parameters of the column of one task, or of the whole module.

        Parameters
        ----------
        task_id : Optional[int]
            Index of the task whose column parameters are returned. When None, the
            call is forwarded to `nn.Module.parameters`, so that `nn.Module` methods
            which call `self.parameters()` without arguments (e.g. `zero_grad`)
            keep working.
        recurse : bool
            Only used when `task_id` is None; forwarded to `nn.Module.parameters`.
        """
        if task_id is None:
            return super().parameters(recurse=recurse)
        return self.columns[task_id].parameters()
================================================
FILE: examples/advanced/pnn/pnn_method.py
================================================
import sys
from argparse import Namespace
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple, Union
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import tqdm
from gym import spaces
from gym.spaces import Box
from numpy import inf
from scipy.signal import lfilter
from simple_parsing import ArgumentParser
from torchvision import transforms
from examples.advanced.pnn.model_rl import PnnA2CAgent
from examples.advanced.pnn.model_sl import PnnClassifier
from sequoia import Environment
from sequoia.common import Config
from sequoia.common.spaces import Image
from sequoia.common.transforms.utils import is_image
from sequoia.settings import Actions, Method, Observations, Rewards, Setting
from sequoia.settings.assumptions import IncrementalAssumption
from sequoia.settings.rl import ActiveEnvironment, RLSetting, TaskIncrementalRLSetting
from sequoia.settings.sl import (
DomainIncrementalSLSetting,
PassiveEnvironment,
SLSetting,
TaskIncrementalSLSetting,
)
class PnnMethod(Method, target_setting=Setting):
"""
Here we implement the PNN Method according to the characteristics and methodology of
the current proposal. It should be as much as possible agnostic to the model and
setting we are going to use.
The method proposed can be specific to a setting to make comparisons easier.
Here what we control is the model's training process, given a setting that delivers
data in a certain way.
"""
@dataclass
class HParams:
    """Hyper-parameters of the Pnn method."""

    # Learning rate of the optimizer. Defaults to 0.0001 when in SL.
    learning_rate: float = 2e-4
    num_steps: int = 200  # (only applicable in RL settings.)
    # Discount factor (Only used in RL settings).
    gamma: float = 0.99
    # Number of hidden units (only used in RL settings.)
    hidden_size: int = 256
    # Batch size in SL, and number of parallel environments in RL.
    # Defaults to None in RL, and 32 when in SL.
    batch_size: Optional[int] = None
    # Maximum number of training epochs per task. (only used in SL Settings)
    max_epochs_per_task: int = 2
def __init__(self, hparams: HParams = None):
    """Store the optional hyper-parameters.

    When `hparams` is None, the defaults are chosen later, in `configure`,
    depending on the kind of setting.
    """
    # We will create those when `configure` will be called, before training.
    self.config: Optional[Config] = None
    self.task_id: Optional[int] = 0
    self.hparams: Optional[PnnMethod.HParams] = hparams
    # Declared here for type-checkers only; no value is assigned yet.
    self.model: Union[PnnA2CAgent, PnnClassifier]
    self.optimizer: torch.optim.Optimizer
def configure(self, setting: Setting):
"""Called before the method is applied on a setting (before training).
You can use this to instantiate your model, for instance, since this is
where you get access to the observation & action spaces.
"""
input_space: Box = setting.observation_space["x"]
task_label_space = setting.observation_space["task_labels"]
# For now all Settings have `Discrete` (i.e. classification) action spaces.
action_space: spaces.Discrete = setting.action_space
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.num_actions = action_space.n
self.num_inputs = np.prod(input_space.shape)
self.added_tasks = []
if isinstance(setting, RLSetting):
# If we're applied to an RL setting:
# Used these as the default hparams in RL:
self.hparams = self.hparams or self.HParams(
learning_rate=2e-4,
num_steps=200,
gamma=0.99,
hidden_size=256,
batch_size=None,
)
assert self.hparams
self.train_steps_per_task = setting.steps_per_task
# We want a batch_size of None, i.e. only one observation at a time.
setting.batch_size = None
self.num_steps = self.hparams.num_steps
# Otherwise, we can train basically as long as we want on each task.
self.loss_function = {
"gamma": self.hparams.gamma,
}
x_space = setting.observation_space.x
if is_image(setting.observation_space.x):
# Observing pixel input.
self.arch = "conv"
else:
# Observing state input (e.g. the 4 floats in cartpole rather than images)
self.arch = "mlp"
self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)
else:
# If we're applied to a Supervised Learning setting:
# Used these as the default hparams in SL:
self.hparams = self.hparams or self.HParams(
learning_rate=0.0001,
batch_size=32,
)
if self.hparams.batch_size is None:
self.hparams.batch_size = 32
# Set the batch size on the setting.
setting.batch_size = self.hparams.batch_size
# For now all Settings on the supervised side of the tree have images as
# inputs, so the observation spaces are of type `Image` (same as Box, but with
# additional `h`, `w`, `c` and `b` attributes).
assert isinstance(input_space, Image)
assert (
setting.increment == setting.test_increment
), "Assuming same number of classes per task for training and testing."
# TODO: (@lebrice): Temporarily 'fixing' this by making it so each output
# head has as many outputs as there are classes in total, which might make
# no sense, but currently works.
# It would be better to refactor this so that each output head can have only
# as many outputs as is required, and then reshape / offset the predictions.
n_outputs = setting.increment
n_outputs = setting.action_space.n
self.layer_size = [self.num_inputs, 256, n_outputs]
self.model = PnnClassifier(
n_layers=len(self.layer_size) - 1,
)
def on_task_switch(self, task_id: Optional[int]) -> None:
"""Called when switching tasks in a CL setting."""
# This method gets called if task boundaries are known in the current
# setting. Furthermore, if task labels are available, task_id will be
# the index of the new task. If not, task_id will be None.
# For example, you could do something like this:
# self.model.current_task = task_id
# This freezes all columns except the one for the next task.. but there might
# not yet be a column for the new task!
self.model.freeze_columns(skip=[task_id])
if task_id not in self.added_tasks:
if isinstance(self.model, PnnA2CAgent):
self.model.new_task(
device=self.device,
num_inputs=self.num_inputs,
num_actions=self.num_actions,
)
else:
self.model.new_task(device=self.device, sizes=self.layer_size)
self.added_tasks.append(task_id)
self.task_id = task_id
def set_optimizer(self):
self.optimizer = torch.optim.Adam(
self.model.parameters(self.task_id),
lr=self.hparams.learning_rate,
)
def get_actions(self, observations: Observations, action_space: spaces.Space) -> Actions:
"""Get a batch of predictions (aka actions) for the given observations."""
observations = observations.to(self.device)
with torch.no_grad():
if isinstance(self.model, PnnA2CAgent):
predictions = self.model(observations)
_, logit = predictions
# get the predicted action:
action = torch.argmax(logit).item()
else:
logits = self.model(observations)
# Get the predicted classes
y_pred = logits.argmax(dim=-1)
action = y_pred
assert action in action_space, (action, action_space)
return action
def fit(self, train_env: Environment, valid_env: Environment):
"""Train and validate this method using the "environments" for the current task.
NOTE: `train_env` and `valid_env` are both `gym.Env`s as well as `DataLoader`s.
This means that if you want to write a "regular" SL training loop, you totally
can, and if you want to write you RL-style training loop, you can also do that.
"""
if isinstance(train_env.unwrapped, PassiveEnvironment):
self.fit_sl(train_env, valid_env)
else:
self.fit_rl(train_env, valid_env)
def fit_rl(self, train_env: gym.Env, valid_env: gym.Env):
"""Training loop for Reinforcement Learning (a.k.a. "active") environment."""
"""
base on https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f
"""
if self.model is None:
self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)
assert isinstance(self.model, PnnA2CAgent)
self.set_optimizer()
assert self.hparams
# self.model.float()
all_lengths = []
average_lengths = []
all_rewards = []
entropy_term = 0
for episode in range(self.train_steps_per_task):
values = []
rewards = []
log_probs = []
state = train_env.reset()
for steps in range(self.num_steps):
value, policy_dist = self.model(state)
value = value.item()
dist = policy_dist.detach().numpy()
action = np.random.choice(self.num_actions, p=np.squeeze(dist))
log_prob = torch.log(policy_dist.squeeze(0)[action])
entropy = -np.sum(np.mean(dist) * np.log(dist))
new_state, reward, done, _ = train_env.step(action)
rewards.append(reward.y)
values.append(value)
log_probs.append(log_prob)
entropy_term += entropy
state = new_state
if done or steps == self.num_steps - 1:
Qval, _ = self.model(state)
Qval = Qval.item()
all_rewards.append(np.sum(rewards))
all_lengths.append(steps)
average_lengths.append(np.mean(all_lengths[-10:]))
if episode % 10 == 0:
print(
f"episode: {episode}, "
f"reward: {np.sum(rewards)}, "
f"total length: {steps}, "
f"average length: {average_lengths[-1]}"
)
break
Qvals = np.zeros_like(values)
for t in reversed(range(len(rewards))):
Qval = rewards[t] + self.hparams.gamma * Qval
Qvals[t] = Qval
# update actor critic
values_tensor = torch.as_tensor(values, dtype=torch.float)
Qvals = torch.as_tensor(Qvals, dtype=torch.float)
log_probs_tensor = torch.stack(log_probs)
advantage = Qvals - values_tensor
actor_loss = (-log_probs_tensor * advantage).mean()
critic_loss = 0.5 * advantage.pow(2).mean()
ac_loss = actor_loss + critic_loss + 0.001 * entropy_term
self.optimizer.zero_grad()
ac_loss.backward()
self.optimizer.step()
def fit_sl(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
"""Train on a Supervised Learning (a.k.a. "passive") environment."""
observations: TaskIncrementalSLSetting.Observations = train_env.reset()
cuda_observations = observations.to(self.device)
assert isinstance(self.model, PnnClassifier)
assert self.hparams
self.set_optimizer()
best_val_loss = inf
best_epoch = 0
for epoch in range(self.hparams.max_epochs_per_task):
self.model.train()
print(f"Starting epoch {epoch}")
# Training loop:
with torch.set_grad_enabled(True), tqdm.tqdm(train_env) as train_pbar:
postfix: Dict[str, Any] = {}
train_pbar.set_description(f"Training Epoch {epoch}")
for i, batch in enumerate(train_pbar):
loss, metrics_dict = self.model.shared_step(
batch,
environment=train_env,
)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
postfix.update(metrics_dict)
train_pbar.set_postfix(postfix)
# Validation loop:
self.model.eval()
with torch.set_grad_enabled(False), tqdm.tqdm(valid_env) as val_pbar:
postfix = {}
val_pbar.set_description(f"Validation Epoch {epoch}")
epoch_val_loss = 0.0
for i, batch in enumerate(val_pbar):
batch_val_loss, metrics_dict = self.model.shared_step(
batch,
environment=valid_env,
)
epoch_val_loss += batch_val_loss
postfix.update(metrics_dict, val_loss=epoch_val_loss)
val_pbar.set_postfix(postfix)
@classmethod
def add_argparse_args(cls, parser: ArgumentParser) -> None:
parser.add_arguments(cls.HParams, dest="hparams", default=None)
@classmethod
def from_argparse_args(cls, args: Namespace) -> "PnnMethod":
hparams: PnnMethod.HParams = args.hparams
method = cls(hparams=hparams)
return method
def main_rl():
    """Applies the PnnMethod in a RL Setting.

    Builds a two-task cartpole `TaskIncrementalRLSetting`, parses the `Config` and the
    method's hyper-parameters from the command-line, applies the method to the
    setting, prints the results and returns them.
    """
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)
    Config.add_argparse_args(parser, dest="config")
    PnnMethod.add_argparse_args(parser, dest="method")

    # 1. Creating the Setting: the cartpole's attributes change at step 0 and 1000.
    task_schedule = {
        0: {"gravity": 10, "length": 0.3},
        1000: {"gravity": 10, "length": 0.5},
    }
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_task_schedule=task_schedule,
    )

    # 2. Creating the Method (and the Config) from the command-line arguments.
    parsed_args = parser.parse_args()
    config: Config = Config.from_argparse_args(parsed_args, dest="config")
    method: PnnMethod = PnnMethod.from_argparse_args(parsed_args, dest="method")
    method.config = config

    # 3. Applying the method to the setting:
    results = setting.apply(method, config=config)

    print(results.summary())
    print(f"objective: {results.objective}")
    return results
def main_sl():
    """Applies the PnnMethod in a SL Setting.

    The setting, the `Config` and the method's hyper-parameters are all parsed from
    the command-line. The results are printed and returned.
    """
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # Arguments for the Setting.
    # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
    # is the same for each task.
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    # Arguments for the Config.
    Config.add_argparse_args(parser, dest="config")
    # Arguments for the Method.
    PnnMethod.add_argparse_args(parser, dest="method")

    parsed_args = parser.parse_args()

    setting: TaskIncrementalSLSetting = TaskIncrementalSLSetting.from_argparse_args(
        parsed_args,
        dest="setting",
    )
    config: Config = Config.from_argparse_args(parsed_args, dest="config")
    method: PnnMethod = PnnMethod.from_argparse_args(parsed_args, dest="method")
    method.config = config

    results = setting.apply(method, config=config)
    print(results.summary())
    return results
if __name__ == "__main__":
    # Run SL Setting
    main_sl()
    # Run RL Setting
    # main_rl()
================================================
FILE: examples/advanced/procgen_example.py
================================================
""" Example of how to create an incremental RL Setting with custom environments for each task.
In this example, we create environments using [the `procgen` package](https://github.com/openai/procgen).
"""
import dataclasses
from dataclasses import dataclass, replace
from typing import Dict, List, NamedTuple, Optional, Type, TypeVar
import gym
import numpy as np
from sequoia.settings.rl import (
IncrementalRLSetting,
MultiTaskRLSetting,
TaskIncrementalRLSetting,
TraditionalRLSetting,
)
@dataclass
class ProcGenConfig:
    """Options for creating an environment from ProcGen.

    The fields on this dataclass match the arguments that can be passed to `gym.make`, based on the
    README of the procgen repo.
    """

    # Name of environment, or comma-separated list of environment names to instantiate as each env
    # in the VecEnv.
    env_name: str = "coinrun-v0"
    # The number of unique levels that can be generated. Set to 0 to use unlimited levels.
    num_levels: int = 0
    # The lowest seed that will be used to generated levels. 'start_level' and 'num_levels' fully
    # specify the set of possible levels.
    start_level: int = 0
    # Paint player velocity info in the top left corner. Only supported by certain games.
    paint_vel_info: bool = False
    # Use randomly generated assets in place of human designed assets.
    use_generated_assets: bool = False
    # Set to True to use the debug build if building from source.
    debug: bool = False
    # Useful flag that's passed through to procgen envs. Use however you want during debugging.
    debug_mode: int = 0
    # Determines whether observations are centered on the agent or display the full level.
    # Override at your own risk.
    center_agent: bool = True
    # When you reach the end of a level, the episode is ended and a new level is selected.
    # If use_sequential_levels is set to True, reaching the end of a level does not end the episode,
    # and the seed for the new level is derived from the current level seed.
    # If you combine this with start_level= and num_levels=1, you can have a single
    # linear series of levels similar to a gym-retro or ALE game.
    use_sequential_levels: bool = False
    # What variant of the levels to use, the options are "easy", "hard", "extreme", "memory",
    # "exploration". All games support "easy" and "hard", while other options are game-specific.
    # The default is "hard". Switching to "easy" will reduce the number of timesteps required to
    # solve each game and is useful for testing or when working with limited compute resources.
    distribution_mode: str = "hard"
    # Normally games use human designed backgrounds, if this flag is set to False, games will use
    # pure black backgrounds.
    use_backgrounds: bool = True
    # Some games select assets from multiple themes, if this flag is set to True, those games will
    # only use a single theme.
    restrict_themes: bool = False
    # If set to True, games will use monochromatic rectangles instead of human designed assets.
    # Best used with restrict_themes=True.
    use_monochrome_assets: bool = False

    def make_env(self) -> gym.Env:
        """Creates the environment using these options.

        Returns
        -------
        gym.Env
            The procgen environment created with `gym.make`, wrapped in a
            `SequoiaProcGenAdapterWrapper`.
        """
        env_id = f"procgen:procgen-{self.env_name}"
        # Create the env by passing the arguments to gym.make, same as what is done in the README of
        # the procgen repo. Every option field of this dataclass is forwarded (`env_name` is part
        # of the env id).
        procgen_env = gym.make(
            id=env_id,
            num_levels=self.num_levels,
            start_level=self.start_level,
            paint_vel_info=self.paint_vel_info,
            use_generated_assets=self.use_generated_assets,
            debug=self.debug,
            debug_mode=self.debug_mode,
            center_agent=self.center_agent,
            use_sequential_levels=self.use_sequential_levels,
            distribution_mode=self.distribution_mode,
            use_backgrounds=self.use_backgrounds,
            restrict_themes=self.restrict_themes,
            use_monochrome_assets=self.use_monochrome_assets,
        )
        # NOTE: The environments that are created with `gym.make("procgen:procgen-...")` are
        # instances of the `gym3.interop:ToGymEnv` class, which has a slightly different API than
        # the `gym.Env` class:
        # (Taken From gym3/interop.py:)
        # > - The `render()` method does nothing in "human" mode, in "rgb_array" mode the info dict
        #     is checked for a key named "rgb" and info["rgb"][0] is returned if present
        # > - `seed()` and `close() are ignored since gym3 environments do not require these methods
        #
        # Therefore, for now, since in Sequoia we assume that the envs fit the gym.Env API, we have to
        # "patch" these different methods up a bit, using the `SequoiaProcGenAdapterWrapper`.
        return SequoiaProcGenAdapterWrapper(env=procgen_env)
class SequoiaProcGenAdapterWrapper(gym.Wrapper):
    """Adapter that makes an environment from ProcGen behave like a regular `gym.Env`.

    Patches up the methods/properties that differ from the gym API:
    - The `seed` method doesn't have the right number of arguments.
    - The `done` value is of type `np.bool_` instead of a plain bool.
    - `render` returns None.
    """

    def __init__(self, env):
        super().__init__(env=env)

    def step(self, action):
        """Step the wrapped env, converting a `np.bool_` `done` into a plain `bool`."""
        obs, rewards, done, info = self.env.step(action)
        done = bool(done) if isinstance(done, np.bool_) else done
        return obs, rewards, done, info

    def seed(self, seed: Optional[int] = None) -> List[int]:
        """Accept (and ignore) a seed; always returns an empty list.

        The `seed` method of the procgen env doesn't accept a `seed` argument, so it is
        not called here.
        """
        return []

    def render(self, mode: str = "rgb_array"):
        """Render the wrapped env in "rgb_array" mode, regardless of `mode`.

        NOTE: rendering doesn't seem to be working: `self.env.render("rgb_array")`
        returns None.
        """
        frame: Optional[np.ndarray] = self.env.render("rgb_array")
        return frame
# Type variable for a type of setting that supports passing envs for each task (all settings below
# `IncrementalRLSetting`).
SettingType = TypeVar("SettingType", bound=IncrementalRLSetting)

# Names of procgen games, listed without a version tag (e.g. "coinrun" rather than "coinrun-v0").
# NOTE(review): not referenced by the code visible in this file — presumably informational.
available_envs = [
    "bigfish",
    "bossfight",
    "caveflyer",
    "chaser",
    "climber",
    "coinrun",
    "dodgeball",
    "fruitbot",
    "heist",
    "jumper",
    "leaper",
    "maze",
    "miner",
    "ninja",
    "plunder",
    "starpilot",
]
def make_procgen_setting(
    env_name: str,
    nb_tasks: int,
    num_levels_per_task: int = 1,
    overlapping_levels_between_tasks: int = 0,
    common_options: ProcGenConfig = None,
    setting_type: Type[SettingType] = TaskIncrementalRLSetting,
) -> SettingType:
    """Creates an RL Setting that uses environments from procgen.

    Each task gets its own window of `num_levels_per_task` consecutive levels. The
    window of a task starts `num_levels_per_task - overlapping_levels_between_tasks`
    levels after the window of the previous task.

    For example, say `nb_tasks`=5, `num_levels_per_task`=2, `overlapping_levels_between_tasks`=1:
    task #1: levels: [0, 1]
    task #2: levels: [1, 2]
    task #3: levels: [2, 3]
    task #4: levels: [3, 4]
    task #5: levels: [4, 5]

    For example, say `nb_tasks`=5, `num_levels_per_task`=5, `overlapping_levels_between_tasks`=2:
    task #1: levels: [0, 1, 2, 3, 4]
    task #2: levels: [3, 4, 5, 6, 7]
    task #3: levels: [6, 7, 8, 9, 10]
    task #4: levels: [9, 10, 11, 12, 13]
    task #5: levels: [12, 13, 14, 15, 16]

    NOTE: (lebrice): Maybe this (and other benchmark-creating functions) could be classmethods on
    the settings, instead of passing the setting_type as a parameter!

    Parameters
    ----------
    env_name : str
        Name of the environment from procgen to use. Should include the version tag.
        For example: "coinrun-v0".
    nb_tasks : int
        Number of tasks in the setting.
    num_levels_per_task : int, optional
        Number of generated levels per task, by default 1
    overlapping_levels_between_tasks : int, optional
        Number of levels in common between neighbouring tasks. Needs to be less than
        `num_levels_per_task`. Defaults to 0, in which case all tasks have distinct levels.
    common_options : ProcGenConfig, optional
        Set of options common to the envs of all the tasks. This can be used to set the starting
        level, for example. Defaults to None, in which case the default options from `ProcGenConfig`
        are used. Its `env_name` is always replaced by the `env_name` argument.
    setting_type : Type[SettingType], optional
        The type of setting to create, by default TaskIncrementalRLSetting.

    Returns
    -------
    SettingType
        A Setting of type `setting_type` (`TaskIncrementalRLSetting`) by default, where each task
        uses environments from ProcGen.
    """
    assert overlapping_levels_between_tasks < num_levels_per_task

    # Options shared by every task: the given ones (or the defaults), with `env_name` overwritten.
    # (Re-binding the parameter only changes the local name; the caller's object is untouched.)
    if common_options is None:
        common_options = ProcGenConfig(env_name=env_name)
    else:
        common_options = dataclasses.replace(common_options, env_name=env_name)

    # First level of each task: one value every `stride` levels, `nb_tasks` values in total.
    stride = num_levels_per_task - overlapping_levels_between_tasks
    first_level = common_options.start_level
    start_levels: List[int] = list(
        range(first_level, first_level + stride * nb_tasks, stride)
    )

    # One config per task, which only differs from the common options in `start_level` (and in
    # `num_levels`, which is the same for every task).
    train_env_configs: List[ProcGenConfig] = [
        replace(common_options, start_level=level, num_levels=num_levels_per_task)
        for level in start_levels
    ]
    # NOTE: For now the validation and testing environment are the same as those for training.
    # This could easily be different though!
    # For example:
    # - the test environments could have a background while the train/valid envs don't!
    #   --> This could be super interesting to researchers in Out-of-Distribution RL!
    valid_env_configs: List[ProcGenConfig] = list(train_env_configs)
    test_env_configs: List[ProcGenConfig] = list(train_env_configs)

    # The setting receives functions that create the envs, rather than the envs themselves: this
    # saves some memory, and the envs can be closed after each task since they can always be
    # re-created.
    train_env_fns = [env_config.make_env for env_config in train_env_configs]
    valid_env_fns = [env_config.make_env for env_config in valid_env_configs]
    test_env_fns = [env_config.make_env for env_config in test_env_configs]
    return setting_type(
        dataset=None,
        train_envs=train_env_fns,
        val_envs=valid_env_fns,
        test_envs=test_env_fns,
    )
from sequoia.common.config import Config
from sequoia.methods.random_baseline import RandomBaselineMethod
def main_simple():
    """Simple example: apply a random baseline to a Task-Incremental RL setting of procgen envs."""
    procgen_setting = make_procgen_setting(env_name="coinrun-v0", nb_tasks=5)
    baseline = RandomBaselineMethod()
    # NOTE: The `render` option isn't yet working (see above)
    run_config = Config(debug=True, render=False)
    results = procgen_setting.apply(baseline, config=run_config)
    print(results.summary())
def main_using_other_setting():
    """Example where we change what kind of setting we want to create."""

    class Key(NamedTuple):
        """The two properties used below to tell the available settings apart."""

        stationary_context: bool
        task_labels_at_test_time: bool

    # This is here just to give an idea of the differences between these settings.
    available_settings: Dict[Key, Type[IncrementalRLSetting]] = {
        Key(task_labels_at_test_time=False, stationary_context=False): IncrementalRLSetting,
        Key(task_labels_at_test_time=True, stationary_context=False): TaskIncrementalRLSetting,
        Key(task_labels_at_test_time=False, stationary_context=True): TraditionalRLSetting,
        Key(task_labels_at_test_time=True, stationary_context=True): MultiTaskRLSetting,
    }

    # You can choose whichever setting you want, but for example:
    wanted = Key(task_labels_at_test_time=True, stationary_context=False)
    setting_type = available_settings[wanted]

    # Create the Method.
    method = RandomBaselineMethod()

    # Create the Setting, then apply the Method to it.
    setting = make_procgen_setting(env_name="coinrun-v0", nb_tasks=5, setting_type=setting_type)
    run_config = Config(debug=True, render=False)
    results = setting.apply(method, config=run_config)
    print(results.summary())
if __name__ == "__main__":
    # Only the simple example is run by default; `main_using_other_setting` is not called here.
    main_simple()
================================================
FILE: examples/basic/__init__.py
================================================
================================================
FILE: examples/basic/base_method_demo.py
================================================
""" Example showing how the BaseMethod can be applied to get results in both
RL and SL settings.
"""
from simple_parsing import ArgumentParser
from sequoia.common import Config
from sequoia.methods import BaseMethod
from sequoia.settings import Setting, TaskIncrementalRLSetting, TaskIncrementalSLSetting
def baseline_demo_simple():
    """Option 1: create the BaseMethod and the Setting manually, in code.

    Applies the method to the setting, prints a summary of the results and returns
    the results.
    """
    config = Config()
    method = BaseMethod(config=config, max_epochs=1)

    ## Create *any* Setting from the tree, for example:
    # Supervised Learning Setting:
    setting = TaskIncrementalSLSetting(dataset="cifar10", nb_tasks=2)
    ## Reinforcement Learning Setting:
    # setting = TaskIncrementalRLSetting(
    #     dataset="cartpole",
    #     train_max_steps=4000,
    #     nb_tasks=2,
    # )

    results = setting.apply(method, config=config)
    print(results.summary())
    return results
def baseline_demo_command_line():
    """Option 2: create the BaseMethod and the Setting from the command-line.

    Applies the method to the setting, prints a summary of the results and returns
    the results.
    """
    # NOTE: The module docstring has to be passed as the `description`: the first
    # positional argument of an ArgumentParser is `prog` (the name of the program).
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # Supervised Learning Setting:
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    # Reinforcement Learning Setting:
    # parser.add_arguments(TaskIncrementalRLSetting, dest="setting")

    parser.add_arguments(Config, dest="config")
    BaseMethod.add_argparse_args(parser, dest="method")

    args = parser.parse_args()

    setting: Setting = args.setting
    config: Config = args.config
    method: BaseMethod = BaseMethod.from_argparse_args(args, dest="method")

    results = setting.apply(method, config=config)
    print(results.summary())
    return results
if __name__ == "__main__":
    ### Option 1: Create the BaseMethod and Settings manually. (This is the option that is run.)
    baseline_demo_simple()
    ### Option 2: Create the BaseMethod and Settings from the command-line.
    # baseline_demo_command_line()
================================================
FILE: examples/basic/pl_example.py
================================================
"""A simple example for creating a Method using PyTorch-Lightning.
Run this as:
```console
$> python examples/basic/pl_example.py
```
"""
from dataclasses import asdict, dataclass
from typing import Optional, Tuple
import torch
from gym import spaces
from pytorch_lightning import LightningModule, Trainer
from torch import Tensor, nn
from torch.optim import Adam
from sequoia.common.config import Config
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings.assumptions.task_type import ClassificationActions
from sequoia.settings.sl.continual import (
Actions,
ContinualSLSetting,
Observations,
ObservationSpace,
Rewards,
)
class Model(LightningModule):
    """Example Pytorch Lightning model used for continual image classification.

    A small convolutional feature extractor followed by a fully-connected classifier,
    trained with a cross-entropy loss. Used by the `ExampleMethod` below.
    """

    @dataclass
    class HParams:
        """Hyper-parameters of our model.

        NOTE: dataclasses are totally optional. This is just much nicer than dicts or
        ugly namespaces.
        """

        # Learning rate.
        learning_rate: float = 1e-3
        # Maximum number of training epochs per task.
        max_epochs_per_task: int = 1

    def __init__(
        self,
        input_space: ObservationSpace,
        output_space: spaces.Discrete,
        hparams: HParams = None,
    ):
        """Create the layers of the model from the given spaces.

        Parameters
        ----------
        input_space : ObservationSpace
            Space of the observations. It is a subclass of `gym.spaces.Dict`, which
            contains (at least) the `x` key, but can also contain other things, for
            example the task labels. Doing things this way makes sure that this Model
            can also be applied to any more specific Setting in the future (any
            setting with more information given)!
        output_space : spaces.Discrete
            Space of the actions; its `n` is used as the number of classes.
        hparams : HParams, optional
            Hyper-parameters. Defaults to `HParams()` when not given.
        """
        super().__init__()
        if not hparams:
            hparams = self.HParams()

        # NOTE: `Image` is just a subclass of `gym.spaces.Box` with a few extra properties
        image_space: Image = input_space.x
        self.input_dims = image_space.shape

        # NOTE: Can't set the `hparams` attribute in PL, so use hp instead:
        self.hp = hparams
        self.save_hyperparameters({"hparams": asdict(hparams)})

        in_channels: int = image_space.channels
        num_classes: int = output_space.n

        # Imitates the SimpleConvNet from sequoia.common.models.simple_convnet
        feature_layers = [
            nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.AdaptiveAvgPool2d(output_size=(8, 8)),  # [16, 8, 8]
            # [32, 6, 6]
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            # [32, 4, 4]
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.Flatten(),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Quick tip: In this case we have a fixed hidden size (thanks to the Adaptive
        # pooling layer above), but you could also use the cool new `nn.LazyLinear` when
        # you don't know the hidden size in advance!
        classifier_layers = [
            nn.Flatten(),
            # nn.LazyLinear(out_features=120),
            nn.Linear(512, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, num_classes),
        ]
        self.fc = nn.Sequential(*classifier_layers)

        self.loss = nn.CrossEntropyLoss()
        self.trainer: Trainer

    def forward(self, observations: ContinualSLSetting.Observations) -> Tensor:
        """Returns the logits for the given observation.

        Parameters
        ----------
        observations : ContinualSLSetting.Observations
            dataclass with (at least) the following attributes:
            - "x" (Tensor): the samples (images)
            - "task_labels" (Optional[Tensor]): Task labels, when applicable.

        Returns
        -------
        Tensor
            Classification logits for each class.
        """
        images: Tensor = observations.x
        # Task labels for each sample. We don't use them in this example.
        task_labels: Optional[Tensor] = observations.task_labels
        return self.fc(self.features(images))

    def training_step(
        self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int
    ) -> Tensor:
        """Loss for a training batch (see `shared_step`)."""
        return self.shared_step(batch=batch, batch_idx=batch_idx, stage="train")

    def validation_step(
        self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int
    ) -> Tensor:
        """Loss for a validation batch (see `shared_step`)."""
        return self.shared_step(batch=batch, batch_idx=batch_idx, stage="val")

    def test_step(self, batch: Tuple[Observations, Optional[Rewards]], batch_idx: int) -> Tensor:
        """Loss for a test batch (see `shared_step`)."""
        return self.shared_step(batch=batch, batch_idx=batch_idx, stage="test")

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        stage: str,
    ) -> Tensor:
        """Compute the loss for a batch, and log the accuracy as "{stage}/accuracy".

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            The observations, and the rewards when they are available.
        batch_idx : int
            Index of the batch (unused).
        stage : str
            Name of the current stage: "train", "val" or "test" in the callers above.

        Returns
        -------
        Tensor
            The cross-entropy loss between the logits and the labels.
        """
        observations, rewards = batch
        logits = self(observations)
        y_pred = logits.argmax(-1)
        actions = ClassificationActions(y_pred=y_pred, logits=logits)

        if rewards is None:
            # The rewards (image labels) might not be given at the same time as the
            # observations (images), for example during testing, or if we're being
            # evaluated based on our online performance during training!
            #
            # When that is the case, we need to send the "action" (predictions) to the
            # environment using `send()` to get the rewards.
            actions = y_pred
            # Get the current environment / dataloader from the Trainer.
            environment: ContinualSLSetting.Environment = self.trainer.request_dataloader(
                self, stage
            )
            rewards = environment.send(actions)

        y: Tensor = rewards.y
        num_correct = (y_pred == y).int().sum()
        accuracy = num_correct / len(y)
        self.log(f"{stage}/accuracy", accuracy, prog_bar=True)
        return self.loss(logits, y)

    def configure_optimizers(self):
        """Use Adam over all the parameters, with the learning rate from the hparams."""
        return Adam(self.parameters(), lr=self.hp.learning_rate)
class ExampleMethod(Method, target_setting=ContinualSLSetting):
"""Example method for solving Continual SL Settings with PyTorch-Lightning
This ExampleMethod declares that it can be applied to any `Setting` that inherits
from this `ContinualSLSetting`.
NOTE: Settings in Sequoia are a subclass of `LightningDataModule`, which create
the training/validation/testing `Environment`s that methods will interact with.
Each setting defines an `apply` method, which serves as a "main loop", and describes
when and on what data to train the Method, and how it will be evaluated, according
to the usual methodology for that setting in the litterature.
Importantly, settings do NOT describe **how** the method is to be trained, that is
entirely up to the Method!
"""
def __init__(self, hparams: Model.HParams = None):
super().__init__()
self.hparams = hparams or Model.HParams()
self.current_task: Optional[int] = None
# NOTE: These get assigned in `configure` below:
self.model: Model
self.trainer: Trainer
def configure(self, setting: ContinualSLSetting):
"""Called by the Setting so the method can configure itself before training.
This could be used to, for example, create a model, since the observation space
(which describes the types and shapes of the data) and the `nb_tasks` can be
read from the Setting.
Parameters
----------
setting : ContinualSLSetting
The research setting that this `Method` will be applied to.
"""
if not setting.known_task_boundaries_at_train_time:
# If we're being applied on a Setting where we don't have access to task
# boundaries, then there is only one training environment that transitions
# between all tasks and then closes itself.
# We therefore limit the number of epochs per task to 1 in that case.
self.hparams.max_epochs_per_task = 1
self.model = Model(
input_space=setting.observation_space,
output_space=setting.action_space,
hparams=self.hparams,
)
def fit(
self,
train_env: ContinualSLSetting.Environment,
valid_env: ContinualSLSetting.Environment,
):
"""Called by the Setting to allow the method to train.
The passed environments inherit from `DataLoader` as well as from `gym.Env`.
They produce `Observations` (which have an `x` Tensor field, for instance), and
return `Rewards` when they receive `Actions`.
This interface is the same between RL and SL, making it easy to create methods
that can adapt to both domains.
Parameters
----------
train_env : ContinualSLSetting.Environment
The Training environment. In the case of a `ContinualSLSetting`, this
environment will smoothly transition between the different tasks.
NOTE: Regardless of what exact type of `Setting` this method is being
applied to, this environment will always be a subclass of
`ContinualSLSetting.Environment`, and the `Observations`, `Actions`,
`Rewards` produced by this environment will also always follow this
hierarchy.
This is important to note, since it makes it possible to create a Method
that also works in other settings which add extra information in the
observations (e.g. task labels)!
valid_env : ContinualSLSetting.Environment
The Validation environment.
"""
# NOTE: Currently have to 'reset' the Trainer for each call to `fit`.
self.trainer = Trainer(
gpus=torch.cuda.device_count(),
max_epochs=self.hparams.max_epochs_per_task,
)
self.trainer.fit(self.model, train_dataloader=train_env, val_dataloaders=valid_env)
def test(self, test_env: ContinualSLSetting.Environment):
    """Hook that lets the Method run the whole test loop on its own.

    Rewards (y) only come back from `test_env` after an action (y_pred) has been
    passed to its `send` method.

    The test environment records the metrics its `Setting` cares about (accuracy
    here) and reports them to the `Setting` once it has been exhausted.

    NOTE: The test environment closes itself when it is done, which marks the end
    of the test period; from then on `test_env.is_closed()` returns `True`.
    """
    # BUG: The Trainer-driven test loop currently doesn't call `on_task_switch`
    # properly, so this hook is disabled by raising unconditionally.
    raise NotImplementedError
    # Intentionally unreachable until the bug above is fixed.
    # ckpt_path=None makes the Trainer use the current weights rather than the
    # "best" ones.
    self.trainer.test(
        self.model,
        ckpt_path=None,
        test_dataloaders=test_env,
    )
def get_actions(self, observations: Observations, action_space: spaces.MultiDiscrete):
    """Return predictions for a batch of observations; queried by the Setting.

    This currently has to be implemented, although `test` takes precedence over it
    when `test` is implemented.
    """
    net = self.model
    net.eval()
    with torch.no_grad():
        # Move the observations onto the model's device before the forward pass.
        device_observations = observations.to(net.device)
        logits = net(device_observations)
    # The predicted class is the index of the largest logit along the last dim.
    predicted = logits.argmax(-1)
    return Actions(y_pred=predicted)
def on_task_switch(self, task_id: Optional[int]) -> None:
    """Record (and announce) a task boundary; may be called by the Setting.

    The Setting calls this when `setting.known_task_boundaries_at_[train/test]_time`
    is True, for the training or testing phase respectively.

    When `setting.task_labels_at_[train/test]_time` is True, `task_id` is the
    identifier (index) of the next task; otherwise `task_id` is None.
    """
    task_changed = task_id != self.current_task
    if task_changed:
        # Only announce the switch when the task actually differs.
        phase = "testing"
        if self.training:
            phase = "training"
        print(f"Switching tasks during {phase}: {self.current_task} -> {task_id}")
    self.current_task = task_id
def main():
    """Runs the example: applies the method on a Continual Supervised Learning Setting."""
    # Any of the SL settings could be used here, since this example method targets
    # the most general Continual SL Setting in Sequoia, `ContinualSLSetting`, e.g.:
    # from sequoia.settings.sl import ClassIncrementalSetting

    # The Setting.
    # NOTE: The model is described as using an adaptive pooling layer, so it should
    # work on any dataset.
    setting = ContinualSLSetting(
        dataset="mnist",
        monitor_training_performance=True,
    )

    # The Method.
    method = ExampleMethod()

    # A Config for the experiment, only used to set a few options for this example.
    config = Config(debug=True, log_dir="results/pl_example")

    # Run the experiment: the Setting trains and tests the Method, then hands back
    # a Results object.
    results = setting.apply(method, config=config)

    # Print a summary of the results, then show the plots.
    print(results.summary())
    plots = results.make_plots()
    for plot_name, plot in plots.items():
        print("Figure:", plot_name)
        plot.show()
        # plot.waitforbuttonpress(10)


if __name__ == "__main__":
    main()
================================================
FILE: examples/basic/pl_example_packnet.py
================================================
from dataclasses import dataclass
from typing import Optional
import torch
from simple_parsing import mutable_field
from examples.basic.pl_example import ExampleMethod, Model
from sequoia.common import Config
from sequoia.methods import BaseModel
from sequoia.methods.packnet_method import PackNet
from sequoia.methods.trainer import Trainer, TrainerConfig
from sequoia.settings.sl import ContinualSLSetting, TaskIncrementalSLSetting
# Example Method that adds the `PackNet` callback on top of `ExampleMethod`.
# Declared with `target_setting=TaskIncrementalSLSetting`.
class ExamplePackNetMethod(ExampleMethod, target_setting=TaskIncrementalSLSetting):
# Stores the PackNet hyper-parameters (a default `PackNet.HParams()` when none are
# given) and raises `hparams.max_epochs_per_task` if it is too small for PackNet.
def __init__(self, hparams: Model.HParams = None, packnet_hparams: PackNet.HParams = None):
super().__init__(hparams=hparams)
self.packnet_hparams = packnet_hparams or PackNet.HParams()
# TODO: Modify `hparams.max_epochs_per_task` to at least be enough so that
# PackNet will work.
# The minimum is the number of training epochs plus the number of fine-tuning
# epochs from the PackNet hyper-parameters.
min_epochs = self.packnet_hparams.train_epochs + self.packnet_hparams.fine_tune_epochs
if self.hparams.max_epochs_per_task < min_epochs:
self.hparams.max_epochs_per_task = min_epochs
# Annotation only: the PackNet object itself is created in `configure`.
self.p_net: PackNet
# Runs the parent's `configure`, then builds the PackNet object for this setting.
def configure(self, setting: TaskIncrementalSLSetting):
super().configure(setting)
# TODO: Why does PackNet need access to the number of tasks again?
self.p_net = PackNet(
n_tasks=setting.nb_tasks,
hparams=self.packnet_hparams,
)
# TODO: This could be set as default values in the PackNet constructor.
# Start from -1, i.e. before any task index has been assigned.
self.p_net.current_task = -1
self.p_net.config_instructions()
# Creates a new Trainer with the PackNet object registered as a callback, then
# fits the model on the given environments.
def fit(
self,
train_env: TaskIncrementalSLSetting.Environment,
valid_env: TaskIncrementalSLSetting.Environment,
):
# NOTE: PackNet is not compatible with EarlyStopping, thus we set max_epochs==min_epochs
# NOTE(review): the epoch count comes from `p_net.total_epochs()`;
# `hparams.max_epochs_per_task` is not passed to this Trainer.
self.trainer = Trainer(
gpus=torch.cuda.device_count(),
min_epochs=self.p_net.total_epochs(),
max_epochs=self.p_net.total_epochs(),
callbacks=[self.p_net],
)
self.trainer.fit(self.model, train_dataloader=train_env, val_dataloaders=valid_env)
def on_task_switch(self, task_id: Optional[int]):
"""Called when switching between tasks.
Args:
task_id (int, optional): the id of the new task. When None, we are
basically being informed that there is a task boundary, but without
knowing what task we're switching to.
"""
super().on_task_switch(task_id=task_id)
# Only when the task id is known and `p_net.masks` already holds an entry at that
# index: presumably restores the final weights and applies that task's evaluation
# mask to the model — confirm against `PackNet`.
if task_id is not None and len(self.p_net.masks) > task_id:
self.p_net.load_final_state(model=self.model)
self.p_net.apply_eval_mask(task_idx=task_id, model=self.model)
# Record the new task id on the PackNet object.
self.p_net.current_task = task_id
def main():
    """Runs the example: applies the PackNet example method on a Task-Incremental SL Setting."""
    # The Setting: MNIST with 5 tasks, with training performance monitored.
    # `ExamplePackNetMethod` is declared with `TaskIncrementalSLSetting` as its
    # target setting.
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )

    # The Method, with default hyper-parameters.
    method = ExamplePackNetMethod()

    # A Config for the experiment, only used to set a few options for this example.
    config = Config(debug=False, log_dir="results/pl_example_packnet")

    # Run the experiment: the Setting trains and tests the Method, then hands back
    # a Results object.
    results = setting.apply(method, config=config)

    # Print a summary of the results, then show the plots.
    print(results.summary())
    plots = results.make_plots()
    for plot_name, plot in plots.items():
        print("Figure:", plot_name)
        plot.show()
        # plot.waitforbuttonpress(10)


if __name__ == "__main__":
    main()
================================================
FILE: examples/basic/pl_example_test.py
================================================
""" Unit-tests for the PyTorch-Lightning Example.
Can be run like so:
```console
$ pytest examples/basic/pl_example_test.py
```
"""
from typing import Type
import pytest
from examples.basic.pl_example import ExampleMethod, Model
from sequoia.common.config import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.methods import Method
from sequoia.methods.method_test import MethodTests, config, session_config # type: ignore
from sequoia.settings import Results
from sequoia.settings.sl import ContinualSLSetting, IncrementalSLSetting
class TestPLExample(MethodTests):
"""Tests for this PL Example.
This `MethodTests` base class generates a `test_debug` test for us.
"""
# The Method class under test.
Method: Type[Method] = ExampleMethod
@pytest.fixture()
def method(self, config: Config):
"""Required fixture, which creates a Method that can be used for quick tests."""
# A single epoch per task keeps the test run short.
# NOTE(review): the `config` fixture is requested but not used in this body.
return ExampleMethod(hparams=Model.HParams(max_epochs_per_task=1))
def validate_results(
self, setting: ContinualSLSetting, method: ExampleMethod, results: Results
):
"""This gets called by `test_debug` to check that the results make sense for
the given setting and method.
"""
# NOTE: This particular example isn't that great: We just check that the average
# final test accuracy and the average online accuracy are both non-zero.
# It would be best to do some kind of branching depending on what type of
# Setting was used, since each setting can produce different types of results.
print(results.summary())
average_metrics: ClassificationMetrics
online_metrics: ClassificationMetrics
# The online-performance checks below need training performance to be monitored.
assert setting.monitor_training_performance
# Placeholder lower bound: every accuracy checked below only has to be strictly
# greater than this value.
todo = 0.0
if isinstance(setting, IncrementalSLSetting):
# The results in this case include the entire nb_tasks x nb_tasks transfer
# matrix.
assert isinstance(results, IncrementalSLSetting.Results)
average_metrics = results.average_final_performance
online_metrics = results.average_online_performance
# NOTE(review): both branches below currently assert the same bound.
if setting.stationary_context:
# Example: Should expect better performance if the data is i.i.d!
assert average_metrics.accuracy > todo
else:
assert average_metrics.accuracy > todo
if setting.monitor_training_performance:
assert online_metrics.accuracy > todo
else:
# In this case, there aren't clear 'tasks' to speak of, so the results are
# just aggregated metrics for each test batch:
assert isinstance(results, ContinualSLSetting.Results)
average_metrics = results.average_metrics
online_metrics = results.online_performance_metrics
assert average_metrics.accuracy > todo
assert online_metrics.accuracy > todo
================================================
FILE: examples/basic/quick_demo.ipynb
================================================
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python38364bitpy38conda80a8f432976e4e99926307fddceb6e0b",
"display_name": "Python 3.8.3 64-bit ('py38': conda)",
"language": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"# Quick Demo (Notebook version)\n",
"\n",
"(I hate notebooks.)\n",
"\n",
"In this demo, we will create a simple method and apply it to various Continual Learning settings.\n",
"\n",
"For the purposes of this demo, we will restrict ourselves to classification problems on the mnist and fashion-mnist datasets."
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Imports:\n",
"import sys\n",
"from dataclasses import dataclass\n",
"from typing import Dict, Optional, Tuple, Type\n",
"\n",
"import gym\n",
"import torch\n",
"from gym import spaces\n",
"from torch import Tensor, nn\n",
"from simple_parsing import ArgumentParser\n",
"\n",
"sys.path.extend([\".\", \"..\"])\n",
"from sequoia.settings import Method, Setting\n",
"from sequoia.settings.sl.class_incremental import ClassIncrementalSetting, DomainIncrementalSetting\n",
"from sequoia.settings.sl.class_incremental.objects import (\n",
" Actions,\n",
" Environment,\n",
" Observations,\n",
" PassiveEnvironment,\n",
" Results,\n",
" Rewards,\n",
")"
]
},
{
"source": [
"# Basic Model:"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"\n",
"class MyModel(nn.Module):\n",
" \"\"\" Simple classification model without any CL-related mechanism.\n",
"\n",
" To keep things simple, this demo model is designed for supervised\n",
" (classification) settings where observations have shape [3, 28, 28] (ie the\n",
" MNIST variants: Mnist, FashionMnist, RotatedMnist, EMnist, etc.)\n",
" \"\"\"\n",
" def __init__(self,\n",
" observation_space: gym.Space,\n",
" action_space: gym.Space,\n",
" reward_space: gym.Space):\n",
" super().__init__()\n",
" image_shape = observation_space[\"x\"].shape\n",
" assert image_shape == (3, 28, 28)\n",
" assert isinstance(action_space, spaces.Discrete)\n",
" assert action_space == reward_space\n",
" n_classes = action_space.n\n",
" image_channels = image_shape[0]\n",
"\n",
" self.encoder = nn.Sequential(\n",
" nn.Conv2d(image_channels, 6, 5),\n",
" nn.ReLU(),\n",
" nn.MaxPool2d(2),\n",
" nn.Conv2d(6, 16, 5),\n",
" nn.ReLU(),\n",
" nn.MaxPool2d(2),\n",
" )\n",
" self.classifier = nn.Sequential(\n",
" nn.Flatten(),\n",
" nn.Linear(256, 120),\n",
" nn.ReLU(),\n",
" nn.Linear(120, 84),\n",
" nn.ReLU(),\n",
" nn.Linear(84, n_classes),\n",
" )\n",
" self.loss = nn.CrossEntropyLoss()\n",
"\n",
" def forward(self, observations: Observations) -> Tensor:\n",
" # NOTE: here we don't make use of the task labels.\n",
" x = observations.x\n",
" task_labels = observations.task_labels\n",
" features = self.encoder(x)\n",
" logits = self.classifier(features)\n",
" return logits\n",
"\n",
" def shared_step(\n",
" self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment\n",
" ) -> Tuple[Tensor, Dict]:\n",
" \"\"\"Shared step used for both training and validation.\n",
" \n",
" Parameters\n",
" ----------\n",
" batch : Tuple[Observations, Optional[Rewards]]\n",
" Batch containing Observations, and optional Rewards. When the Rewards are\n",
" None, it means that we'll need to provide the Environment with actions\n",
" before we can get the Rewards (e.g. image labels) back.\n",
" \n",
" This happens for example when being applied in a Setting which cares about\n",
" sample efficiency or training performance, for example.\n",
" \n",
" environment : Environment\n",
" The environment we're currently interacting with. Used to provide the\n",
" rewards when they aren't already part of the batch (as mentioned above).\n",
"\n",
" Returns\n",
" -------\n",
" Tuple[Tensor, Dict]\n",
" The Loss tensor, and a dict of metrics to be logged.\n",
" \"\"\"\n",
" # Since we're training on a Passive environment, we will get both observations\n",
" # and rewards, unless we're being evaluated based on our training performance,\n",
" # in which case we will need to send actions to the environments before we can\n",
" # get the corresponding rewards (image labels).\n",
" observations: Observations = batch[0]\n",
" rewards: Optional[Rewards] = batch[1]\n",
" # Get the predictions:\n",
" logits = self(observations)\n",
" y_pred = logits.argmax(-1)\n",
"\n",
" if rewards is None:\n",
" # If the rewards in the batch is None, it means we're expected to give\n",
" # actions before we can get rewards back from the environment.\n",
" rewards = environment.send(Actions(y_pred))\n",
"\n",
" assert rewards is not None\n",
" image_labels = rewards.y\n",
"\n",
" loss = self.loss(logits, image_labels)\n",
"\n",
" accuracy = (y_pred == image_labels).sum().float() / len(image_labels)\n",
" metrics_dict = {\"accuracy\": accuracy.item()}\n",
" return loss, metrics_dict\n"
]
},
{
"source": [
"## Creating our Method\n",
"\n",
"Here by subclassing 'Method' and passing in a target_setting, we indicate that we are creating a new method, and that it will work on any Setting that is an instance of ClassIncrementalSetting or one of its subclasses. "
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"\n",
"class DemoMethod(Method, target_setting=ClassIncrementalSetting):\n",
" \"\"\" Minimal example of a Method targetting the Class-Incremental CL setting.\n",
" \n",
" For a quick intro to dataclasses, see examples/dataclasses_example.py \n",
" \"\"\"\n",
"\n",
" @dataclass\n",
" class HParams:\n",
" \"\"\" Hyper-parameters of the demo model. \"\"\"\n",
" # Learning rate of the optimizer.\n",
" learning_rate: float = 0.001\n",
" \n",
" def __init__(self, hparams: HParams):\n",
" self.hparams: DemoMethod.HParams = hparams\n",
" self.max_epochs: int = 1\n",
" self.early_stop_patience: int = 2\n",
"\n",
" # We will create those when `configure` will be called, before training.\n",
" self.model: MyModel\n",
" self.optimizer: torch.optim.Optimizer\n",
"\n",
" def configure(self, setting: ClassIncrementalSetting):\n",
" \"\"\" Called before the method is applied on a setting (before training). \n",
"\n",
" You can use this to instantiate your model, for instance, since this is\n",
" where you get access to the observation & action spaces.\n",
" \"\"\"\n",
" self.model = MyModel(\n",
" observation_space=setting.observation_space,\n",
" action_space=setting.action_space,\n",
" reward_space=setting.reward_space,\n",
" )\n",
" self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate)\n",
"\n",
" def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):\n",
" # configure() will have been called by the setting before we get here.\n",
" import tqdm\n",
" from numpy import inf\n",
" best_val_loss = inf\n",
" best_epoch = 0\n",
" for epoch in range(self.max_epochs):\n",
" self.model.train()\n",
" # Training loop:\n",
" with tqdm.tqdm(train_env) as train_pbar:\n",
" train_pbar.set_description(f\"Training Epoch {epoch}\")\n",
" for i, batch in enumerate(train_pbar):\n",
" loss, metrics_dict = self.model.shared_step(batch, environment=train_env)\n",
" self.optimizer.zero_grad()\n",
" loss.backward()\n",
" self.optimizer.step()\n",
" train_pbar.set_postfix(**metrics_dict)\n",
"\n",
" # Validation loop:\n",
" self.model.eval()\n",
" torch.set_grad_enabled(False)\n",
" with tqdm.tqdm(valid_env) as val_pbar:\n",
" val_pbar.set_description(f\"Validation Epoch {epoch}\")\n",
" epoch_val_loss = 0.\n",
"\n",
" for i, batch in enumerate(val_pbar):\n",
" batch_val_loss, metrics_dict = self.model.shared_step(batch, environment=valid_env)\n",
" epoch_val_loss += batch_val_loss\n",
" val_pbar.set_postfix(**metrics_dict, val_loss=epoch_val_loss)\n",
" torch.set_grad_enabled(True)\n",
"\n",
" if epoch_val_loss < best_val_loss:\n",
" best_val_loss = valid_env\n",
" best_epoch = epoch\n",
" if epoch - best_epoch > self.early_stop_patience:\n",
" print(f\"Early stopping at epoch {i}.\")\n",
" break\n",
"\n",
" def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:\n",
" \"\"\" Get a batch of predictions (aka actions) for these observations. \"\"\" \n",
" with torch.no_grad():\n",
" logits = self.model(observations)\n",
" # Get the predicted classes\n",
" y_pred = logits.argmax(dim=-1)\n",
" return self.target_setting.Actions(y_pred)\n",
" \n",
" @classmethod\n",
" def add_argparse_args(cls, parser: ArgumentParser, dest: str = \"\"):\n",
" \"\"\"Adds command-line arguments for this Method to an argument parser.\"\"\"\n",
" parser.add_arguments(cls.HParams, \"hparams\")\n",
"\n",
" @classmethod\n",
" def from_argparse_args(cls, args, dest: str = \"\"):\n",
" \"\"\"Creates an instance of this Method from the parsed arguments.\"\"\"\n",
" hparams: cls.HParams = args.hparams\n",
" return cls(hparams=hparams)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"2021-02-25:17:29:01,958 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 0.\n",
"2021-02-25:17:29:01,959 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:02,13 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
"2021-02-25:17:29:02,14 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
"Training Epoch 0: 100%|██████████| 300/300 [00:04<00:00, 64.17it/s, accuracy=1]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 155.53it/s, accuracy=1, val_loss=tensor(3.1905)]\n",
"2021-02-25:17:29:07,205 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 0.\n",
"2021-02-25:17:29:07,246 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
"2021-02-25:17:29:07,246 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
"2021-02-25:17:29:07,274 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 0%| | 0/312 [00:00, ?it/s]2021-02-25:17:29:07,361 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:07,365 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:07,373 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:07,382 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:07,394 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 232.18it/s]\n",
"2021-02-25:17:29:08,713 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.626102\n",
"2021-02-25:17:29:08,713 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 1.\n",
"2021-02-25:17:29:08,714 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 79.71it/s, accuracy=0.969]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 170.55it/s, accuracy=0.969, val_loss=tensor(5.7692)]\n",
"2021-02-25:17:29:12,923 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 1.\n",
"2021-02-25:17:29:12,926 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 0%| | 0/312 [00:00, ?it/s]2021-02-25:17:29:13,14 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:13,19 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:13,27 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:13,36 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:13,46 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 248.27it/s]\n",
"2021-02-25:17:29:14,276 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.568409\n",
"2021-02-25:17:29:14,277 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 2.\n",
"2021-02-25:17:29:14,278 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 86.51it/s, accuracy=1]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 152.03it/s, accuracy=1, val_loss=tensor(0.0980)]\n",
"2021-02-25:17:29:18,245 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 2.\n",
"2021-02-25:17:29:18,249 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 0%| | 0/312 [00:00, ?it/s]2021-02-25:17:29:18,339 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:18,343 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:18,356 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:18,362 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:18,371 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 243.46it/s]\n",
"2021-02-25:17:29:19,632 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.757212\n",
"2021-02-25:17:29:19,632 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 3.\n",
"2021-02-25:17:29:19,633 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 79.67it/s, accuracy=1]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 140.42it/s, accuracy=1, val_loss=tensor(0.1427)]\n",
"2021-02-25:17:29:23,940 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 3.\n",
"2021-02-25:17:29:23,942 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 0%| | 0/312 [00:00, ?it/s]2021-02-25:17:29:24,35 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:24,71 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:24,82 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:24,96 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:24,103 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 223.35it/s]\n",
"2021-02-25:17:29:25,441 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.791366\n",
"2021-02-25:17:29:25,441 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 4.\n",
"2021-02-25:17:29:25,442 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:148] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 81.25it/s, accuracy=0.969]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 157.25it/s, accuracy=1, val_loss=tensor(0.7817)]\n",
"2021-02-25:17:29:29,616 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 4.\n",
"2021-02-25:17:29:29,619 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 0%| | 0/312 [00:00, ?it/s]2021-02-25:17:29:29,706 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:29,710 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:29,719 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:29,727 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"2021-02-25:17:29:29,735 WARNING [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:305] On a task boundary, but since your method doesn't have an `on_task_switch` method, it won't know about it! \n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 247.82it/s]\n",
"2021-02-25:17:29:30,971 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.798978\n",
"2021-02-25:17:29:30,971 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:237] Finished main loop in 30.118470110999997 seconds.\n",
"2021-02-25:17:29:31,57 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:257] {\n",
"\t\"Task 0\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.989919\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.666667\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.481351\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.494048\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.5\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 1\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.61744\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.96131\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.422379\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.360119\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.477823\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 2\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.506048\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.564484\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 1.0\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.996528\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.718246\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 3\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.498488\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.502976\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.996472\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 1.0\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.960181\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 4\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.537802\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.549603\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.918851\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.994048\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.995464\n",
"\t\t}\n",
"\t},\n",
"\t\"Final/Average Online Performance\": 0,\n",
"\t\"Final/Average Final Performance\": 0.798978,\n",
"\t\"Final/Runtime (seconds)\": 30.118470110999997,\n",
"\t\"Final/CL Score\": 0.6793868\n",
"}\n",
"\n",
"2021-02-25:17:29:31,143 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:395] {\n",
"\t\"Task 0\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.989919\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.666667\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.481351\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.494048\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.5\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 1\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.61744\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.96131\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.422379\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.360119\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.477823\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 2\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.506048\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.564484\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 1.0\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.996528\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.718246\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 3\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.498488\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.502976\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.996472\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 1.0\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.960181\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 4\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.537802\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.549603\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.918851\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.994048\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.995464\n",
"\t\t}\n",
"\t},\n",
"\t\"Final/Average Online Performance\": 0,\n",
"\t\"Final/Average Final Performance\": 0.798978,\n",
"\t\"Final/Runtime (seconds)\": 30.118470110999997,\n",
"\t\"Final/CL Score\": 0.6793868\n",
"}\n",
"\n"
]
}
],
"source": [
"method = DemoMethod(hparams=DemoMethod.HParams())\n",
"setting = DomainIncrementalSetting(dataset=\"fashionmnist\")\n",
"\n",
"results = setting.apply(method)"
]
},
{
"source": [
"## Results:"
],
"cell_type": "markdown",
"metadata": {}
},
{
"source": [
"print(results.summary())"
],
"cell_type": "code",
"metadata": {},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n\t\"Task 0\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.989919\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.666667\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.481351\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.494048\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.5\n\t\t}\n\t},\n\t\"Task 1\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.61744\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.96131\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.422379\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.360119\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.477823\n\t\t}\n\t},\n\t\"Task 2\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.506048\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.564484\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 1.0\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.996528\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.718246\n\t\t}\n\t},\n\t\"Task 3\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.498488\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.502976\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.996472\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 1.0\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.960181\n\t\t}\n\t},\n\t\"Task 4\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.537802\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.549603\n\t\t},\n\t\t\"Task 2\": 
{\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.918851\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.994048\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.995464\n\t\t}\n\t},\n\t\"Final/Average Online Performance\": 0,\n\t\"Final/Average Final Performance\": 0.798978,\n\t\"Final/Runtime (seconds)\": 30.118470110999997,\n\t\"Final/CL Score\": 0.6793868\n}\n\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'task_metrics': }"
]
},
"metadata": {},
"execution_count": 6
},
{
"output_type": "display_data",
"data": {
"text/plain": "",
"image/svg+xml": "\n\n\n\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdO0lEQVR4nO3de7wVdd328c+1YXPLVu+QQCXQSINQCXe6RTuY3CpEaHqTGeKBDj7QCStPBSqmhlooeaRb8cmbNExNyVBRKNuJ8oiAhoqSCUaCmghBHrah6Pf5YwZcbPZhbWDWYu+53q/Xejnzm9+a9Z3lZq41v5k1SxGBmZnlV0W5CzAzs/JyEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CCx3JE2RNL7cdZhtLxwEtt2T9EbB4z1JbxXMn1SiGqZIWi+pWylez6yUHAS23YuInTY8gBeALxS0Tc369SXtCBwH/As4OevXq/fa7Uv5epZPDgJrtST1l/SIpLWSXpZ0raQO6TJJukLSSkmvSXpKUt8G1rGzpFpJV0tSIy91HLAWuAj4Sr3nd5b0v5JekrRG0l0Fy46VtDB9/aWSBqftyyQdWdDvAkm/Sqd7SgpJp0p6Afhj2v4bSf+Q9C9JsyXtV/D8jpImSvp7uvzhtO1eSafVq/dJSUNb8DZbDjgIrDV7Fzgd6AJ8EjgC+Ha6bBDwWaA38AHgy8DqwidL+iDwADAnIr4bjd9v5SvAr4FbgT6SDixYdjNQBewH7Apcka67P3ATcDbQKa1lWQu27TBgH+Bz6fx9QK/0NR4HCo+ELgcOBD4FdAZ+ALwH/JKCIxhJ+wPdgXtbUAeSvidpkaSnJX1/w7rSEH5K0t2S/jNt/3QaNgsk9UrbOkmaJalV7G8k3Zh+gFhU0NZZ0u8lPZf+d5e0XemHiCXpdh+Qtn9M0mNp2yfTtvaS/iCpqjxb1oSI8MOPVvMg2Zke2ciy7wO/TacPB/4KHAJU1Os3BbgRWASc3czr7UmyU61O52cCV6XT3dJluzTwvOuBK4rZBuAC4FfpdE8ggL2aqKlT2ucDJB/m3gL2b6DfDsAaoFc6fznw8xa+333T96kKaA/8AfgoMB84LO3zdeDH6fQ0oAfwGWBiwesOKPffTgu2+bPAAcCigrYJwJh0egzw03R6CElIK/1bezRt/1n6HvQA7kzbTgO+Wu7ta+jRKhLarCGSeku6Jx0yeQ24hOTogIj4I3AtMAlYKWnyhk+tqaOAjsB1zbzMKcDiiFiYzk8FTpRUCewB/DMi1jTwvD2ApVu4aQDLN0xIaifpJ+nw0mu8f2TRJX3s0NBrRcS/gduAk9NP48NJjmBaYh+SnVtdRKwHHgS+SHKkNTvt83uS4TOAd0hCowp4R9LewB4R8acWvm7ZRMRs4J/1mo8lOcIi/e9/F7TfFIm5QKf0goL670Mn4AskR4nbHQeBtWb/A/yF5BPvfwLnkHwyAyAiro6IA4F9SXZcZxc89wbgfmBGejK4MSOAvdKw+QfJJ70uJJ8ElwOd03/k9S0H9m5knW+S7CA22L2BPoXDVCeS7HCOJDkK6Jm2C1gF/LuJ1/olcBLJsFldRDzSSL/GLAIOlfTBdEhjCEnIPZ3WBHB82gZwKcnObixJEF8MnNfC19we7RYRL6fT/wB2S6e7UxDawIq0bRLJ3+MvST6gjAMuiYj3SlNuyzgIrDXbGXgNeENSH+BbGxZIOkjSwekn9zdJdpb1/xGOBp4F7pbUsf7K07HdvYH+QHX66AvcAoxIdwz3AT+XtIukSkmfTZ/+C+Brko6QVCGpe1ojwELghLR/DfClIrZzHck5jiqSHQsA6Y7lRuBnkj6UHj18UtJ/pMsfSbd7Ii0/GiAiFgM/BWaRBOdCknMzXwe+LemxtL630/4LI+KQiPgvYC/gZZKh9Nsk/UrSbg28TKsSyThPk/fvj4gXImJARHwSqCMZIlos6eb0vehdilqLVu6xKT
/8aMmDgvF1krHcvwBvAA+RXNXzcLrsCODJdNkqkiGdndJlU4Dx6XQFySfYWcAO9V7rOtLx3Xrt/Ul2zJ3Txy+BV0jG46cV9Bua1vA6sAT4XNq+F/BoWtu9wNVsfo6gfcF6dgJ+l67n7yRHKQF8NF3eEbgSeJHkEtfZQMeC559HM+cdWvD+XwJ8u15bb2BevTal72nn9L3/MMkJ8IvL/TdU5Hb2ZNNzBM8C3dLpbsCz6fT1wPCG+hW03UZyov/i9D34MDC13NtY+FBaqJm1UZJGAKMi4jNb+PxdI2KlpD1Jdu6HAB3StgqSYP1TRNxY8JyvkJxEv1LSb4HvkuxcvxgRp2/dFmVPUk/gnojom85fBqyOiJ9IGgN0jogfSDqK5MhyCHAwcHVE9C9Yz2HAf0fE6ZKuIDmZviztt91cxusvq5i1Yem4/reBn2/Fau5ML7V9B/hORKxNLyn9Trp8GvC/9V7zqySX8EJyXmUGyfDRiVtRR0lI+jUwAOgiaQXwI+AnwO2STiU5Kvty2n0GSQgsIRkC+lrBekRyNDYsbZpMcnTUnoJhzO1BZkcEkm4EjgZWbkjVessFXEXyJtaRXFb1eCbFmOWQpM+R7KT/ABwXyVU/ZpvJ8mTxFGBwE8s/TzJu1gsYRXIFiJltIxExMyJ2jIhjHQLWlMyCIBq+FrdQY9ffmplZCZXzHEFj19++XL+jpFEkRw3suOOOB/bp06d+FzMza8Jjjz22KiK6NrSsVZwsjojJJCdaqKmpiQULFpS5IjNrLXqOadGtlbZry35y1BY/V9LfG1tWziB4kfe/jQjJFy5eLFMtZm1aW9kZbs2O0BpXzm8WTwdGpHfvOwT4V7z/FW4zMyuRzI4IGrkWtxIgIq6jietvzcysdDILgogY3szyAL7TVB8zM8uebzpnVoSrrrqKvn37st9++3HllVcCcPbZZ9OnTx/69evH0KFDWbt2LQBz5syhX79+1NTU8NxzzwGwdu1aBg0axHvvbZc3n7SccxCYNWPRokXccMMNzJs3jyeeeIJ77rmHJUuWMHDgQBYtWsSTTz5J7969ufTSSwGYOHEiM2bM4Morr+S665KfOxg/fjznnHMOFRX+J2fbH/9VmjVj8eLFHHzwwVRVVdG+fXsOO+wwpk2bxqBBg2jfPhldPeSQQ1ixYgUAlZWV1NXVUVdXR2VlJUuXLmX58uUMGDCgjFth1rhW8T0Cs3Lq27cv5557LqtXr6Zjx47MmDGDmpqaTfrceOONDBuW3Fts7NixjBgxgo4dO3LzzTdz1llnMX78+HKUblYUB4FZM/bZZx9++MMfMmjQIHbccUeqq6tp167dxuUXX3wx7du356STTgKgurqauXPnAjB79my6detGRDBs2DAqKyuZOHEiu+3W6n+fxdoQDw2ZFeHUU0/lscceY/bs2eyyyy707p38wNSUKVO45557mDp1KskNdd8XEYwfP55x48Zx4YUXMmHCBEaOHMnVV19djk0wa5SPCMyKsHLlSnbddVdeeOEFpk2bxty5c7n//vuZMGECDz74IFVVVZs956abbmLIkCF07tyZuro6KioqqKiooK6urgxbYNY4B4FZEY477jhWr15NZWUlkyZNolOnTowePZp169YxcOBAIDlhvOEqobq6OqZMmcKsWbMAOOOMMxgyZAgdOnTglltuKdt2mDXEQWBWhIceemiztiVLljTav6qqitra2o3zhx56KE899VQmtZltLZ8jMDPLOQeBmVnOOQjMzHLO5wgsF9rK/fjB9+S3bc9HBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OcyzQIJA2W9KykJZLGNLB8T0m1kv4s6UlJQ7Ksx8zMNpdZEEhqB0wCPg/sCwyXtG
+9bucBt0fEJ4ATgJ9nVY+ZmTUsyyOC/sCSiHg+It4GbgWOrdcngP9Mpz8AvJRhPWZm1oAsg6A7sLxgfkXaVugC4GRJK4AZwGkNrUjSKEkLJC149dVXs6jVzCy3yn2yeDgwJSJ6AEOAmyVtVlNETI6Imoio6dq1a8mLNDNry7IMgheBPQrme6RthU4FbgeIiEeAHYAuGdZkZmb1ZBkE84Fekj4iqQPJyeDp9fq8ABwBIGkfkiDw2I+ZWQllFgQRsR4YDcwEFpNcHfS0pIskHZN2OxMYKekJ4NfAVyMisqrJzMw21z7LlUfEDJKTwIVt5xdMPwN8OssazMysaeU+WWxmZmXmIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8u5TINA0mBJz0paImlMI32+LOkZSU9LuiXLeszMbHPts1qxpHbAJGAgsAKYL2l6RDxT0KcXMBb4dESskbRrVvWYmVnDsjwi6A8siYjnI+Jt4Fbg2Hp9RgKTImINQESszLAe20o9e/bk4x//ONXV1dTU1ABwwQUX0L17d6qrq6murmbGjBkAzJkzh379+lFTU8Nzzz0HwNq1axk0aBDvvfde2bbBzDaX2REB0B1YXjC/Aji4Xp/eAJLmAO2ACyLi/vorkjQKGAWw5557ZlKsFae2tpYuXbps0nb66adz1llnbdI2ceJEZsyYwbJly7juuuuYOHEi48eP55xzzqGiwqemzLYn5f4X2R7oBQwAhgM3SOpUv1NETI6Imoio6dq1a2krtC1SWVlJXV0ddXV1VFZWsnTpUpYvX86AAQPKXZqZ1dNsEEj6gqQtCYwXgT0K5nukbYVWANMj4p2I+BvwV5Jg2C41NDSywcSJE5HEqlWrALjzzjvZb7/9OPTQQ1m9ejUAS5cuZdiwYSWve1uRxKBBgzjwwAOZPHnyxvZrr72Wfv368fWvf501a9YAMHbsWEaMGMGll17K6NGjOffccxk/fny5SjezJhSzgx8GPCdpgqQ+LVj3fKCXpI9I6gCcAEyv1+cukqMBJHUhGSp6vgWvUXK1tbUsXLiQBQsWbGxbvnw5s2bN2mTY6pprrmH+/Pl84xvf4JZbkouhzjvvvFa9M3z44Yd5/PHHue+++5g0aRKzZ8/mW9/6FkuXLmXhwoV069aNM888E4Dq6mrmzp1LbW0tzz//PN26dSMiGDZsGCeffDKvvPJKmbfGzDZoNggi4mTgE8BSYIqkRySNkrRzM89bD4wGZgKLgdsj4mlJF0k6Ju02E1gt6RmgFjg7IlZvxfaUxemnn86ECROQtLGtoqKCdevWbRwaeeihh9h9993p1Wu7PeBpVvfu3QHYddddGTp0KPPmzWO33XajXbt2VFRUMHLkSObNm7fJcyKC8ePHM27cOC688EImTJjAyJEjufrqq8uxCWbWgKKGfCLiNeAOkit/ugFDgcclndbM82ZERO+I2DsiLk7bzo+I6el0RMQZEbFvRHw8Im7dqq3JWENDI7/73e/o3r07+++//yZ9x44dy5FHHsndd9/N8OHD+fGPf8y4cePKUfY28eabb/L6669vnJ41axZ9+/bl5Zdf3tjnt7/9LX379t3keTfddBNDhgyhc+fO1NXVUVFRQUVFBXV1dSWt38wa1+xVQ+mn968BHwVuAvpHxEpJVcAzwDXZlrj9ePjhh+nevTsrV65k4MCB9OnTh0suuYRZs2Zt1nfgwIEMHDgQeH9n+Ne//pXLL7+cXXbZhauuuoqqqqpSb8IWe+WVVxg6dCgA69ev58QTT2Tw4MGccsopLFy4EEn07NmT66
+/fuNz6urqmDJlysb354wzzmDIkCF06NBh43CZmZVfMZePHgdcERGzCxsjok7SqdmUtX2qPzTy4IMP8re//W3j0cCKFSs44IADmDdvHrvvvjvw/s5w5syZHH300UybNo077riDqVOnMnLkyLJtS0vttddePPHEE5u133zzzY0+p6qqitra2o3zhx56KE899VQm9ZnZlitmaOgCYOPAr6SOknoCRMQD2ZS1/WloaOSggw5i5cqVLFu2jGXLltGjRw8ef/zxjSEAcNlll/Hd736XyspK3nrrLSR5aMTMtivFHBH8BvhUwfy7adtBmVS0nWpsaKQpL730EvPmzeNHP/oRAKeddhoHHXQQnTp14q677sq6ZDOzohQTBO3TW0QAEBFvp5eD5kpjQyOFli1btsn8hz70Ie69996N88cffzzHH398FuWZmW2xYoLgVUnHbLjSR9KxwKpsy7Is9Bxzb/OdWoFlPzmq3CWYtSnFBME3gamSrgVEcv+gEZlWZWZmJdNsEETEUuAQSTul829kXpWZmZVMUXcflXQUsB+ww4Zvz0bERRnWlYm2MjQCHh4xs22nmJvOXUdyv6HTSIaGjgc+nHFdZmZWIsV8j+BTETECWBMRFwKfJP0dATMza/2KCYJ/p/+tk/Qh4B2S+w2ZmVkbUMw5grvTH4u5DHgcCOCGLIsyM7PSaTII0h+keSAi1gJ3SroH2CEi/lWK4szMLHtNDg1FxHvApIL5dQ4BM7O2pZhzBA9IOk6Fv7piZmZtRjFB8A2Sm8ytk/SapNclvZZxXWZmViLFfLO4yZ+kNDOz1q2YXyj7bEPt9X+oxszMWqdiLh89u2B6B6A/8BhweCYVmZlZSRUzNPSFwnlJewBXZlWQmZmVVjEni+tbAeyzrQsxM7PyKOYcwTUk3yaGJDiqSb5hbGZmbUAx5wgWFEyvB34dEXMyqsfMzEqsmCC4A/h3RLwLIKmdpKqIqMu2NDMzK4WivlkMdCyY7wj8IZtyzMys1IoJgh0Kf54yna7KriQzMyulYoLgTUkHbJiRdCDwVnYlmZlZKRVzjuD7wG8kvUTyU5W7k/x0pZmZtQHFfKFsvqQ+wMfSpmcj4p1syzIzs1Ip5sfrvwPsGBGLImIRsJOkb2dfmpmZlUIx5whGpr9QBkBErAFGZlaRmZmVVDFB0K7wR2kktQM6ZFeSmZmVUjEni+8HbpN0fTr/DeC+7EoyM7NSKiYIfgiMAr6Zzj9JcuWQmZm1Ac0ODaU/YP8osIzktwgOBxYXs3JJgyU9K2mJpDFN9DtOUkiqKa5sMzPbVho9IpDUGxiePlYBtwFExH8Vs+L0XMIkYCDJravnS5oeEc/U67cz8D2SsDEzsxJr6ojgLySf/o+OiM9ExDXAuy1Yd39gSUQ8HxFvA7cCxzbQ78fAT4F/t2DdZma2jTQVBF8EXgZqJd0g6QiSbxYXqzuwvGB+Rdq2UXrrij0i4t6mViRplKQFkha8+uqrLSjBzMya02gQRMRdEXEC0AeoJbnVxK6S/kfSoK19YUkVwM+AM5vrGxGTI6ImImq6du26tS9tZmYFijlZ/GZE3JL+dnEP4M8kVxI150Vgj4L5HmnbBjsDfYE/SVoGHAJM9wljM7PSatFvFkfEmvTT+RFFdJ8P9JL0EUkdgBOA6QXr+ldEdImInhHRE5gLHBMRCxpenZmZZWFLfry+KBGxHhgNzCS53PT2iHha0kWSjsnqdc3MrGWK+ULZFouIGcCMem3nN9J3QJa1mJlZwzI7IjAzs9bBQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzM
xyzkFgZpZzmQaBpMGSnpW0RNKYBpafIekZSU9KekDSh7Osx8zMNpdZEEhqB0wCPg/sCwyXtG+9bn8GaiKiH3AHMCGreszMrGFZHhH0B5ZExPMR8TZwK3BsYYeIqI2IunR2LtAjw3rMzKwBWQZBd2B5wfyKtK0xpwL3NbRA0ihJCyQtePXVV7dhiWZmtl2cLJZ0MlADXNbQ8oiYHBE1EVHTtWvX0hZnZtbGtc9w3S8CexTM90jbNiHpSOBc4LCIWJdhPWZm1oAsjwjmA70kfURSB+AEYHphB0mfAK4HjomIlRnWYmZmjcgsCCJiPTAamAksBm6PiKclXSTpmLTbZcBOwG8kLZQ0vZHVmZlZRrIcGiIiZgAz6rWdXzB9ZJavb2ZmzdsuThabmVn5OAjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpb/h6Tb0uWPSuqZZT1mZra5zIJAUjtgEvB5YF9guKR963U7FVgTER8FrgB+mlU9ZmbWsCyPCPoDSyLi+Yh4G7gVOLZen2OBX6bTdwBHSFKGNZmZWT2KiGxWLH0JGBwR/yedPwU4OCJGF/RZlPZZkc4vTfusqreuUcCodPZjwLOZFL3tdAFWNdurbfK251eet781bPuHI6JrQwval7qSLRERk4HJ5a6jWJIWRERNuesoB297Prcd8r39rX3bsxwaehHYo2C+R9rWYB9J7YEPAKszrMnMzOrJMgjmA70kfURSB+AEYHq9PtOBr6TTXwL+GFmNVZmZWYMyGxqKiPWSRgMzgXbAjRHxtKSLgAURMR34BXCzpCXAP0nCoi1oNcNYGfC251eet79Vb3tmJ4vNzKx18DeLzcxyzkFgZpZzDoJtqLlbarRlkm6UtDL9bkiuSNpDUq2kZyQ9Lel75a6pVCTtIGmepCfSbb+w3DWVg6R2kv4s6Z5y17IlHATbSJG31GjLpgCDy11EmawHzoyIfYFDgO/k6P/9OuDwiNgfqAYGSzqkvCWVxfeAxeUuYks5CLadYm6p0WZFxGySK79yJyJejojH0+nXSXYI3ctbVWlE4o10tjJ95OoKFEk9gKOA/1vuWraUg2Db6Q4sL5hfQU52Bva+9A66nwAeLXMpJZMOiywEVgK/j4jcbHvqSuAHwHtlrmOLOQjMthFJOwF3At+PiNfKXU+pRMS7EVFNcveA/pL6lrmkkpF0NLAyIh4rdy1bw0Gw7RRzSw1royRVkoTA1IiYVu56yiEi1gK15Otc0aeBYyQtIxkOPlzSr8pbUss5CLadYm6pYW1Qeuv0XwCLI+Jn5a6nlCR1ldQpne4IDAT+UtaiSigixkZEj4joSfJv/o8RcXKZy2oxB8E2EhHrgQ231FgM3B4RT5e3qtKR9GvgEeBjklZIOrXcNZXQp4FTSD4NLkwfQ8pdVIl0A2olPUnyYej3EdEqL6HMM99iwsws53xEYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOdcqfrzerJwkfRB4IJ3dHXgXeDWd75/eW6qp538VqImI0ZkVabYVHARmzYiI1SR31kTSBcAbEXF5OWsy25Y8NGS2BSSNlDQ/vQ//nZKq0vbjJS1K22c38LyjJD0iqUvpqzZrmIPAbMtMi4iD0vvwLwY2fJP6fOBzafsxhU+QNBQYAwyJiFUlrdasCR4aMtsyfSWNBzoBO5HcWgRgDjBF0u1A4c3nDgdqgEF5ujOptQ4+IjDbMlOA0RHxceBCYAeAiPgmcB7JnWgfS080AywFdgZ6l75Us6Y5CMy2zM7Ay+
ntp0/a0Chp74h4NCLOJ7myaMOtyf8OHAfcJGm/kldr1gQHgdmWGUfyK2Rz2PS2y5dJekrSIuD/AU9sWBARfyEJjd9I2ruUxZo1xXcfNTPLOR8RmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZz/x/jOYg2+yx1FwAAAABJRU5ErkJggg==\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"results.make_plots()"
]
},
{
"source": [
"As you can see, our model's performance on earlier tasks quickly deteriorates as new tasks are learned, a phenomenon referred to as \"Catastrophic Forgetting\".\n",
"Next, we'll try to do something about it.\n"
],
"cell_type": "markdown",
"metadata": {}
},
{
"source": [
"## Adding a CL Mechanism\n",
"\n",
"First, by taking a look at the logs above, you will notice that we are told that our Method doesn't have an `on_task_switch` method.\n",
"\n",
"A Setting would call this `on_task_switch` method during training or evaluation if we are allowed to know when task boundaries occur in that setting. Additionally, if it's allowed in that Setting, we might also receive the index of the new task we are switching to.\n",
"\n",
"Using this information, here we will add an EWC-like penalty to our model, which will prevent its weights from changing too much between tasks. We'll use the `on_task_switch` method to update the 'anchor' weights every time a task boundary is encountered.\n"
],
"cell_type": "markdown",
"metadata": {}
},
{
"source": [],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"from copy import deepcopy\n",
"from sequoia.utils import dict_intersection\n",
"\n",
"class MyImprovedModel(MyModel):\n",
" \"\"\" Adds an ewc-like penalty to the demo model. \"\"\"\n",
" def __init__(self,\n",
" observation_space: gym.Space,\n",
" action_space: gym.Space,\n",
" reward_space: gym.Space,\n",
" ewc_coefficient: float = 1.0,\n",
" ewc_p_norm: int = 2,\n",
" ):\n",
" super().__init__(\n",
" observation_space,\n",
" action_space,\n",
" reward_space,\n",
" )\n",
" self.ewc_coefficient = ewc_coefficient\n",
" self.ewc_p_norm = ewc_p_norm\n",
"\n",
" self.previous_model_weights: Dict[str, Tensor] = {}\n",
"\n",
" self._previous_task: Optional[int] = None\n",
" self._n_switches: int = 0\n",
"\n",
" def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):\n",
" base_loss, metrics = super().shared_step(batch, *args, **kwargs)\n",
" ewc_loss = self.ewc_coefficient * self.ewc_loss()\n",
" metrics[\"ewc_loss\"] = ewc_loss\n",
" return base_loss + ewc_loss, metrics\n",
"\n",
" def on_task_switch(self, task_id: Optional[int])-> None:\n",
" \"\"\" Executed when the task switches (to either a known or unknown task).\n",
" \"\"\"\n",
" if self._previous_task is None and self._n_switches == 0:\n",
" print(\"Starting the first task, no EWC update.\")\n",
" elif task_id is None or task_id != self._previous_task:\n",
" # NOTE: We also switch between unknown tasks.\n",
" print(f\"Switching tasks: {self._previous_task} -> {task_id}: \")\n",
" print(f\"Updating the EWC 'anchor' weights.\")\n",
" self._previous_task = task_id\n",
" self.previous_model_weights.clear()\n",
" self.previous_model_weights.update(deepcopy({\n",
" k: v.detach() for k, v in self.named_parameters()\n",
" }))\n",
" self._n_switches += 1\n",
"\n",
" def ewc_loss(self) -> Tensor:\n",
" \"\"\"Gets an 'ewc-like' regularization loss.\n",
"\n",
" NOTE: This is a simplified version of EWC where the loss is the P-norm\n",
" between the current weights and the weights as they were at the beginning\n",
" of the task.\n",
" \"\"\"\n",
" if self._previous_task is None:\n",
" # We're in the first task: do nothing.\n",
" return 0.\n",
"\n",
" old_weights: Dict[str, Tensor] = self.previous_model_weights\n",
" new_weights: Dict[str, Tensor] = dict(self.named_parameters())\n",
"\n",
" loss = 0.\n",
" for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):\n",
" loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)\n",
" return loss\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"\n",
"class ImprovedDemoMethod(DemoMethod):\n",
" \"\"\" Improved version of the demo method, that adds an ewc-like regularizer.\n",
" \"\"\"\n",
" # Name of this method: \n",
" @dataclass\n",
" class HParams(DemoMethod.HParams):\n",
" \"\"\" Hyperparameters of this new improved method. (Adds ewc params).\"\"\"\n",
" # Coefficient of the ewc-like loss.\n",
" ewc_coefficient: float = 1.0\n",
" # Distance norm used in the ewc loss.\n",
" ewc_p_norm: int = 2\n",
"\n",
" def __init__(self, hparams: HParams):\n",
" super().__init__(hparams=hparams)\n",
" \n",
" def configure(self, setting: ClassIncrementalSetting):\n",
" # Use the improved model, with the added EWC-like term.\n",
" self.model = MyImprovedModel(\n",
" observation_space=setting.observation_space,\n",
" action_space=setting.action_space,\n",
" reward_space=setting.reward_space,\n",
" ewc_coefficient=self.hparams.ewc_coefficient,\n",
" ewc_p_norm = self.hparams.ewc_p_norm,\n",
" )\n",
" self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.hparams.learning_rate)\n",
"\n",
" def on_task_switch(self, task_id: Optional[int]):\n",
" self.model.on_task_switch(task_id)"
]
},
{
"source": [
"## Running the \"Improved\" method"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"2021-02-25:17:29:31,526 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 0.\n",
"2021-02-25:17:29:31,580 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
"2021-02-25:17:29:31,581 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
"Training Epoch 0: 0%| | 0/300 [00:00, ?it/s]Starting the first task, no EWC update.\n",
"Training Epoch 0: 100%|██████████| 300/300 [00:03<00:00, 79.82it/s, accuracy=1, ewc_loss=0]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 147.76it/s, accuracy=1, ewc_loss=0, val_loss=tensor(3.3188)]\n",
"2021-02-25:17:29:35,880 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 0.\n",
"2021-02-25:17:29:35,921 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:433] Number of train tasks: 5.\n",
"2021-02-25:17:29:35,921 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:434] Number of test tasks: 5.\n",
"2021-02-25:17:29:35,950 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 14%|█▍ | 43/312 [00:00<00:01, 211.59it/s]Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 239.22it/s]\n",
"2021-02-25:17:29:37,352 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.690505\n",
"2021-02-25:17:29:37,353 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 1.\n",
"Training Epoch 0: 0%| | 0/300 [00:00, ?it/s]Switching tasks: None -> 1: \n",
"Updating the EWC 'anchor' weights.\n",
"Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 59.70it/s, accuracy=0.875, ewc_loss=tensor(0.2296, grad_fn=)]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 143.94it/s, accuracy=0.969, ewc_loss=tensor(0.2221), val_loss=tensor(33.0478)]\n",
"2021-02-25:17:29:42,905 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 1.\n",
"2021-02-25:17:29:42,909 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 12%|█▎ | 39/312 [00:00<00:01, 190.68it/s]Switching tasks: 1 -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 218.28it/s]\n",
"2021-02-25:17:29:44,441 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.745092\n",
"2021-02-25:17:29:44,442 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 2.\n",
"Training Epoch 0: 0%| | 0/300 [00:00, ?it/s]Switching tasks: None -> 2: \n",
"Updating the EWC 'anchor' weights.\n",
"Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 54.67it/s, accuracy=0.906, ewc_loss=tensor(0.3728, grad_fn=)]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 162.51it/s, accuracy=0.906, ewc_loss=tensor(0.3689), val_loss=tensor(43.5458)]\n",
"2021-02-25:17:29:50,398 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 2.\n",
"2021-02-25:17:29:50,402 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 15%|█▍ | 46/312 [00:00<00:01, 231.12it/s]Switching tasks: 2 -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 239.81it/s]\n",
"2021-02-25:17:29:51,801 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.915665\n",
"2021-02-25:17:29:51,801 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 3.\n",
"Training Epoch 0: 0%| | 0/300 [00:00, ?it/s]Switching tasks: None -> 3: \n",
"Updating the EWC 'anchor' weights.\n",
"Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 54.25it/s, accuracy=1, ewc_loss=tensor(0.0175, grad_fn=)]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 144.31it/s, accuracy=0.969, ewc_loss=tensor(0.0182), val_loss=tensor(8.4141)]\n",
"2021-02-25:17:29:57,857 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 3.\n",
"2021-02-25:17:29:57,861 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 13%|█▎ | 42/312 [00:00<00:01, 211.24it/s]Switching tasks: 3 -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 231.53it/s]\n",
"2021-02-25:17:29:59,316 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.917368\n",
"2021-02-25:17:29:59,317 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:184] Starting training on task 4.\n",
"Training Epoch 0: 0%| | 0/300 [00:00, ?it/s]Switching tasks: None -> 4: \n",
"Updating the EWC 'anchor' weights.\n",
"Training Epoch 0: 100%|██████████| 300/300 [00:05<00:00, 55.17it/s, accuracy=1, ewc_loss=tensor(0.0487, grad_fn=)]\n",
"Validation Epoch 0: 100%|██████████| 75/75 [00:00<00:00, 147.18it/s, accuracy=0.938, ewc_loss=tensor(0.0635), val_loss=tensor(14.3717)]\n",
"2021-02-25:17:30:05,271 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:212] Finished Training on task 4.\n",
"2021-02-25:17:30:05,276 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:347] Will query the method for actions at each step, since it doesn't implement a `test` method.\n",
"Test: 14%|█▍ | 45/312 [00:00<00:01, 219.80it/s]Switching tasks: 4 -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Switching tasks: None -> None: \n",
"Updating the EWC 'anchor' weights.\n",
"Test: 100%|██████████| 312/312 [00:01<00:00, 219.23it/s]\n",
"2021-02-25:17:30:06,803 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:217] Resulting objective of Test Loop: 0.90605\n",
"2021-02-25:17:30:06,804 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:237] Finished main loop in 36.293361921000006 seconds.\n",
"2021-02-25:17:30:06,894 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/assumptions/incremental.py:257] {\n",
"\t\"Task 0\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.981351\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.752976\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.53125\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.640377\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.546371\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 1\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.927419\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.896825\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.457157\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.700397\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.741935\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 2\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.970766\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.780258\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.94254\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.990079\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.895665\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 3\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.972278\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.770833\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.939516\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.990575\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.914819\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 4\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.970766\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.708333\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.88004\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.989583\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.983367\n",
"\t\t}\n",
"\t},\n",
"\t\"Final/Average Online Performance\": 0,\n",
"\t\"Final/Average Final Performance\": 0.90605,\n",
"\t\"Final/Runtime (seconds)\": 36.293361921000006,\n",
"\t\"Final/CL Score\": 0.74363\n",
"}\n",
"\n",
"2021-02-25:17:30:06,997 INFO [/home/fabrice/repos/Sequoia/sequoia/settings/passive/cl/class_incremental_setting.py:395] {\n",
"\t\"Task 0\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.981351\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.752976\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.53125\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.640377\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.546371\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 1\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.927419\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.896825\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.457157\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.700397\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.741935\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 2\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.970766\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.780258\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.94254\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.990079\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.895665\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 3\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.972278\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.770833\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.939516\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.990575\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.914819\n",
"\t\t}\n",
"\t},\n",
"\t\"Task 4\": {\n",
"\t\t\"Task 0\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.970766\n",
"\t\t},\n",
"\t\t\"Task 1\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.708333\n",
"\t\t},\n",
"\t\t\"Task 2\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.88004\n",
"\t\t},\n",
"\t\t\"Task 3\": {\n",
"\t\t\t\"n_samples\": 2016,\n",
"\t\t\t\"accuracy\": 0.989583\n",
"\t\t},\n",
"\t\t\"Task 4\": {\n",
"\t\t\t\"n_samples\": 1984,\n",
"\t\t\t\"accuracy\": 0.983367\n",
"\t\t}\n",
"\t},\n",
"\t\"Final/Average Online Performance\": 0,\n",
"\t\"Final/Average Final Performance\": 0.90605,\n",
"\t\"Final/Runtime (seconds)\": 36.293361921000006,\n",
"\t\"Final/CL Score\": 0.74363\n",
"}\n",
"\n"
]
}
],
"source": [
"improved_method = ImprovedDemoMethod(hparams=ImprovedDemoMethod.HParams())\n",
"setting = DomainIncrementalSetting(dataset=\"fashionmnist\")\n",
"improved_results = setting.apply(improved_method)"
]
},
{
"source": [
"## Improved Results"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n\t\"Task 0\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.981351\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.752976\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.53125\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.640377\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.546371\n\t\t}\n\t},\n\t\"Task 1\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.927419\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.896825\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.457157\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.700397\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.741935\n\t\t}\n\t},\n\t\"Task 2\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.970766\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.780258\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.94254\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.990079\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.895665\n\t\t}\n\t},\n\t\"Task 3\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.972278\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.770833\n\t\t},\n\t\t\"Task 2\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.939516\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.990575\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.914819\n\t\t}\n\t},\n\t\"Task 4\": {\n\t\t\"Task 0\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.970766\n\t\t},\n\t\t\"Task 1\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.708333\n\t\t},\n\t\t\"Task 2\": 
{\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.88004\n\t\t},\n\t\t\"Task 3\": {\n\t\t\t\"n_samples\": 2016,\n\t\t\t\"accuracy\": 0.989583\n\t\t},\n\t\t\"Task 4\": {\n\t\t\t\"n_samples\": 1984,\n\t\t\t\"accuracy\": 0.983367\n\t\t}\n\t},\n\t\"Final/Average Online Performance\": 0,\n\t\"Final/Average Final Performance\": 0.90605,\n\t\"Final/Runtime (seconds)\": 36.293361921000006,\n\t\"Final/CL Score\": 0.74363\n}\n\n"
]
}
],
"source": [
"print(improved_results.summary())"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'task_metrics': }"
]
},
"metadata": {},
"execution_count": 12
},
{
"output_type": "display_data",
"data": {
"text/plain": "",
"image/svg+xml": "\n\n\n\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcv0lEQVR4nO3de7xUdb3/8dd76ybBS0RCyUUxDyoXE3GHpNmxLNJtiYimmFodf2IXTEXzaL/0qGEXO4QHo6NmHryDphUZikSURxJ1k4ggoWgkFwskhGRUbp/zx1rosNmX2ciaYe/1fj4e83DWmu+s9VkI857v97vWGkUEZmaWX1WVLsDMzCrLQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnILDckTRB0uhK12G2s3AQ2E5P0utFj82S3iha/kKZapggaaOkfcqxP7NychDYTi8i9tjyAF4GPle07q6s9y9pd2AYsAY4M+v91dv3ruXcn+WTg8BaLUkDJT0u6TVJr0j6saR26WuSNFbSCklrJT0rqV8D29hT0gxJ4ySpkV0NA14DrgG+WO/9nST9j6TlklZL+mXRa0MkzUn3/6Kk49L1iyV9qqjdVZLuTJ/3lBSSzpH0MvC7dP19kv4maY2kRyX1LXp/e0ljJP01ff2xdN1vJJ1fr965koa24I/ZcsBBYK3ZJuAiYG/go8CxwNfS1wYDHwcOBN4LfB5YVfxmSe8HpgMzI+Ib0fj9Vr4I3ANMBA6WdHjRa3cAHYC+QBdgbLrtgcDtwDeBjmkti1twbP8K9AY+ky4/BPRK9/EnoLgn9J/A4cCRQCfgUmAzcBtFPRhJhwLdgN+0oA7LAQeBtVoRMTsiZkXExohYDNxE8gEKsAHYEzgYUEQsiIhXit7eFfgDcF9EfLuxfUjaF/gEcHdE/J0kOM5OX9sHOB74SkSsjogNEfGH9K3nALdGxLSI2BwRyyLizy04vKsiYl1EvJEe660R8c+IeAu4CjhU0nslVQH/BlyQ7mNTRPwxbTcZOFBSr3SbZwGTImJ9C+pA0gWS5kmaL+nCdN2haW/sWUm/lrRXuv6otNdRt2W/kjpKeiSt1XZC/h9jrZakAyU9mA6ZrAW+S9I7ICJ+B/wYGA+skHTzlg+r1AlAe+DGZnZzFrAgIuaky3cBZ0iqBnoA/4iI1Q28rwfw4nYeGsCSLU8k7SLp++nw0lre6VnsnT52a2hfEfEmMAk4M/0QHk7SgylZOpx2LjAQOBT4rKR/AW4BLouIQ4BfkPR8AC4GaoELga+k674NfDciNrdk31Y+DgJrzf4b+DPQKyL2Ar4FvD3OHxHjIuJwoA/JENE3i977U+BhYEo6GdyYs4EPpWHzN+BHJB++tSQf1p0kdWzgfUuAAxrZ5jqS4aQtPthAm+JhqjOAIcCnSIa5eqbrBbwKvNnEvm4DvkAybFaIiMcbadeY3sATEVGIiI0kvaiTSf48H03bTCOZR4GkJ9YhfWyQdADQIyJ+38L97jQa6RH1lzQrnQOqS4cCkTQsbfe/6dAjkg6QNKmCh9AsB4G1ZnsCa4HXJR0MfHXLC5I+IumI9Jv7OpIPy/rfSEcCC4FfS2pff+OSPkryATsQ6J8++gF3A2enQ00PAT+R9D5J1ZI+nr79Z8CXJR0rqUpSt7RGgDnA6Wn7GuCUEo7zLZI5jg4kPR8A0m/ZtwI/ktQ17T18VNJ70tcfT497DC3sDaTmAUdLer+kDiQB2AOYTxJOAKem6wC+RzI3cjlJj+xakh5Bq9REj+g64OqI6A9cmS4DnA98hGSY8ox03Wh28j8DB4G1ZpeQ/GP7J8k3/OJvXXul61YDfyX5EP1h8ZvTyeERwFLgV5J2q7f9LwK/iohnI+JvWx7Af5F8IHQiGTraQNIzWUEyJEJEPAl8mWTyeA3JN+n90u1eQRIwq4GrSYKlKbenx7AMeA6Y1cCfw7PAU8A/gB+w9b/t24FDgDub2c82ImJBur1HSH
pQc0gm6f8N+Jqk2SRBtT5tPyciBkXEJ4APAa+QnMQ1SdKdkj7Q0hoqrLEeUZD8HYOkl7Y8fb4ZeA/v9IiOBv4WES+Ut+wWigg/SnwAF5B8Q5oPXJium0Tyj2MOydjtnHT9UcBcoI5k6AKSs0ceAaoqfSx+5OdBMrz12A7a1neBr9VbdyDwZL11Sv+udyKZV9mPZCL/2kr/ebTweHsDzwPvJ/lwfxy4IV3/MskQ4DJgv7T9p4HZwK9JAuIRoFOlj6O5hy9WKVG9LuJ64GFJD0bEaUVtxpB8+4N3Js16kkyaXYwnzazM0uGcrwE/eRfb6BIRK9IzqE4GBhWtqyL5e11/0v1sYEpE/COtYXP66EArEhELJG3pEa3jnR7RV4GLIuJ+SZ8nGQr8VERMI5kzQdLZwBSSM7cuIekBXhARhfIfSdMyGxqSdKuSi3nmNfK6lFzEsyg93WxAVrXsII11EYHkeEjOVb8nXdXmJs2sdZH0GWAl8HeaH35qyv2SniP5lvv1iHgNGC7peZIhseXA/xTttwPwJZIztiCZYJ8CXE/zZ2ntdCLiZxFxeER8nOTD/HmSYcMH0ib3kXxBfFu9P4Or0/aPkUzc73wy7FJ9HBgAzGvk9VqSiTYBg0g+ZCveRWppF7He8dYVLfcnGcudAXQnuRipV6WPww8//GjZA+iS/ndfkuDrCCwAjknXHwvMrvee/wBOSp8/mn5mnEXSI6j4MdV/ZDY0FBGPSurZRJMhwO2R/EnNSi862Se2vuhnpxGNdxG3GM47vQEiOe98EEB6Jsnbk2YkvYWLI7lAycx2bvenp4JuIO0RSToX+C8l94J6k+SkAwAkdQUGRsTV6aobSCbyXwNOKmfhpVKaWNlsPAmCByOioXu8PAh8PyIeS5enA/8eEXUNtB1B+ge9++67H37wwQfXb1J2y5Yto7q6mi5duhARzJ07l969e9OuXbut2kUEL7zwAh/60IdYsmQJXbt2Zf369axdu5Zu3bpVqHozy5vZs2e/GhGdG3qtVUwWR8TNwM0ANTU1UVe3TVaUxYoVK+jSpQsvv/wygwcPZtasWXTs2JGHH36Y733ve/zhD3/Y5j233XYbq1ev5sILL2To0KGMGzeOxYsX88ADDzB27NgKHIWZ5ZGkvzb2WiWDYBnvXIQCyTj6sgrVUpJhw4axatUqqqurGT9+PB07dgRg4sSJDB8+fJv2hUKBCRMm8MgjjwAwatQoamtradeuHXff/W7m7szMdpxKDg2dQHJlZy1wBDAuIgbWb1dfJXsEZq1Vz8vaxg1HF3//hEqX0GpJmh0RNQ29llmPQNI9wDHA3pKWksyiVwNExI0kp5PVAouAAslVmGZmO1RbCUHILgizPGto27GSrV8P4OtZ7d/MzErjew2ZmeVcqzhraEdxF9HMbFvuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYFaCsWPH0rdvX/r168fw4cN58803mT59OgMGDKB///587GMfY9GiRQDccMMN9OvXj9raWtavXw/AY489xkUXXVTJQzBrlIPArBnLli1j3Lhx1NXVMW/ePDZt2sTEiRP56le/yl133cWcOXM444wzGD16NAB33XUXc+fO5cgjj2Tq1KlEBN/5zne44oorKnwkZg1zEJiVYOPGjbzxxhts3LiRQqFA165dkcTatWsBWLNmDV27dgWS36DYsGEDhUKB6upq7rzzTo4//ng6depUyUMwa1Suriw22x7dunXjkksuYd9996V9+/YMHjyYwYMHc8stt1BbW0v79u3Za6+9mDVrFgAjR45k0KBB9O3bl6OOOoohQ4YwderUCh+FWePcIzBrxurVq/nVr37FX/7yF5YvX866deu48847GTt2LFOmTGHp0qV8+ctfZtSoUQCcddZZPP3002+3+cY3vsFDDz3EKaecwkUXXcTmzZsrfERmW3MQmDXjt7/9Lfvvvz+dO3emurqak08+mZkzZ/LMM89wxB
FHAHDaaafxxz/+cav3LV++nCeffJKTTjqJMWPGMGnSJDp27Mj06dMrcRhmjXIQmDVj3333ZdasWRQKBSKC6dOn06dPH9asWcPzzz8PwLRp0+jdu/dW77viiiu45pprAHjjjTeQRFVVFYVCoezHYNYUzxGYNeOII47glFNOYcCAAey6664cdthhjBgxgu7duzNs2DCqqqp43/vex6233vr2e55++mkABgwYAMAZZ5zBIYccQo8ePbj00ksrchxmjcn0pyqz8G5+qtK3oba8ait/97fn731bOXZ4d//um/qpSg8NmZnlnIPAzCznHARmZjnnyWLLBY8TmzXOPQIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOZRoEko6TtFDSIkmXNfD6vpJmSHpa0lxJtVnWY2Zm28osCCTtAowHjgf6AMMl9anX7NvAvRFxGHA68JOs6jEzs4Zl2SMYCCyKiJciYj0wERhSr00Ae6XP3wssz7AeMzNrQJZB0A1YUrS8NF1X7CrgTElLgSnA+Q1tSNIISXWS6lauXJlFrWZmuVXpyeLhwISI6A7UAndI2qamiLg5ImoioqZz585lL9LMrC3LMgiWAT2Klrun64qdA9wLEBGPA7sBe2dYk5mZ1ZNlEDwF9JK0v6R2JJPBk+u1eRk4FkBSb5Ig8NiPmVkZZRYEEbERGAlMBRaQnB00X9I1kk5Mm10MnCvpGeAe4EsREVnVZGZm29o1y41HxBSSSeDidVcWPX8OOCrLGszMrGmVniw2M7MKcxCYmeWcg8BKsnDhQvr37//2Y6+99uL666/nvvvuo2/fvlRVVVFXV/d2+5kzZ/LhD3+YmpoaXnjhBQBee+01Bg8ezObNmyt1GGbWgEznCKztOOigg5gzZw4AmzZtolu3bgwdOpRCocADDzzAeeedt1X7MWPGMGXKFBYvXsyNN97ImDFjGD16NN/61reoqvL3D7OdiYPAWmz69OkccMAB7Lfffo22qa6uplAoUCgUqK6u5sUXX2TJkiUcc8wx5SvUzEriILAWmzhxIsOHD2+yzeWXX87ZZ59N+/btueOOO7jkkksYPXp0mSo0s5ZwH91aZP369UyePJlTTz21yXb9+/dn1qxZzJgxg5deeol99tmHiOC0007jzDPP5O9//3uZKjaz5rhHYC3y0EMPMWDAAD7wgQ+U1D4iGD16NBMnTuT888/nuuuuY/HixYwbN45rr70242rNrBTuEViL3HPPPc0OCxW7/fbbqa2tpVOnThQKBaqqqqiqqqJQKGRYpZm1hHsEVrJ169Yxbdo0brrpprfX/eIXv+D8889n5cqVnHDCCfTv35+pU6cCUCgUmDBhAo888ggAo0aNora2lnbt2nH33XdX5BjMbFsOAivZ7rvvzqpVq7ZaN3ToUIYOHdpg+w4dOjBjxoy3l48++mieffbZTGs0s5bz0JCZWc45CMzMcs5BYGaWc54jyJGel/2m0iXsEIu/f0KlSzBrU9wjMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMci7TIJB0nKSFkhZJuqyRNp+X9Jyk+ZLuzrIeMzPbVmY/Xi9pF2A88GlgKfCUpMkR8VxRm17A5cBREbFaUpes6jEzs4Zl2SMYCCyKiJciYj0wERhSr825wPiIWA0QESsyrMfMzBqQZRB0A5YULS9N1xU7EDhQ0kxJsyQd19CGJI2QVCepbuXKlRmVa2aWT5WeLN4V6AUcAwwHfiqpY/1GEXFzRNRERE3nzp3LW6GZWRvXbBBI+pyk7QmMZUCPouXu6bpiS4HJEbEhIv4CPE8SDGZmVialfMCfBrwg6TpJB7dg208BvS
TtL6kdcDowuV6bX5L0BpC0N8lQ0Ust2IeZmb1LzQZBRJwJHAa8CEyQ9Hg6Zr9nM+/bCIwEpgILgHsjYr6kaySdmDabCqyS9BwwA/hmRKx6F8djZmYtVNLpoxGxVtLPgfbAhcBQ4JuSxkXEDU28bwowpd66K4ueBzAqfZiZWQWUMkdwoqRfAL8HqoGBEXE8cChwcbblmZlZ1krpEQwDxkbEo8UrI6Ig6ZxsyjIzs3IpJQiuAl7ZsiCpPfCBiFgcEdOzKszMzMqjlLOG7gM2Fy1vSteZmVkbUEoQ7JreIgKA9Hm77EoyM7NyKiUIVhad7omkIcCr2ZVkZmblVMocwVeAuyT9GBDJ/YPOzrQqMzMrm2aDICJeBAZJ2iNdfj3zqszMrGxKuqBM0glAX2A3SQBExDUZ1mVmZmVSygVlN5Lcb+h8kqGhU4H9Mq7LzMzKpJTJ4iMj4mxgdURcDXyU5OZwZmbWBpQSBG+m/y1I6gpsAPbJriQzMyunUuYIfp3+WMwPgT8BAfw0y6LMzKx8mgyC9AdppkfEa8D9kh4EdouINeUozszMstfk0FBEbAbGFy2/5RAwM2tbSpkjmC5pmLacN2pmZm1KKUFwHslN5t6StFbSPyWtzbguMzMrk1KuLG7yJynNzKx1azYIJH28ofX1f6jGzMxap1JOH/1m0fPdgIHAbOCTmVRkZmZlVcrQ0OeKlyX1AK7PqiAzMyuvUiaL61sK9N7RhZiZWWWUMkdwA8nVxJAER3+SK4zNzKwNKGWOoK7o+UbgnoiYmVE9ZmZWZqUEwc+BNyNiE4CkXSR1iIhCtqWZmVk5lHRlMdC+aLk98NtsyjEzs3IrJQh2K/55yvR5h+xKMjOzciolCNZJGrBlQdLhwBvZlWRmZuVUyhzBhcB9kpaT/FTlB0l+utLMzNqAUi4oe0rSwcBB6aqFEbEh27LMzKxcSvnx+q8Du0fEvIiYB+wh6WvZl2ZmZuVQyhzBuekvlAEQEauBczOryMzMyqqUINil+EdpJO0CtMuuJDMzK6dSJosfBiZJuildPg94KLuSzMysnEoJgn8HRgBfSZfnkpw5ZGZmbUCzQ0PpD9g/ASwm+S2CTwILStm4pOMkLZS0SNJlTbQbJikk1ZRWtpmZ7SiN9ggkHQgMTx+vApMAIuITpWw4nUsYD3ya5NbVT0maHBHP1Wu3J3ABSdiYmVmZNdUj+DPJt//PRsTHIuIGYFMLtj0QWBQRL0XEemAiMKSBdt8BfgC82YJtm5nZDtJUEJwMvALMkPRTSceSXFlcqm7AkqLlpem6t6W3rugREb9pakOSRkiqk1S3cuXKFpRgZmbNaTQIIuKXEXE6cDAwg+RWE10k/bekwe92x5KqgB8BFzfXNiJujoiaiKjp3Lnzu921mZkVKWWyeF1E3J3+dnF34GmSM4maswzoUbTcPV23xZ5AP+D3khYDg4DJnjA2MyuvFv1mcUSsTr+dH1tC86eAXpL2l9QOOB2YXLStNRGxd0T0jIiewCzgxIioa3hzZmaWhe358fqSRMRGYCQwleR003sjYr6kaySdmNV+zcysZUq5oGy7RcQUYEq9dVc20vaYLGsxM7OGZdYjMDOz1sFBYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnOZBoGk4yQtlLRI0mUNvD5K0nOS5kqaLmm/LOsxM7NtZRYEknYBxgPHA32A4ZL61Gv2NFATER8Gfg5cl1U9ZmbWsCx7BAOBRRHxUkSsByYCQ4obRMSMiCiki7OA7hnWY2ZmDcgyCLoBS4qWl6brGnMO8FBDL0
gaIalOUt3KlSt3YIlmZrZTTBZLOhOoAX7Y0OsRcXNE1ERETefOnctbnJlZG7drhtteBvQoWu6ertuKpE8B/x/414h4K8N6zMysAVn2CJ4CeknaX1I74HRgcnEDSYcBNwEnRsSKDGsxM7NGZBYEEbERGAlMBRYA90bEfEnXSDoxbfZDYA/gPklzJE1uZHNmZpaRLIeGiIgpwJR6664sev6pLPdvZmbN2ykmi83MrHIcBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzy7lMg0DScZIWSlok6bIGXn+PpEnp609I6pllPWZmtq3MgkDSLsB44HigDzBcUp96zc4BVkfEvwBjgR9kVY+ZmTUsyx7BQGBRRLwUEeuBicCQem2GALelz38OHCtJGdZkZmb1KCKy2bB0CnBcRPy/dPks4IiIGFnUZl7aZmm6/GLa5tV62xoBjEgXDwIWZlL0jrM38GqzrdomH3t+5fn4W8Ox7xcRnRt6YddyV7I9IuJm4OZK11EqSXURUVPpOirBx57PY4d8H39rP/Ysh4aWAT2Klrun6xpsI2lX4L3AqgxrMjOzerIMgqeAXpL2l9QOOB2YXK/NZOCL6fNTgN9FVmNVZmbWoMyGhiJio6SRwFRgF+DWiJgv6RqgLiImAz8D7pC0CPgHSVi0Ba1mGCsDPvb8yvPxt+pjz2yy2MzMWgdfWWxmlnMOAjOznHMQ7EDN3VKjLZN0q6QV6bUhuSKph6QZkp6TNF/SBZWuqVwk7SbpSUnPpMd+daVrqgRJu0h6WtKDla5lezgIdpASb6nRlk0Ajqt0ERWyEbg4IvoAg4Cv5+j//VvAJyPiUKA/cJykQZUtqSIuABZUuojt5SDYcUq5pUabFRGPkpz5lTsR8UpE/Cl9/k+SD4Rula2qPCLxerpYnT5ydQaKpO7ACcAtla5lezkIdpxuwJKi5aXk5MPA3pHeQfcw4IkKl1I26bDIHGAFMC0icnPsqeuBS4HNFa5juzkIzHYQSXsA9wMXRsTaStdTLhGxKSL6k9w9YKCkfhUuqWwkfRZYERGzK13Lu+Eg2HFKuaWGtVGSqklC4K6IeKDS9VRCRLwGzCBfc0VHASdKWkwyHPxJSXdWtqSWcxDsOKXcUsPaoPTW6T8DFkTEjypdTzlJ6iypY/q8PfBp4M8VLaqMIuLyiOgeET1J/s3/LiLOrHBZLeYg2EEiYiOw5ZYaC4B7I2J+ZasqH0n3AI8DB0laKumcStdURkcBZ5F8G5yTPmorXVSZ7APMkDSX5MvQtIholadQ5plvMWFmlnPuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc61ih+vN6skSe8HpqeLHwQ2ASvT5YHpvaWaev+XgJqIGJlZkWbvgoPArBkRsYrkzppIugp4PSL+s5I1me1IHhoy2w6SzpX0VHof/vsldUjXnyppXrr+0Qbed4KkxyXtXf6qzRrmIDDbPg9ExEfS+/AvALZcSX0l8Jl0/YnFb5A0FLgMqI2IV8tarVkTPDRktn36SRoNdAT2ILm1CMBMYIKke4Him899EqgBBufpzqTWOrhHYLZ9JgAjI+IQ4GpgN4CI+ArwbZI70c5OJ5oBXgT2BA4sf6lmTXMQmG2fPYFX0ttPf2HLSkkHRMQTEXElyZlFW25N/ldgGHC7pL5lr9asCQ4Cs+1zBcmvkM1k69su/1DSs5LmAX8EntnyQkT8mSQ07pN0QDmLNWuK7z5qZpZz7hGYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnP/B0iPrw
aXcQuCAAAAAElFTkSuQmCC\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"improved_results.make_plots()"
]
},
{
"source": [
"## Final Results\n"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'task_metrics': }"
]
},
"metadata": {},
"execution_count": 13
},
{
"output_type": "display_data",
"data": {
"text/plain": "",
"image/svg+xml": "\n\n\n\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAdO0lEQVR4nO3de7wVdd328c+1YXPLVu+QQCXQSINQCXe6RTuY3CpEaHqTGeKBDj7QCStPBSqmhlooeaRb8cmbNExNyVBRKNuJ8oiAhoqSCUaCmghBHrah6Pf5YwZcbPZhbWDWYu+53q/Xejnzm9+a9Z3lZq41v5k1SxGBmZnlV0W5CzAzs/JyEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CCx3JE2RNL7cdZhtLxwEtt2T9EbB4z1JbxXMn1SiGqZIWi+pWylez6yUHAS23YuInTY8gBeALxS0Tc369SXtCBwH/As4OevXq/fa7Uv5epZPDgJrtST1l/SIpLWSXpZ0raQO6TJJukLSSkmvSXpKUt8G1rGzpFpJV0tSIy91HLAWuAj4Sr3nd5b0v5JekrRG0l0Fy46VtDB9/aWSBqftyyQdWdDvAkm/Sqd7SgpJp0p6Afhj2v4bSf+Q9C9JsyXtV/D8jpImSvp7uvzhtO1eSafVq/dJSUNb8DZbDjgIrDV7Fzgd6AJ8EjgC+Ha6bBDwWaA38AHgy8DqwidL+iDwADAnIr4bjd9v5SvAr4FbgT6SDixYdjNQBewH7Apcka67P3ATcDbQKa1lWQu27TBgH+Bz6fx9QK/0NR4HCo+ELgcOBD4FdAZ+ALwH/JKCIxhJ+wPdgXtbUAeSvidpkaSnJX1/w7rSEH5K0t2S/jNt/3QaNgsk9UrbOkmaJalV7G8k3Zh+gFhU0NZZ0u8lPZf+d5e0XemHiCXpdh+Qtn9M0mNp2yfTtvaS/iCpqjxb1oSI8MOPVvMg2Zke2ciy7wO/TacPB/4KHAJU1Os3BbgRWASc3czr7UmyU61O52cCV6XT3dJluzTwvOuBK4rZBuAC4FfpdE8ggL2aqKlT2ucDJB/m3gL2b6DfDsAaoFc6fznw8xa+333T96kKaA/8AfgoMB84LO3zdeDH6fQ0oAfwGWBiwesOKPffTgu2+bPAAcCigrYJwJh0egzw03R6CElIK/1bezRt/1n6HvQA7kzbTgO+Wu7ta+jRKhLarCGSeku6Jx0yeQ24hOTogIj4I3AtMAlYKWnyhk+tqaOAjsB1zbzMKcDiiFiYzk8FTpRUCewB/DMi1jTwvD2ApVu4aQDLN0xIaifpJ+nw0mu8f2TRJX3s0NBrRcS/gduAk9NP48NJjmBaYh+SnVtdRKwHHgS+SHKkNTvt83uS4TOAd0hCowp4R9LewB4R8acWvm7ZRMRs4J/1mo8lOcIi/e9/F7TfFIm5QKf0goL670Mn4AskR4nbHQeBtWb/A/yF5BPvfwLnkHwyAyAiro6IA4F9SXZcZxc89wbgfmBGejK4MSOAvdKw+QfJJ70uJJ8ElwOd03/k9S0H9m5knW+S7CA22L2BPoXDVCeS7HCOJDkK6Jm2C1gF/LuJ1/olcBLJsFldRDzSSL/GLAIOlfTBdEhjCEnIPZ3WBHB82gZwKcnObixJEF8MnNfC19we7RYRL6fT/wB2S6e7UxDawIq0bRLJ3+MvST6gjAMuiYj3SlNuyzgIrDXbGXgNeENSH+BbGxZIOkjSwekn9zdJdpb1/xGOBp4F7pbUsf7K07HdvYH+QHX66AvcAoxIdwz3AT+XtIukSkmfTZ/+C+Brko6QVCGpe1ojwELghLR/DfClIrZzHck5jiqSHQsA6Y7lRuBnkj6UHj18UtJ/pMsfSbd7Ii0/GiAiFgM/BWaRBOdCknMzXwe+LemxtL630/4LI+KQiPgvYC/gZZKh9Nsk/UrSbg28TKsSyThPk/fvj4gXImJARHwSqCMZIlos6eb0vehdilqLVu6xKT
/8aMmDgvF1krHcvwBvAA+RXNXzcLrsCODJdNkqkiGdndJlU4Dx6XQFySfYWcAO9V7rOtLx3Xrt/Ul2zJ3Txy+BV0jG46cV9Bua1vA6sAT4XNq+F/BoWtu9wNVsfo6gfcF6dgJ+l67n7yRHKQF8NF3eEbgSeJHkEtfZQMeC559HM+cdWvD+XwJ8u15bb2BevTal72nn9L3/MMkJ8IvL/TdU5Hb2ZNNzBM8C3dLpbsCz6fT1wPCG+hW03UZyov/i9D34MDC13NtY+FBaqJm1UZJGAKMi4jNb+PxdI2KlpD1Jdu6HAB3StgqSYP1TRNxY8JyvkJxEv1LSb4HvkuxcvxgRp2/dFmVPUk/gnojom85fBqyOiJ9IGgN0jogfSDqK5MhyCHAwcHVE9C9Yz2HAf0fE6ZKuIDmZviztt91cxusvq5i1Yem4/reBn2/Fau5ML7V9B/hORKxNLyn9Trp8GvC/9V7zqySX8EJyXmUGyfDRiVtRR0lI+jUwAOgiaQXwI+AnwO2STiU5Kvty2n0GSQgsIRkC+lrBekRyNDYsbZpMcnTUnoJhzO1BZkcEkm4EjgZWbkjVessFXEXyJtaRXFb1eCbFmOWQpM+R7KT/ABwXyVU/ZpvJ8mTxFGBwE8s/TzJu1gsYRXIFiJltIxExMyJ2jIhjHQLWlMyCIBq+FrdQY9ffmplZCZXzHEFj19++XL+jpFEkRw3suOOOB/bp06d+FzMza8Jjjz22KiK6NrSsVZwsjojJJCdaqKmpiQULFpS5IjNrLXqOadGtlbZry35y1BY/V9LfG1tWziB4kfe/jQjJFy5eLFMtZm1aW9kZbs2O0BpXzm8WTwdGpHfvOwT4V7z/FW4zMyuRzI4IGrkWtxIgIq6jietvzcysdDILgogY3szyAL7TVB8zM8uebzpnVoSrrrqKvn37st9++3HllVcCcPbZZ9OnTx/69evH0KFDWbt2LQBz5syhX79+1NTU8NxzzwGwdu1aBg0axHvvbZc3n7SccxCYNWPRokXccMMNzJs3jyeeeIJ77rmHJUuWMHDgQBYtWsSTTz5J7969ufTSSwGYOHEiM2bM4Morr+S665KfOxg/fjznnHMOFRX+J2fbH/9VmjVj8eLFHHzwwVRVVdG+fXsOO+wwpk2bxqBBg2jfPhldPeSQQ1ixYgUAlZWV1NXVUVdXR2VlJUuXLmX58uUMGDCgjFth1rhW8T0Cs3Lq27cv5557LqtXr6Zjx47MmDGDmpqaTfrceOONDBuW3Fts7NixjBgxgo4dO3LzzTdz1llnMX78+HKUblYUB4FZM/bZZx9++MMfMmjQIHbccUeqq6tp167dxuUXX3wx7du356STTgKgurqauXPnAjB79my6detGRDBs2DAqKyuZOHEiu+3W6n+fxdoQDw2ZFeHUU0/lscceY/bs2eyyyy707p38wNSUKVO45557mDp1KskNdd8XEYwfP55x48Zx4YUXMmHCBEaOHMnVV19djk0wa5SPCMyKsHLlSnbddVdeeOEFpk2bxty5c7n//vuZMGECDz74IFVVVZs956abbmLIkCF07tyZuro6KioqqKiooK6urgxbYNY4B4FZEY477jhWr15NZWUlkyZNolOnTowePZp169YxcOBAIDlhvOEqobq6OqZMmcKsWbMAOOOMMxgyZAgdOnTglltuKdt2mDXEQWBWhIceemiztiVLljTav6qqitra2o3zhx56KE899VQmtZltLZ8jMDPLOQeBmVnOOQjMzHLO5wgsF9rK/fjB9+S3bc9HBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OcyzQIJA2W9KykJZLGNLB8T0m1kv4s6UlJQ7Ksx8zMNpdZEEhqB0wCPg/sCwyXtG
+9bucBt0fEJ4ATgJ9nVY+ZmTUsyyOC/sCSiHg+It4GbgWOrdcngP9Mpz8AvJRhPWZm1oAsg6A7sLxgfkXaVugC4GRJK4AZwGkNrUjSKEkLJC149dVXs6jVzCy3yn2yeDgwJSJ6AEOAmyVtVlNETI6Imoio6dq1a8mLNDNry7IMgheBPQrme6RthU4FbgeIiEeAHYAuGdZkZmb1ZBkE84Fekj4iqQPJyeDp9fq8ABwBIGkfkiDw2I+ZWQllFgQRsR4YDcwEFpNcHfS0pIskHZN2OxMYKekJ4NfAVyMisqrJzMw21z7LlUfEDJKTwIVt5xdMPwN8OssazMysaeU+WWxmZmXmIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8u5TINA0mBJz0paImlMI32+LOkZSU9LuiXLeszMbHPts1qxpHbAJGAgsAKYL2l6RDxT0KcXMBb4dESskbRrVvWYmVnDsjwi6A8siYjnI+Jt4Fbg2Hp9RgKTImINQESszLAe20o9e/bk4x//ONXV1dTU1ABwwQUX0L17d6qrq6murmbGjBkAzJkzh379+lFTU8Nzzz0HwNq1axk0aBDvvfde2bbBzDaX2REB0B1YXjC/Aji4Xp/eAJLmAO2ACyLi/vorkjQKGAWw5557ZlKsFae2tpYuXbps0nb66adz1llnbdI2ceJEZsyYwbJly7juuuuYOHEi48eP55xzzqGiwqemzLYn5f4X2R7oBQwAhgM3SOpUv1NETI6Imoio6dq1a2krtC1SWVlJXV0ddXV1VFZWsnTpUpYvX86AAQPKXZqZ1dNsEEj6gqQtCYwXgT0K5nukbYVWANMj4p2I+BvwV5Jg2C41NDSywcSJE5HEqlWrALjzzjvZb7/9OPTQQ1m9ejUAS5cuZdiwYSWve1uRxKBBgzjwwAOZPHnyxvZrr72Wfv368fWvf501a9YAMHbsWEaMGMGll17K6NGjOffccxk/fny5SjezJhSzgx8GPCdpgqQ+LVj3fKCXpI9I6gCcAEyv1+cukqMBJHUhGSp6vgWvUXK1tbUsXLiQBQsWbGxbvnw5s2bN2mTY6pprrmH+/Pl84xvf4JZbkouhzjvvvFa9M3z44Yd5/PHHue+++5g0aRKzZ8/mW9/6FkuXLmXhwoV069aNM888E4Dq6mrmzp1LbW0tzz//PN26dSMiGDZsGCeffDKvvPJKmbfGzDZoNggi4mTgE8BSYIqkRySNkrRzM89bD4wGZgKLgdsj4mlJF0k6Ju02E1gt6RmgFjg7IlZvxfaUxemnn86ECROQtLGtoqKCdevWbRwaeeihh9h9993p1Wu7PeBpVvfu3QHYddddGTp0KPPmzWO33XajXbt2VFRUMHLkSObNm7fJcyKC8ePHM27cOC688EImTJjAyJEjufrqq8uxCWbWgKKGfCLiNeAOkit/ugFDgcclndbM82ZERO+I2DsiLk7bzo+I6el0RMQZEbFvRHw8Im7dqq3JWENDI7/73e/o3r07+++//yZ9x44dy5FHHsndd9/N8OHD+fGPf8y4cePKUfY28eabb/L6669vnJ41axZ9+/bl5Zdf3tjnt7/9LX379t3keTfddBNDhgyhc+fO1NXVUVFRQUVFBXV1dSWt38wa1+xVQ+mn968BHwVuAvpHxEpJVcAzwDXZlrj9ePjhh+nevTsrV65k4MCB9OnTh0suuYRZs2Zt1nfgwIEMHDgQeH9n+Ne//pXLL7+cXXbZhauuuoqqqqpSb8IWe+WVVxg6dCgA69ev58QTT2Tw4MGccsopLFy4EEn07NmT66
+/fuNz6urqmDJlysb354wzzmDIkCF06NBh43CZmZVfMZePHgdcERGzCxsjok7SqdmUtX2qPzTy4IMP8re//W3j0cCKFSs44IADmDdvHrvvvjvw/s5w5syZHH300UybNo077riDqVOnMnLkyLJtS0vttddePPHEE5u133zzzY0+p6qqitra2o3zhx56KE899VQm9ZnZlitmaOgCYOPAr6SOknoCRMQD2ZS1/WloaOSggw5i5cqVLFu2jGXLltGjRw8ef/zxjSEAcNlll/Hd736XyspK3nrrLSR5aMTMtivFHBH8BvhUwfy7adtBmVS0nWpsaKQpL730EvPmzeNHP/oRAKeddhoHHXQQnTp14q677sq6ZDOzohQTBO3TW0QAEBFvp5eD5kpjQyOFli1btsn8hz70Ie69996N88cffzzHH398FuWZmW2xYoLgVUnHbLjSR9KxwKpsy7Is9Bxzb/OdWoFlPzmq3CWYtSnFBME3gamSrgVEcv+gEZlWZWZmJdNsEETEUuAQSTul829kXpWZmZVMUXcflXQUsB+ww4Zvz0bERRnWlYm2MjQCHh4xs22nmJvOXUdyv6HTSIaGjgc+nHFdZmZWIsV8j+BTETECWBMRFwKfJP0dATMza/2KCYJ/p/+tk/Qh4B2S+w2ZmVkbUMw5grvTH4u5DHgcCOCGLIsyM7PSaTII0h+keSAi1gJ3SroH2CEi/lWK4szMLHtNDg1FxHvApIL5dQ4BM7O2pZhzBA9IOk6Fv7piZmZtRjFB8A2Sm8ytk/SapNclvZZxXWZmViLFfLO4yZ+kNDOz1q2YXyj7bEPt9X+oxszMWqdiLh89u2B6B6A/8BhweCYVmZlZSRUzNPSFwnlJewBXZlWQmZmVVjEni+tbAeyzrQsxM7PyKOYcwTUk3yaGJDiqSb5hbGZmbUAx5wgWFEyvB34dEXMyqsfMzEqsmCC4A/h3RLwLIKmdpKqIqMu2NDMzK4WivlkMdCyY7wj8IZtyzMys1IoJgh0Kf54yna7KriQzMyulYoLgTUkHbJiRdCDwVnYlmZlZKRVzjuD7wG8kvUTyU5W7k/x0pZmZtQHFfKFsvqQ+wMfSpmcj4p1syzIzs1Ip5sfrvwPsGBGLImIRsJOkb2dfmpmZlUIx5whGpr9QBkBErAFGZlaRmZmVVDFB0K7wR2kktQM6ZFeSmZmVUjEni+8HbpN0fTr/DeC+7EoyM7NSKiYIfgiMAr6Zzj9JcuWQmZm1Ac0ODaU/YP8osIzktwgOBxYXs3JJgyU9K2mJpDFN9DtOUkiqKa5sMzPbVho9IpDUGxiePlYBtwFExH8Vs+L0XMIkYCDJravnS5oeEc/U67cz8D2SsDEzsxJr6ojgLySf/o+OiM9ExDXAuy1Yd39gSUQ8HxFvA7cCxzbQ78fAT4F/t2DdZma2jTQVBF8EXgZqJd0g6QiSbxYXqzuwvGB+Rdq2UXrrij0i4t6mViRplKQFkha8+uqrLSjBzMya02gQRMRdEXEC0AeoJbnVxK6S/kfSoK19YUkVwM+AM5vrGxGTI6ImImq6du26tS9tZmYFijlZ/GZE3JL+dnEP4M8kVxI150Vgj4L5HmnbBjsDfYE/SVoGHAJM9wljM7PSatFvFkfEmvTT+RFFdJ8P9JL0EUkdgBOA6QXr+ldEdImInhHRE5gLHBMRCxpenZmZZWFLfry+KBGxHhgNzCS53PT2iHha0kWSjsnqdc3MrGWK+ULZFouIGcCMem3nN9J3QJa1mJlZwzI7IjAzs9bBQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzM
xyzkFgZpZzmQaBpMGSnpW0RNKYBpafIekZSU9KekDSh7Osx8zMNpdZEEhqB0wCPg/sCwyXtG+9bn8GaiKiH3AHMCGreszMrGFZHhH0B5ZExPMR8TZwK3BsYYeIqI2IunR2LtAjw3rMzKwBWQZBd2B5wfyKtK0xpwL3NbRA0ihJCyQtePXVV7dhiWZmtl2cLJZ0MlADXNbQ8oiYHBE1EVHTtWvX0hZnZtbGtc9w3S8CexTM90jbNiHpSOBc4LCIWJdhPWZm1oAsjwjmA70kfURSB+AEYHphB0mfAK4HjomIlRnWYmZmjcgsCCJiPTAamAksBm6PiKclXSTpmLTbZcBOwG8kLZQ0vZHVmZlZRrIcGiIiZgAz6rWdXzB9ZJavb2ZmzdsuThabmVn5OAjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzmQaBpMGSnpW0RNKYBpb/h6Tb0uWPSuqZZT1mZra5zIJAUjtgEvB5YF9guKR963U7FVgTER8FrgB+mlU9ZmbWsCyPCPoDSyLi+Yh4G7gVOLZen2OBX6bTdwBHSFKGNZmZWT2KiGxWLH0JGBwR/yedPwU4OCJGF/RZlPZZkc4vTfusqreuUcCodPZjwLOZFL3tdAFWNdurbfK251eet781bPuHI6JrQwval7qSLRERk4HJ5a6jWJIWRERNuesoB297Prcd8r39rX3bsxwaehHYo2C+R9rWYB9J7YEPAKszrMnMzOrJMgjmA70kfURSB+AEYHq9PtOBr6TTXwL+GFmNVZmZWYMyGxqKiPWSRgMzgXbAjRHxtKSLgAURMR34BXCzpCXAP0nCoi1oNcNYGfC251eet79Vb3tmJ4vNzKx18DeLzcxyzkFgZpZzDoJtqLlbarRlkm6UtDL9bkiuSNpDUq2kZyQ9Lel75a6pVCTtIGmepCfSbb+w3DWVg6R2kv4s6Z5y17IlHATbSJG31GjLpgCDy11EmawHzoyIfYFDgO/k6P/9OuDwiNgfqAYGSzqkvCWVxfeAxeUuYks5CLadYm6p0WZFxGySK79yJyJejojH0+nXSXYI3ctbVWlE4o10tjJ95OoKFEk9gKOA/1vuWraUg2Db6Q4sL5hfQU52Bva+9A66nwAeLXMpJZMOiywEVgK/j4jcbHvqSuAHwHtlrmOLOQjMthFJOwF3At+PiNfKXU+pRMS7EVFNcveA/pL6lrmkkpF0NLAyIh4rdy1bw0Gw7RRzSw1royRVkoTA1IiYVu56yiEi1gK15Otc0aeBYyQtIxkOPlzSr8pbUss5CLadYm6pYW1Qeuv0XwCLI+Jn5a6nlCR1ldQpne4IDAT+UtaiSigixkZEj4joSfJv/o8RcXKZy2oxB8E2EhHrgQ231FgM3B4RT5e3qtKR9GvgEeBjklZIOrXcNZXQp4FTSD4NLkwfQ8pdVIl0A2olPUnyYej3EdEqL6HMM99iwsws53xEYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOdcqfrzerJwkfRB4IJ3dHXgXeDWd75/eW6qp538VqImI0ZkVabYVHARmzYiI1SR31kTSBcAbEXF5OWsy25Y8NGS2BSSNlDQ/vQ//nZKq0vbjJS1K22c38LyjJD0iqUvpqzZrmIPAbMtMi4iD0vvwLwY2fJP6fOBzafsxhU+QNBQYAwyJiFUlrdasCR4aMtsyfSWNBzoBO5HcWgRgDjBF0u1A4c3nDgdqgEF5ujOptQ4+IjDbMlOA0RHxceBCYAeAiPgmcB7JnWgfS080AywFdgZ6l75Us6Y5CMy2zM7Ay+
ntp0/a0Chp74h4NCLOJ7myaMOtyf8OHAfcJGm/kldr1gQHgdmWGUfyK2Rz2PS2y5dJekrSIuD/AU9sWBARfyEJjd9I2ruUxZo1xXcfNTPLOR8RmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZz/x/jOYg2+yx1FwAAAABJRU5ErkJggg==\n"
},
"metadata": {
"needs_background": "light"
}
},
{
"output_type": "display_data",
"data": {
"text/plain": "",
"image/svg+xml": "\n\n\n\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcv0lEQVR4nO3de7xUdb3/8dd76ybBS0RCyUUxDyoXE3GHpNmxLNJtiYimmFodf2IXTEXzaL/0qGEXO4QHo6NmHryDphUZikSURxJ1k4ggoWgkFwskhGRUbp/zx1rosNmX2ciaYe/1fj4e83DWmu+s9VkI857v97vWGkUEZmaWX1WVLsDMzCrLQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnILDckTRB0uhK12G2s3AQ2E5P0utFj82S3iha/kKZapggaaOkfcqxP7NychDYTi8i9tjyAF4GPle07q6s9y9pd2AYsAY4M+v91dv3ruXcn+WTg8BaLUkDJT0u6TVJr0j6saR26WuSNFbSCklrJT0rqV8D29hT0gxJ4ySpkV0NA14DrgG+WO/9nST9j6TlklZL+mXRa0MkzUn3/6Kk49L1iyV9qqjdVZLuTJ/3lBSSzpH0MvC7dP19kv4maY2kRyX1LXp/e0ljJP01ff2xdN1vJJ1fr965koa24I/ZcsBBYK3ZJuAiYG/go8CxwNfS1wYDHwcOBN4LfB5YVfxmSe8HpgMzI+Ib0fj9Vr4I3ANMBA6WdHjRa3cAHYC+QBdgbLrtgcDtwDeBjmkti1twbP8K9AY+ky4/BPRK9/EnoLgn9J/A4cCRQCfgUmAzcBtFPRhJhwLdgN+0oA7LAQeBtVoRMTsiZkXExohYDNxE8gEKsAHYEzgYUEQsiIhXit7eFfgDcF9EfLuxfUjaF/gEcHdE/J0kOM5OX9sHOB74SkSsjogNEfGH9K3nALdGxLSI2BwRyyLizy04vKsiYl1EvJEe660R8c+IeAu4CjhU0nslVQH/BlyQ7mNTRPwxbTcZOFBSr3SbZwGTImJ9C+pA0gWS5kmaL+nCdN2haW/sWUm/lrRXuv6otNdRt2W/kjpKeiSt1XZC/h9jrZakAyU9mA6ZrAW+S9I7ICJ+B/wYGA+skHTzlg+r1AlAe+DGZnZzFrAgIuaky3cBZ0iqBnoA/4iI1Q28rwfw4nYeGsCSLU8k7SLp++nw0lre6VnsnT52a2hfEfEmMAk4M/0QHk7SgylZOpx2LjAQOBT4rKR/AW4BLouIQ4BfkPR8AC4GaoELga+k674NfDciNrdk31Y+DgJrzf4b+DPQKyL2Ar4FvD3OHxHjIuJwoA/JENE3i977U+BhYEo6GdyYs4EPpWHzN+BHJB++tSQf1p0kdWzgfUuAAxrZ5jqS4aQtPthAm+JhqjOAIcCnSIa5eqbrBbwKvNnEvm4DvkAybFaIiMcbadeY3sATEVGIiI0kvaiTSf48H03bTCOZR4GkJ9YhfWyQdADQIyJ+38L97jQa6RH1lzQrnQOqS4cCkTQsbfe/6dAjkg6QNKmCh9AsB4G1ZnsCa4HXJR0MfHXLC5I+IumI9Jv7OpIPy/rfSEcCC4FfS2pff+OSPkryATsQ6J8++gF3A2enQ00PAT+R9D5J1ZI+nr79Z8CXJR0rqUpSt7RGgDnA6Wn7GuCUEo7zLZI5jg4kPR8A0m/ZtwI/ktQ17T18VNJ70tcfT497DC3sDaTmAUdLer+kDiQB2AOYTxJOAKem6wC+RzI3cjlJj+xakh5Bq9REj+g64OqI6A9cmS4DnA98hGSY8ox03Wh28j8DB4G1ZpeQ/GP7J8k3/OJvXXul61YDfyX5EP1h8ZvTyeERwFLgV5J2q7f9LwK/iohnI+JvWx7Af5F8IHQiGTraQNIzWUEyJEJEPAl8mWTyeA3JN+n90u1eQRIwq4GrSYKlKbenx7AMeA6Y1cCfw7PAU8A/gB+w9b/t24FDgDub2c82ImJBur1HSH
pQc0gm6f8N+Jqk2SRBtT5tPyciBkXEJ4APAa+QnMQ1SdKdkj7Q0hoqrLEeUZD8HYOkl7Y8fb4ZeA/v9IiOBv4WES+Ut+wWigg/SnwAF5B8Q5oPXJium0Tyj2MOydjtnHT9UcBcoI5k6AKSs0ceAaoqfSx+5OdBMrz12A7a1neBr9VbdyDwZL11Sv+udyKZV9mPZCL/2kr/ebTweHsDzwPvJ/lwfxy4IV3/MskQ4DJgv7T9p4HZwK9JAuIRoFOlj6O5hy9WKVG9LuJ64GFJD0bEaUVtxpB8+4N3Js16kkyaXYwnzazM0uGcrwE/eRfb6BIRK9IzqE4GBhWtqyL5e11/0v1sYEpE/COtYXP66EArEhELJG3pEa3jnR7RV4GLIuJ+SZ8nGQr8VERMI5kzQdLZwBSSM7cuIekBXhARhfIfSdMyGxqSdKuSi3nmNfK6lFzEsyg93WxAVrXsII11EYHkeEjOVb8nXdXmJs2sdZH0GWAl8HeaH35qyv2SniP5lvv1iHgNGC7peZIhseXA/xTttwPwJZIztiCZYJ8CXE/zZ2ntdCLiZxFxeER8nOTD/HmSYcMH0ib3kXxBfFu9P4Or0/aPkUzc73wy7FJ9HBgAzGvk9VqSiTYBg0g+ZCveRWppF7He8dYVLfcnGcudAXQnuRipV6WPww8//GjZA+iS/ndfkuDrCCwAjknXHwvMrvee/wBOSp8/mn5mnEXSI6j4MdV/ZDY0FBGPSurZRJMhwO2R/EnNSi862Se2vuhnpxGNdxG3GM47vQEiOe98EEB6Jsnbk2YkvYWLI7lAycx2bvenp4JuIO0RSToX+C8l94J6k+SkAwAkdQUGRsTV6aobSCbyXwNOKmfhpVKaWNlsPAmCByOioXu8PAh8PyIeS5enA/8eEXUNtB1B+ge9++67H37wwQfXb1J2y5Yto7q6mi5duhARzJ07l969e9OuXbut2kUEL7zwAh/60IdYsmQJXbt2Zf369axdu5Zu3bpVqHozy5vZs2e/GhGdG3qtVUwWR8TNwM0ANTU1UVe3TVaUxYoVK+jSpQsvv/wygwcPZtasWXTs2JGHH36Y733ve/zhD3/Y5j233XYbq1ev5sILL2To0KGMGzeOxYsX88ADDzB27NgKHIWZ5ZGkvzb2WiWDYBnvXIQCyTj6sgrVUpJhw4axatUqqqurGT9+PB07dgRg4sSJDB8+fJv2hUKBCRMm8MgjjwAwatQoamtradeuHXff/W7m7szMdpxKDg2dQHJlZy1wBDAuIgbWb1dfJXsEZq1Vz8vaxg1HF3//hEqX0GpJmh0RNQ29llmPQNI9wDHA3pKWksyiVwNExI0kp5PVAouAAslVmGZmO1RbCUHILgizPGto27GSrV8P4OtZ7d/MzErjew2ZmeVcqzhraEdxF9HMbFvuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYFaCsWPH0rdvX/r168fw4cN58803mT59OgMGDKB///587GMfY9GiRQDccMMN9OvXj9raWtavXw/AY489xkUXXVTJQzBrlIPArBnLli1j3Lhx1NXVMW/ePDZt2sTEiRP56le/yl133cWcOXM444wzGD16NAB33XUXc+fO5cgjj2Tq1KlEBN/5zne44oorKnwkZg1zEJiVYOPGjbzxxhts3LiRQqFA165dkcTatWsBWLNmDV27dgWS36DYsGEDhUKB6upq7rzzTo4//ng6depUyUMwa1Suriw22x7dunXjkksuYd9996V9+/YMHjyYwYMHc8stt1BbW0v79u3Za6+9mDVrFgAjR45k0KBB9O3bl6OOOoohQ4YwderUCh+FWePcIzBrxurVq/nVr37FX/7yF5YvX866deu48847GTt2LFOmTGHp0qV8+ctfZtSoUQCcddZZPP3002+3+cY3vsFDDz3EKaecwkUXXcTmzZsrfERmW3MQmDXjt7/9Lfvvvz+dO3emurqak08+mZkzZ/LMM89wxB
FHAHDaaafxxz/+cav3LV++nCeffJKTTjqJMWPGMGnSJDp27Mj06dMrcRhmjXIQmDVj3333ZdasWRQKBSKC6dOn06dPH9asWcPzzz8PwLRp0+jdu/dW77viiiu45pprAHjjjTeQRFVVFYVCoezHYNYUzxGYNeOII47glFNOYcCAAey6664cdthhjBgxgu7duzNs2DCqqqp43/vex6233vr2e55++mkABgwYAMAZZ5zBIYccQo8ePbj00ksrchxmjcn0pyqz8G5+qtK3oba8ait/97fn731bOXZ4d//um/qpSg8NmZnlnIPAzCznHARmZjnnyWLLBY8TmzXOPQIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOZRoEko6TtFDSIkmXNfD6vpJmSHpa0lxJtVnWY2Zm28osCCTtAowHjgf6AMMl9anX7NvAvRFxGHA68JOs6jEzs4Zl2SMYCCyKiJciYj0wERhSr00Ae6XP3wssz7AeMzNrQJZB0A1YUrS8NF1X7CrgTElLgSnA+Q1tSNIISXWS6lauXJlFrWZmuVXpyeLhwISI6A7UAndI2qamiLg5ImoioqZz585lL9LMrC3LMgiWAT2Klrun64qdA9wLEBGPA7sBe2dYk5mZ1ZNlEDwF9JK0v6R2JJPBk+u1eRk4FkBSb5Ig8NiPmVkZZRYEEbERGAlMBRaQnB00X9I1kk5Mm10MnCvpGeAe4EsREVnVZGZm29o1y41HxBSSSeDidVcWPX8OOCrLGszMrGmVniw2M7MKcxCYmeWcg8BKsnDhQvr37//2Y6+99uL666/nvvvuo2/fvlRVVVFXV/d2+5kzZ/LhD3+YmpoaXnjhBQBee+01Bg8ezObNmyt1GGbWgEznCKztOOigg5gzZw4AmzZtolu3bgwdOpRCocADDzzAeeedt1X7MWPGMGXKFBYvXsyNN97ImDFjGD16NN/61reoqvL3D7OdiYPAWmz69OkccMAB7Lfffo22qa6uplAoUCgUqK6u5sUXX2TJkiUcc8wx5SvUzEriILAWmzhxIsOHD2+yzeWXX87ZZ59N+/btueOOO7jkkksYPXp0mSo0s5ZwH91aZP369UyePJlTTz21yXb9+/dn1qxZzJgxg5deeol99tmHiOC0007jzDPP5O9//3uZKjaz5rhHYC3y0EMPMWDAAD7wgQ+U1D4iGD16NBMnTuT888/nuuuuY/HixYwbN45rr70242rNrBTuEViL3HPPPc0OCxW7/fbbqa2tpVOnThQKBaqqqqiqqqJQKGRYpZm1hHsEVrJ169Yxbdo0brrpprfX/eIXv+D8889n5cqVnHDCCfTv35+pU6cCUCgUmDBhAo888ggAo0aNora2lnbt2nH33XdX5BjMbFsOAivZ7rvvzqpVq7ZaN3ToUIYOHdpg+w4dOjBjxoy3l48++mieffbZTGs0s5bz0JCZWc45CMzMcs5BYGaWc54jyJGel/2m0iXsEIu/f0KlSzBrU9wjMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMci7TIJB0nKSFkhZJuqyRNp+X9Jyk+ZLuzrIeMzPbVmY/Xi9pF2A88GlgKfCUpMkR8VxRm17A5cBREbFaUpes6jEzs4Zl2SMYCCyKiJciYj0wERhSr825wPiIWA0QESsyrMfMzBqQZRB0A5YULS9N1xU7EDhQ0kxJsyQd19CGJI2QVCepbuXKlRmVa2aWT5WeLN4V6AUcAwwHfiqpY/1GEXFzRNRERE3nzp3LW6GZWRvXbBBI+pyk7QmMZUCPouXu6bpiS4HJEbEhIv4CPE8SDGZmVialfMCfBrwg6TpJB7dg208BvS
TtL6kdcDowuV6bX5L0BpC0N8lQ0Ust2IeZmb1LzQZBRJwJHAa8CEyQ9Hg6Zr9nM+/bCIwEpgILgHsjYr6kaySdmDabCqyS9BwwA/hmRKx6F8djZmYtVNLpoxGxVtLPgfbAhcBQ4JuSxkXEDU28bwowpd66K4ueBzAqfZiZWQWUMkdwoqRfAL8HqoGBEXE8cChwcbblmZlZ1krpEQwDxkbEo8UrI6Ig6ZxsyjIzs3IpJQiuAl7ZsiCpPfCBiFgcEdOzKszMzMqjlLOG7gM2Fy1vSteZmVkbUEoQ7JreIgKA9Hm77EoyM7NyKiUIVhad7omkIcCr2ZVkZmblVMocwVeAuyT9GBDJ/YPOzrQqMzMrm2aDICJeBAZJ2iNdfj3zqszMrGxKuqBM0glAX2A3SQBExDUZ1mVmZmVSygVlN5Lcb+h8kqGhU4H9Mq7LzMzKpJTJ4iMj4mxgdURcDXyU5OZwZmbWBpQSBG+m/y1I6gpsAPbJriQzMyunUuYIfp3+WMwPgT8BAfw0y6LMzKx8mgyC9AdppkfEa8D9kh4EdouINeUozszMstfk0FBEbAbGFy2/5RAwM2tbSpkjmC5pmLacN2pmZm1KKUFwHslN5t6StFbSPyWtzbguMzMrk1KuLG7yJynNzKx1azYIJH28ofX1f6jGzMxap1JOH/1m0fPdgIHAbOCTmVRkZmZlVcrQ0OeKlyX1AK7PqiAzMyuvUiaL61sK9N7RhZiZWWWUMkdwA8nVxJAER3+SK4zNzKwNKGWOoK7o+UbgnoiYmVE9ZmZWZqUEwc+BNyNiE4CkXSR1iIhCtqWZmVk5lHRlMdC+aLk98NtsyjEzs3IrJQh2K/55yvR5h+xKMjOzciolCNZJGrBlQdLhwBvZlWRmZuVUyhzBhcB9kpaT/FTlB0l+utLMzNqAUi4oe0rSwcBB6aqFEbEh27LMzKxcSvnx+q8Du0fEvIiYB+wh6WvZl2ZmZuVQyhzBuekvlAEQEauBczOryMzMyqqUINil+EdpJO0CtMuuJDMzK6dSJosfBiZJuildPg94KLuSzMysnEoJgn8HRgBfSZfnkpw5ZGZmbUCzQ0PpD9g/ASwm+S2CTwILStm4pOMkLZS0SNJlTbQbJikk1ZRWtpmZ7SiN9ggkHQgMTx+vApMAIuITpWw4nUsYD3ya5NbVT0maHBHP1Wu3J3ABSdiYmVmZNdUj+DPJt//PRsTHIuIGYFMLtj0QWBQRL0XEemAiMKSBdt8BfgC82YJtm5nZDtJUEJwMvALMkPRTSceSXFlcqm7AkqLlpem6t6W3rugREb9pakOSRkiqk1S3cuXKFpRgZmbNaTQIIuKXEXE6cDAwg+RWE10k/bekwe92x5KqgB8BFzfXNiJujoiaiKjp3Lnzu921mZkVKWWyeF1E3J3+dnF34GmSM4maswzoUbTcPV23xZ5AP+D3khYDg4DJnjA2MyuvFv1mcUSsTr+dH1tC86eAXpL2l9QOOB2YXLStNRGxd0T0jIiewCzgxIioa3hzZmaWhe358fqSRMRGYCQwleR003sjYr6kaySdmNV+zcysZUq5oGy7RcQUYEq9dVc20vaYLGsxM7OGZdYjMDOz1sFBYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnOZBoGk4yQtlLRI0mUNvD5K0nOS5kqaLmm/LOsxM7NtZRYEknYBxgPHA32A4ZL61Gv2NFATER8Gfg5cl1U9ZmbWsCx7BAOBRRHxUkSsByYCQ4obRMSMiCiki7OA7hnWY2ZmDcgyCLoBS4qWl6brGnMO8FBDL0
gaIalOUt3KlSt3YIlmZrZTTBZLOhOoAX7Y0OsRcXNE1ERETefOnctbnJlZG7drhtteBvQoWu6ertuKpE8B/x/414h4K8N6zMysAVn2CJ4CeknaX1I74HRgcnEDSYcBNwEnRsSKDGsxM7NGZBYEEbERGAlMBRYA90bEfEnXSDoxbfZDYA/gPklzJE1uZHNmZpaRLIeGiIgpwJR6664sev6pLPdvZmbN2ykmi83MrHIcBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzyzkHgZlZzjkIzMxyzkFgZpZzDgIzs5xzEJiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc45CMzMcs5BYGaWcw4CM7OccxCYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnMOAjOznHMQmJnlnIPAzCznHARmZjnnIDAzy7lMg0DScZIWSlok6bIGXn+PpEnp609I6pllPWZmtq3MgkDSLsB44HigDzBcUp96zc4BVkfEvwBjgR9kVY+ZmTUsyx7BQGBRRLwUEeuBicCQem2GALelz38OHCtJGdZkZmb1KCKy2bB0CnBcRPy/dPks4IiIGFnUZl7aZmm6/GLa5tV62xoBjEgXDwIWZlL0jrM38GqzrdomH3t+5fn4W8Ox7xcRnRt6YddyV7I9IuJm4OZK11EqSXURUVPpOirBx57PY4d8H39rP/Ysh4aWAT2Klrun6xpsI2lX4L3AqgxrMjOzerIMgqeAXpL2l9QOOB2YXK/NZOCL6fNTgN9FVmNVZmbWoMyGhiJio6SRwFRgF+DWiJgv6RqgLiImAz8D7pC0CPgHSVi0Ba1mGCsDPvb8yvPxt+pjz2yy2MzMWgdfWWxmlnMOAjOznHMQ7EDN3VKjLZN0q6QV6bUhuSKph6QZkp6TNF/SBZWuqVwk7SbpSUnPpMd+daVrqgRJu0h6WtKDla5lezgIdpASb6nRlk0Ajqt0ERWyEbg4IvoAg4Cv5+j//VvAJyPiUKA/cJykQZUtqSIuABZUuojt5SDYcUq5pUabFRGPkpz5lTsR8UpE/Cl9/k+SD4Rula2qPCLxerpYnT5ydQaKpO7ACcAtla5lezkIdpxuwJKi5aXk5MPA3pHeQfcw4IkKl1I26bDIHGAFMC0icnPsqeuBS4HNFa5juzkIzHYQSXsA9wMXRsTaStdTLhGxKSL6k9w9YKCkfhUuqWwkfRZYERGzK13Lu+Eg2HFKuaWGtVGSqklC4K6IeKDS9VRCRLwGzCBfc0VHASdKWkwyHPxJSXdWtqSWcxDsOKXcUsPaoPTW6T8DFkTEjypdTzlJ6iypY/q8PfBp4M8VLaqMIuLyiOgeET1J/s3/LiLOrHBZLeYg2EEiYiOw5ZYaC4B7I2J+ZasqH0n3AI8DB0laKumcStdURkcBZ5F8G5yTPmorXVSZ7APMkDSX5MvQtIholadQ5plvMWFmlnPuEZiZ5ZyDwMws5xwEZmY55yAwM8s5B4GZWc61ih+vN6skSe8HpqeLHwQ2ASvT5YHpvaWaev+XgJqIGJlZkWbvgoPArBkRsYrkzppIugp4PSL+s5I1me1IHhoy2w6SzpX0VHof/vsldUjXnyppXrr+0Qbed4KkxyXtXf6qzRrmIDDbPg9ExEfS+/AvALZcSX0l8Jl0/YnFb5A0FLgMqI2IV8tarVkTPDRktn36SRoNdAT2ILm1CMBMYIKke4Him899EqgBBufpzqTWOrhHYLZ9JgAjI+IQ4GpgN4CI+ArwbZI70c5OJ5oBXgT2BA4sf6lmTXMQmG2fPYFX0ttPf2HLSkkHRMQTEXElyZlFW25N/ldgGHC7pL5lr9asCQ4Cs+1zBcmvkM1k69su/1DSs5LmAX8EntnyQkT8mSQ07pN0QDmLNWuK7z5qZpZz7hGYmeWcg8DMLOccBGZmOecgMDPLOQeBmVnOOQjMzHLOQWBmlnP/B0iPrw
aXcQuCAAAAAElFTkSuQmCC\n"
},
"metadata": {
"needs_background": "light"
}
}
],
"source": [
"results.make_plots()\n",
"improved_results.make_plots()"
]
}
]
}
================================================
FILE: examples/basic/quick_demo.py
================================================
""" Demo: Creates a simple new method and applies it to a single CL setting.
"""
import sys
from argparse import Namespace
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type
import gym
import pandas as pd
import torch
import tqdm
from gym import spaces
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor, nn
from sequoia import Method, Setting
from sequoia.common import Config
from sequoia.settings import Environment
from sequoia.settings.sl import DomainIncrementalSLSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards
from sequoia.settings.sl.incremental.results import IncrementalSLResults as Results
class MyModel(nn.Module):
    """Simple classification model without any CL-related mechanism.

    To keep things simple, this demo model is designed for supervised
    (classification) settings where observations have shape [3, 28, 28] (i.e. the
    MNIST variants: Mnist, FashionMnist, RotatedMnist, EMnist, etc.)

    NOTE: You are free to use whatever kind of Model you want, or even not to use one
    at all! This is just an example to help you get started quickly.
    """

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
    ):
        """Build the convolutional encoder, the classifier head and the loss.

        Parameters
        ----------
        observation_space : gym.Space
            Space of the observations. Its "x" entry must have shape (3, 28, 28).
        action_space : gym.Space
            Space of the predictions. Must be a `spaces.Discrete` space; its size
            `n` is used as the number of classes.
        reward_space : gym.Space
            Space of the rewards (the image labels). Must be equal to `action_space`.
        """
        super().__init__()
        image_shape = observation_space["x"].shape
        assert image_shape == (3, 28, 28), "this example only works on mnist-like data"
        assert isinstance(action_space, spaces.Discrete)
        assert action_space == reward_space
        n_classes = action_space.n
        image_channels = image_shape[0]

        self.encoder = nn.Sequential(
            nn.Conv2d(image_channels, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # 28x28 inputs become 16 feature maps of 4x4 after the encoder
            # (28 -> 24 -> 12 -> 8 -> 4), hence 16 * 4 * 4 = 256 input features.
            nn.Linear(256, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, n_classes),
        )
        self.loss = nn.CrossEntropyLoss()

    def forward(self, observations: Observations) -> Tensor:
        """Return the classification logits for the images in `observations.x`."""
        # NOTE: here we don't make use of the task labels
        # (`observations.task_labels`), only of the images.
        features = self.encoder(observations.x)
        logits = self.classifier(features)
        return logits

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch is None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y

        loss = self.loss(logits, image_labels)

        # Fraction of correct predictions in this batch, logged as a python float.
        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy.item()}
        return loss, metrics_dict
class DemoMethod(Method, target_setting=DomainIncrementalSLSetting):
    """Minimal example of a Method targeting the Domain-Incremental SL setting.

    For a quick intro to dataclasses, see
    examples/prerequisites/dataclasses_example.py
    """

    @dataclass
    class HParams:
        """Hyper-parameters of the demo model."""

        # Learning rate of the optimizer.
        learning_rate: float = 0.001

    def __init__(self, hparams: Optional[HParams] = None):
        """Create the method.

        Parameters
        ----------
        hparams : HParams, optional
            Hyper-parameters to use. When omitted, the default `HParams()` are used.
        """
        self.hparams: DemoMethod.HParams = hparams or self.HParams()
        self.max_epochs: int = 1
        self.early_stop_patience: int = 2

        # We will create those when `configure` will be called, before training.
        self.model: MyModel
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: DomainIncrementalSLSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        self.model = MyModel(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Example train loop.

        You can do whatever you want with train_env and valid_env here.

        NOTE: In the Settings where task boundaries are known (in this case all
        the supervised CL settings), this will be called once per task.

        Parameters
        ----------
        train_env : PassiveEnvironment
            Environment iterated over to obtain the training batches.
        valid_env : PassiveEnvironment
            Environment iterated over to obtain the validation batches.
        """
        # configure() will have been called by the setting before we get here.
        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.max_epochs):
            self.model.train()
            print(f"Starting epoch {epoch}")
            postfix = {}

            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                train_pbar.set_description(f"Training Epoch {epoch}")
                for batch in train_pbar:
                    loss, metrics_dict = self.model.shared_step(batch, environment=train_env)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            epoch_val_loss = 0.0
            # `no_grad` as a context manager restores the previous grad mode even
            # if an exception is raised while validating.
            with torch.no_grad(), tqdm.tqdm(valid_env) as val_pbar:
                val_pbar.set_description(f"Validation Epoch {epoch}")
                for batch in val_pbar:
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch, environment=valid_env
                    )
                    # Accumulate a python float, so that the progress bar shows a
                    # number rather than the repr of a Tensor.
                    epoch_val_loss += float(batch_val_loss)
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_epoch = epoch
            if epoch - best_epoch > self.early_stop_patience:
                print(f"Early stopping at epoch {epoch}.")
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Adds command-line arguments for this Method to an argument parser."""
        parser.add_arguments(cls.HParams, "hparams")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Creates an instance of this Method from the parsed arguments."""
        hparams: cls.HParams = args.hparams
        return cls(hparams=hparams)
def demo_simple():
    """Build a Setting and a Method directly in code, apply one onto the other,
    and print the results.
    """
    from sequoia.settings.sl import DomainIncrementalSLSetting

    # The continual learning problem to solve.
    cl_setting = DomainIncrementalSLSetting(dataset="fashionmnist", batch_size=32)

    # The method that will be evaluated on it.
    demo_method = DemoMethod()

    # (Optional) Run-level options such as `debug`, `render` and `device`, which
    # belong neither to the Setting nor to the Method.
    run_config = Config(debug=True, render=False, device="cpu")

    # Apply the method to the setting, using that config for the run.
    outcome = cl_setting.apply(demo_method, config=run_config)

    print(outcome.summary())
    print(f"objective: {outcome.objective}")
def demo_command_line():
    """Same demo as `demo_simple`, with the Method, Setting and Config all
    created from command-line arguments.
    """
    arg_parser = ArgumentParser(description=__doc__)

    # Register the Method's own arguments first...
    DemoMethod.add_argparse_args(arg_parser)
    # ...then those of the Setting and of the Config (options like log_dir,
    # debug, etc. that belong to neither of the two), using simple-parsing.
    arg_parser.add_arguments(DomainIncrementalSLSetting, "setting")
    arg_parser.add_arguments(Config, "config")

    parsed = arg_parser.parse_args()

    # The Method is built from the parsed arguments; the Setting and the Config
    # are read directly from their destinations on the namespace.
    demo_method = DemoMethod.from_argparse_args(parsed)
    cl_setting = parsed.setting
    run_config = parsed.config

    # Apply the DemoMethod onto the chosen setting and report.
    outcome = cl_setting.apply(demo_method, config=run_config)
    print(outcome.summary())
    print(f"objective: {outcome.objective}")
if __name__ == "__main__":
    # Example: evaluate a Method on a single CL setting.
    #
    # Option 1 (disabled): run the demo, creating the Setting and the Method
    # directly in code.
    # demo_simple()
    #
    # Option 2 (active): same as option 1, but the options for the Setting and
    # the Method are customized from the command-line.
    demo_command_line()
    #
    # Bonus (disabled): evaluate on *ALL* the applicable settings, and aggregate
    # the results in a nice little LaTeX-formatted table.
    # from examples.demo_utils import demo_all_settings
    # all_results = demo_all_settings(DemoMethod)
================================================
FILE: examples/basic/quick_demo_ewc.py
================================================
""" Example script: Defines a new Method based on the DemoMethod from the
quick_demo.py script, adding an EWC-like loss to prevent the weights from
changing too much between tasks.
"""
import sys
from copy import deepcopy
from dataclasses import dataclass
from typing import ClassVar, Dict, Optional, Tuple
import gym
import torch
from torch import Tensor
from examples.basic.quick_demo import DemoMethod, MyModel
from sequoia.settings import DomainIncrementalSLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from sequoia.utils.utils import dict_intersection
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
class MyImprovedModel(MyModel):
    """Adds an ewc-like penalty to the demo model."""

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
        ewc_coefficient: float = 1.0,
        ewc_p_norm: int = 2,
    ):
        """Create the model.

        Parameters
        ----------
        observation_space, action_space, reward_space : gym.Space
            Passed as-is to `MyModel`.
        ewc_coefficient : float
            Factor applied to the ewc-like loss before it is added to the base loss.
        ewc_p_norm : int
            Order of the norm used to measure the distance to the anchor weights.
        """
        super().__init__(
            observation_space,
            action_space,
            reward_space,
        )
        self.ewc_coefficient = ewc_coefficient
        self.ewc_p_norm = ewc_p_norm

        # 'Anchor' weights: copy of the parameters taken at the last task switch.
        # Empty until the first update in `on_task_switch`.
        self.previous_model_weights: Dict[str, Tensor] = {}

        self._previous_task: Optional[int] = None
        self._n_switches: int = 0

    def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):
        """Same as `MyModel.shared_step`, with the weighted ewc-like loss added.

        Returns the total loss and the metrics dict of the base model, extended
        with an "ewc_loss" entry.
        """
        base_loss, metrics = super().shared_step(batch, *args, **kwargs)
        ewc_loss = self.ewc_coefficient * self.ewc_loss()
        # Log a plain float (like the "accuracy" metric of the base model) rather
        # than a tensor that is still attached to the autograd graph.
        metrics["ewc_loss"] = float(ewc_loss)
        return base_loss + ewc_loss, metrics

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Executed when the task switches (to either a known or unknown task).

        Nothing is stored on the very first call. On later calls, the anchor
        weights are replaced by a copy of the current parameters, unless
        `task_id` is a known task equal to the previous one.
        """
        if self._previous_task is None and self._n_switches == 0:
            logger.debug("Starting the first task, no EWC update.")
        elif task_id is None or task_id != self._previous_task:
            # NOTE: We also switch between unknown tasks.
            logger.debug(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                deepcopy({k: v.detach() for k, v in self.named_parameters()})
            )
        self._n_switches += 1

    def ewc_loss(self) -> Tensor:
        """Gets an 'ewc-like' regularization loss.

        NOTE: This is a simplified version of EWC where the loss is the P-norm
        between the current weights and the weights as they were on the beginning
        of the task.

        Returns the float `0.0` as long as no anchor weights have been stored.
        """
        # Gate on the anchor weights themselves rather than on `_previous_task`:
        # when task ids are unknown, `_previous_task` remains None even after the
        # anchors were stored, and the penalty must still apply in that case.
        if not self.previous_model_weights:
            # We're in the first task: do nothing.
            return 0.0

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.named_parameters())

        loss = 0.0
        for weight_name, (new_w, old_w) in dict_intersection(new_weights, old_weights):
            loss += torch.dist(new_w, old_w.type_as(new_w), p=self.ewc_p_norm)
        return loss
class ImprovedDemoMethod(DemoMethod):
    """Improved version of the demo method, that adds an ewc-like regularizer."""

    # Name of this method:
    name: ClassVar[str] = "demo_ewc"

    @dataclass
    class HParams(DemoMethod.HParams):
        """Hyperparameters of this new improved method. (Adds ewc params)."""

        # Coefficient of the ewc-like loss.
        ewc_coefficient: float = 1.0
        # Distance norm used in the ewc loss.
        ewc_p_norm: int = 2

    def __init__(self, hparams: Optional[HParams] = None):
        """Create the method.

        Parameters
        ----------
        hparams : HParams, optional
            Hyper-parameters to use. When omitted, the default `HParams()` are used.
        """
        # NOTE: `HParams` is a plain dataclass (as is `DemoMethod.HParams`), so the
        # defaults are created by instantiating it, like the base class does.
        super().__init__(hparams=hparams or self.HParams())

    def configure(self, setting: DomainIncrementalSLSetting):
        """Create the improved model and its optimizer from the setting's spaces."""
        # Use the improved model, with the added EWC-like term.
        self.model = MyImprovedModel(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
            ewc_coefficient=self.hparams.ewc_coefficient,
            ewc_p_norm=self.hparams.ewc_p_norm,
        )
        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=self.hparams.learning_rate,
        )

    def on_task_switch(self, task_id: Optional[int]):
        """Forward the task switch to the model, which updates its anchor weights."""
        self.model.on_task_switch(task_id)
def demo_ewc():
    """Demo: Comparing two methods on the same setting.

    Applies `DemoMethod` and then `ImprovedDemoMethod` onto the same setting and
    prints the summary of both results.

    Returns
    -------
    tuple
        The results of `DemoMethod` and of `ImprovedDemoMethod`, in that order.
    """
    ## 1. Create the Setting (same as in quick_demo.py)
    setting = DomainIncrementalSLSetting(dataset="fashionmnist", nb_tasks=5, batch_size=64)
    # setting = DomainIncrementalSLSetting.from_args()

    # 2.1: Get the results for the base method
    base_method = DemoMethod()
    base_results = setting.apply(base_method)

    # 2.2: Get the results for the 'improved' method:
    new_method = ImprovedDemoMethod()
    new_results = setting.apply(new_method)

    # Compare the two results:
    print(
        "\n\nComparison: DemoMethod vs ImprovedDemoMethod - (DomainIncrementalSLSetting, dataset=fashionmnist):"
    )
    print(base_results.summary())
    print(new_results.summary())

    # Hand the results back instead of calling `exit()`, so that this function
    # can be called from other code without terminating the interpreter.
    return base_results, new_results
if __name__ == "__main__":
    # Example: Comparing two methods on the same setting:
    from sequoia.settings import DomainIncrementalSLSetting

    ## 1. Create the Setting (same as in quick_demo.py)
    setting = DomainIncrementalSLSetting(
        dataset="fashionmnist", nb_tasks=5, monitor_training_performance=True
    )
    # setting = DomainIncrementalSLSetting.from_args()

    # Get the results for the base method:
    base_method = DemoMethod()
    base_results = setting.apply(base_method)

    # Get the results for the 'improved' method:
    new_method = ImprovedDemoMethod()
    new_results = setting.apply(new_method)

    print(
        f"\n\nComparison: DemoMethod vs ImprovedDemoMethod - (DomainIncrementalSLSetting, dataset=fashionmnist):"
    )
    print(base_results.summary())
    print(new_results.summary())

    # NOTE: the script stops here. Everything below this `exit()` call is never
    # executed; remove the call in order to run the bonus comparison.
    exit()

    ##
    ## As a little bonus: Evaluate *both* methods on *ALL* their applicable
    ## settings, and aggregate the results in a nice LaTeX-formatted table.
    ##
    from examples.demo_utils import compare_results, demo_all_settings

    # Results of each method, on the "mnist" and "fashionmnist" datasets.
    base_results = demo_all_settings(DemoMethod, datasets=["mnist", "fashionmnist"])

    improved_results = demo_all_settings(
        ImprovedDemoMethod,
        datasets=["mnist", "fashionmnist"],
        monitor_training_performance=True,
    )

    compare_results(
        {
            DemoMethod: base_results,
            ImprovedDemoMethod: improved_results,
        }
    )
================================================
FILE: examples/basic/quick_demo_packnet.py
================================================
from sequoia.methods.packnet_method import PackNetMethod
from sequoia.settings.sl import TaskIncrementalSLSetting
if __name__ == "__main__":
    # Apply PackNet onto a task-incremental version of MNIST with two tasks.
    results = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=2).apply(PackNetMethod())
================================================
FILE: examples/basic/quick_demo_test.py
================================================
""" TODO: Write tests that check that the examples are working correctly.
"""
import contextlib
import sys
import pytest
from examples.basic.quick_demo import demo_command_line, demo_simple
from sequoia.settings import ClassIncrementalSetting, Results
@pytest.mark.timeout(120)
def test_quick_demo(monkeypatch):
    """Run the quick demo and check that the results it reports fall within
    the expected ranges.
    """
    results_type = ClassIncrementalSetting.Results
    original_summary = results_type.summary
    # Filled in by the patched `summary` below, with the Results object on which
    # the demo calls `summary()`.
    captured = {"results": None}

    def recording_summary(self: ClassIncrementalSetting.Results):
        captured["results"] = self
        return original_summary(self)

    monkeypatch.setattr(results_type, "summary", recording_summary)

    demo_simple()

    from sequoia.common.metrics import ClassificationMetrics

    results = captured["results"]

    # NOTE: Results aren't going to give *exactly* the same results, so we can't
    # test like this directly:
    # assert results.average_metrics_per_task == [
    #     ClassificationMetrics(n_samples=1984, accuracy=0.500504),
    #     ClassificationMetrics(n_samples=2016, accuracy=0.499504),
    #     ClassificationMetrics(n_samples=1984, accuracy=0.817036),
    #     ClassificationMetrics(n_samples=2016, accuracy=0.835317),
    #     ClassificationMetrics(n_samples=1984, accuracy=0.99748),
    # ]

    # Number of test samples expected for each of the five tasks.
    expected_n_samples = (1984, 2016, 1984, 2016, 1984)
    for task_index, n_samples in enumerate(expected_n_samples):
        assert results.final_performance_metrics[task_index].n_samples == n_samples

    # (lower, upper) bounds on the final accuracy of each task.
    accuracy_bounds = (
        (0.48, 0.55),
        (0.48, 0.70),
        (0.60, 1.00),
        (0.70, 1.00),
        (0.99, 1.00),
    )
    for task_index, (lower, upper) in enumerate(accuracy_bounds):
        assert lower <= results.final_performance_metrics[task_index].accuracy <= upper
================================================
FILE: examples/clcomp21/README.md
================================================
## Example Submissions for CLVision Workshop
Examples in this folder are aimed at solving the supervised learning track of the competition.
Each example builds on top of the previous, in a manner that improves the overall performance you can expect on any given CL setting.
As such, it is recommended that you take a look at the examples in the following order:
0. [DummyMethod](dummy_method.py)
Non-parametric method that simply returns a random prediction for each observation.
1. [Simple Classifier](classifier.py):
Standard neural net classifier without any CL-related mechanism. Works in the SL track, but has very poor performance.
2. [Multi-Head / Task Inference Classifier](multihead_classifier.py):
Performs multi-head prediction, and a simple form of task inference. Gets better results than the previous example.
3. [CL Regularized Classifier](regularization_example.py):
Adds a simple CL regularization loss to the multihead classifier above.
## RL Examples:
For RL, you can take a look at these examples:
- [A2C Example](a2c_example.py):
Example where A2C is implemented from scratch as a Method for the RL track. The code for A2C was adapted from [this blogpost.](https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f)
- [SB3 Example](sb3_example.py):
Example of how we can extend an existing Method from Stable-Baselines3.
================================================
FILE: examples/clcomp21/__init__.py
================================================
================================================
FILE: examples/clcomp21/a2c_example.py
================================================
from argparse import Namespace
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from gym import spaces
from gym.spaces.utils import flatdim
# TODO: Migrate stuff to directly import simple-parsing's hparams module.
# from simple_parsing.helpers.hparams import HyperParameters
from simple_parsing import ArgumentParser
from torch import Tensor
from torch.distributions import Categorical
from sequoia.common.hparams import HyperParameters, log_uniform
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings.rl import ActiveEnvironment, RLSetting
class ActorCritic(nn.Module):
    """Actor and critic networks for discrete action spaces.

    Both networks are MLPs on the flattened observation when the flat dimension
    of the `x` space is below 100. Otherwise the `x` space is asserted to be an
    `Image` space, and both networks are built on top of one shared
    convolutional encoder.
    """

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        hidden_size: int,
    ):
        """Create the actor and critic networks.

        Parameters
        ----------
        observation_space : gym.Space
            Space of the observations. Only its `x` attribute is used.
        action_space : gym.Space
            Space of the actions. Must be a `spaces.Discrete` space.
        hidden_size : int
            Width of the hidden layer of the actor and of the critic.

        Raises
        ------
        NotImplementedError
            If `action_space` is not a `spaces.Discrete` space.
        """
        super().__init__()
        self.observation_space = observation_space
        # NOTE: Only the `x` portion of the space is used, not the task labels.
        # See the note in `forward` for why.
        self.num_inputs = flatdim(self.observation_space.x)
        self.hidden_size = hidden_size

        if not isinstance(action_space, spaces.Discrete):
            raise NotImplementedError("This example only works with discrete action spaces.")
        self.action_space = action_space
        self.num_actions = self.action_space.n

        def build_head(trunk: nn.Module, in_features: int, n_outputs: int) -> nn.Sequential:
            # `trunk`, followed by a one-hidden-layer MLP with `n_outputs` outputs.
            return nn.Sequential(
                trunk,
                nn.Linear(in_features, self.hidden_size),
                nn.ReLU(inplace=True),
                nn.Linear(self.hidden_size, n_outputs),
            )

        if self.num_inputs >= 100:
            # Large input space: use a convolutional encoder.
            assert isinstance(self.observation_space.x, Image)
            in_channels = self.observation_space.x.channels
            encoder_layers = [
                nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(6),
                nn.ReLU(inplace=True),
                nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(16),
                nn.ReLU(inplace=True),
                nn.Conv2d(16, 16, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(16),
                nn.AdaptiveAvgPool2d(output_size=(8, 8)),  # [16, 8, 8]
                nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(32),  # [32, 6, 6]
                nn.ReLU(inplace=True),
                nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(32),  # [32, 4, 4]
                nn.Flatten(),
            ]
            self.encoder = nn.Sequential(*encoder_layers)
            # NOTE: The same encoder instance is the first module of both the
            # critic and the actor. Its output has 32 * 4 * 4 = 512 features.
            self.critic = build_head(self.encoder, 512, 1)
            self.actor = build_head(self.encoder, 512, self.num_actions)
        else:
            # Reasonably-small input space: plain MLPs on the flattened input.
            self.critic = build_head(nn.Flatten(), self.num_inputs, 1)
            self.actor = build_head(nn.Flatten(), self.num_inputs, self.num_actions)

    def forward(self, observation: RLSetting.Observations) -> Tuple[Tensor, Categorical]:
        """Return the critic's value and the actor's action distribution.

        `observation.x` may be a single observation or a batch: a batch dimension
        is added when it has no more dimensions than the `x` space, and is removed
        again from the outputs.
        """
        # NOTE: Here you could for instance concatenate the task labels
        # (`observation.task_labels`) onto the state to make the model multi-task!
        # However if you target the IncrementalRLSetting or above, you might not
        # have these task labels at test-time, so that would have to be taken into
        # consideration (e.g. can't concat None to a Tensor).
        state = torch.as_tensor(observation.x, dtype=torch.float)
        is_batched = state.ndim > len(self.observation_space.x.shape)

        if is_batched:
            value = self.critic(state)
            policy_logits = self.actor(state)
        else:
            # Add a batch dimension for the networks, and strip it from the outputs.
            state = state.unsqueeze(0)
            value = self.critic(state).squeeze(0)
            policy_logits = self.actor(state).squeeze(0)

        return value, Categorical(logits=policy_logits)
class ExampleA2CMethod(Method, target_setting=RLSetting):
    """Example advantage actor-critic (A2C) Method for RL settings.

    Most of the code here was taken from:
    https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f

    NOTE: `fit` only works when the training environment isn't vectorized.
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-Parameters of the model, as a dataclass.

        Fields get command-line arguments with simple-parsing.
        """

        # Hidden size (representation size).
        hidden_size: int = 256
        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)
        # Discount factor
        gamma: float = 0.99
        # Coefficient for the entropy term in the loss formula.
        entropy_term_coefficient: float = 0.001
        # Maximum length of an episode, when desired. (Generally not needed).
        max_episode_steps: Optional[int] = None

    def __init__(self, hparams: Optional[HParams] = None, render: bool = False):
        """Create the Method.

        Parameters
        ----------
        hparams : Optional[HParams]
            Hyper-parameters to use. Defaults to `HParams()` when not given.
        render : bool
            Whether to call `train_env.render()` at every training step.
        """
        self.hparams = hparams or self.HParams()
        # Id of the current task, only used to name the plot files.
        self.task: int = 0
        self.plots_dir: Path = Path("plots")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.render = render

    def configure(self, setting: RLSetting):
        """Create the actor-critic model and its optimizer for the given setting."""
        self.actor_critic = ActorCritic(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            hidden_size=self.hparams.hidden_size,
        ).to(self.device)
        self.ac_optimizer = optim.Adam(
            self.actor_critic.parameters(), lr=self.hparams.learning_rate
        )
        # If there is a limit on the number of steps per task, then observe that limit.
        self.max_training_steps = setting.steps_per_phase

    def fit(self, train_env: ActiveEnvironment, valid_env: ActiveEnvironment):
        """Train on `train_env`, performing one actor-critic update per episode.

        Training stops when the environment is closed or when
        `self.max_training_steps` steps have been performed. Reward and
        episode-length curves are then saved under `self.plots_dir`.
        `valid_env` is not used by this example.
        """
        assert isinstance(train_env, gym.Env)  # Just to illustrate that it's a gym Env.
        # NOTE: This example only works if the environment isn't vectorized.
        all_lengths: List[int] = []
        average_lengths: List[float] = []
        all_rewards: List[float] = []
        episode = 0
        total_steps = 0

        while not train_env.is_closed() and total_steps < self.max_training_steps:
            episode += 1
            log_probs: List[Tensor] = []
            values: List[Tensor] = []
            rewards: List[float] = []
            entropy_term = 0

            observation: RLSetting.Observations = train_env.reset()
            # Convert numpy arrays in the observation into Tensors on the right device.
            observation = observation.torch(device=self.device)

            done = False
            episode_steps = 0
            while not done and total_steps < self.max_training_steps:
                episode_steps += 1
                # Keep `value` attached to the graph: the critic loss needs its gradient.
                value, policy_dist = self.actor_critic.forward(observation)

                action = policy_dist.sample()
                log_prob = policy_dist.log_prob(action)
                entropy = policy_dist.entropy()
                # NOTE: 'correct' thing to do would be to pass Actions objects of the
                # right type. This is for future-proofing this Method so it can
                # still function in the future if new settings are added.
                action = RLSetting.Actions(y_pred=action.cpu().detach().numpy())

                if self.render:
                    train_env.render()

                new_observation: RLSetting.Observations
                reward: RLSetting.Rewards
                new_observation, reward, done, _ = train_env.step(action)
                new_observation = new_observation.torch(device=self.device)
                total_steps += 1

                # Likewise, in order to support different future settings, we receive a
                # Rewards object, which contains the reward value (the float when the
                # env isn't batched.).
                reward_value: float = reward.y

                rewards.append(reward_value)
                values.append(value)
                log_probs.append(log_prob)
                entropy_term += entropy
                observation = new_observation

            # Value of the last observation, used to bootstrap the returns below.
            # It is only a target, so no gradient is needed for it.
            with torch.no_grad():
                last_value, _ = self.actor_critic.forward(new_observation)
            bootstrap_value = float(last_value.reshape(-1)[0])

            all_rewards.append(np.sum(rewards))
            all_lengths.append(episode_steps)
            average_lengths.append(np.mean(all_lengths[-10:]))
            if episode % 10 == 0:
                print(
                    f"step {total_steps}/{self.max_training_steps}, "
                    f"episode: {episode}, "
                    f"reward: {np.sum(rewards)}, "
                    f"total length: {episode_steps}, "
                    f"average length: {average_lengths[-1]} \n"
                )

            if total_steps >= self.max_training_steps:
                print(f"Reached the limit of {self.max_training_steps} steps.")
                break

            # Compute the discounted returns (Q values), going backward in time and
            # using the last value from the critic as the final value estimate.
            returns = np.zeros(len(rewards), dtype=np.float32)
            running_return = bootstrap_value
            for t in reversed(range(len(rewards))):
                running_return = float(rewards[t]) + self.hparams.gamma * running_return
                returns[t] = running_return

            # Update the actor and the critic. Everything is flattened to shape
            # (T,) so that the element-wise products below can't broadcast.
            value_estimates = torch.stack(values).reshape(-1)
            returns_tensor = torch.as_tensor(returns, dtype=torch.float, device=self.device)
            log_probs_tensor = torch.stack(log_probs).reshape(-1)

            advantage = returns_tensor - value_estimates
            # The advantage is a constant for the actor: only the critic loss
            # propagates gradients into the value estimates.
            actor_loss = (-log_probs_tensor * advantage.detach()).mean()
            critic_loss = 0.5 * advantage.pow(2).mean()
            # NOTE(review): the entropy term is *added* to the loss, which penalizes
            # entropy. An exploration bonus would subtract it — confirm the intent.
            ac_loss = (
                actor_loss + critic_loss + self.hparams.entropy_term_coefficient * entropy_term
            )

            self.ac_optimizer.zero_grad()
            ac_loss.backward()
            self.ac_optimizer.step()

        self._save_plots(all_rewards, all_lengths, average_lengths)

    def _save_plots(
        self,
        all_rewards: List[float],
        all_lengths: List[int],
        average_lengths: List[float],
    ) -> None:
        """Save the reward and episode-length curves of the current task as PNG files."""
        self.plots_dir.mkdir(parents=True, exist_ok=True)
        smoothed_rewards = pd.Series(all_rewards, dtype=float).rolling(10).mean().tolist()

        # Each plot gets its own figure, which is closed once saved, so that curves
        # don't leak from one plot (or from one task) into the next.
        rewards_figure = plt.figure()
        plt.plot(all_rewards)
        plt.plot(smoothed_rewards)
        plt.xlabel("Episode")
        plt.ylabel("Reward")
        plt.savefig(self.plots_dir / f"task_{self.task}_0.png")
        plt.close(rewards_figure)

        lengths_figure = plt.figure()
        plt.plot(all_lengths)
        plt.plot(average_lengths)
        plt.xlabel("Episode")
        plt.ylabel("Episode length")
        plt.savefig(self.plots_dir / f"task_{self.task}_1.png")
        plt.close(lengths_figure)

    def get_actions(
        self, observations: RLSetting.Observations, action_space: gym.Space
    ) -> RLSetting.Actions:
        """Sample actions from the current policy for the given observations."""
        # Move the observations to the right device, converting numpy arrays to tensors.
        observations = observations.torch(device=self.device)
        # Pure inference: no need to build an autograd graph.
        with torch.no_grad():
            _, action_dist = self.actor_critic(observations)
            sampled_action = action_dist.sample()
        return RLSetting.Actions(y_pred=sampled_action)

    # The methods below aren't required, but are good to add.

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called by the Setting when switching between tasks.

        Parameters
        ----------
        task_id : Optional[int]
            the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        if isinstance(task_id, int):
            self.task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Add the command-line arguments for the hyper-parameters to `parser`."""
        parser.add_arguments(cls.HParams, dest="hparams")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Create the Method from the parsed command-line arguments."""
        hparams: ExampleA2CMethod.HParams = args.hparams
        return cls(hparams=hparams)

    def get_search_space(self, setting: RLSetting) -> Dict:
        """Return the hyper-parameter search space, as given by the HParams."""
        return self.hparams.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict) -> None:
        """Replace the current hyper-parameters with those in `new_hparams`."""
        self.hparams = self.HParams.from_dict(new_hparams)
if __name__ == "__main__":
    # Create the Setting. Only ONE of the options below should be active: each
    # assignment to `setting` would otherwise discard the previously built one.
    from sequoia.settings.rl import IncrementalRLSetting, TraditionalRLSetting  # noqa: F401

    # CartPole for debugging:
    # setting = TraditionalRLSetting(dataset="CartPole-v0", nb_tasks=1, train_max_steps=10_000)

    # OR: Incremental CartPole:
    setting = IncrementalRLSetting(dataset="CartPole-v0", nb_tasks=5, train_steps_per_task=10_000)

    # OR: Setting of the RL Track of the competition:
    # setting = IncrementalRLSetting.load_benchmark("rl_track")

    # Create the Method:
    method = ExampleA2CMethod(render=True)

    # Apply the Method onto the Setting to get Results.
    results = setting.apply(method)
    print(results.summary())

    # BONUS: Running a hyper-parameter sweep:
    # method.hparam_sweep(setting)
================================================
FILE: examples/clcomp21/a2c_example_test.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.rl import IncrementalRLSetting, RLSetting
from sequoia.settings.sl import ClassIncrementalSetting
from .a2c_example import ExampleA2CMethod
from .dummy_method import DummyMethod
@slow
@pytest.mark.timeout(120)
def test_cartpole_state(cartpole_state_setting: SettingProxy[RLSetting]):
    """Run the A2C example on the cartpole-state setting and sanity-check the results."""
    results: RLSetting.Results = cartpole_state_setting.apply(ExampleA2CMethod())
    assert results.to_log_dict()

    # TODO: The example isn't actually performing that well! We should try to get
    # something that can easily and reproducibly solve cartpole to 200, if possible.
    # assert 150 < results.average_final_performance.mean_episode_length
    # TODO: Increase this bound when performance is improved.
    final_episode_length = results.average_final_performance.mean_episode_length
    assert 5 < final_episode_length
@slow
@pytest.mark.timeout(120)
def test_incremental_cartpole_state(
    incremental_cartpole_state_setting: SettingProxy[IncrementalRLSetting],
):
    """Run the A2C example on the incremental cartpole-state setting."""
    a2c_method = ExampleA2CMethod()
    results: IncrementalRLSetting.Results = incremental_cartpole_state_setting.apply(a2c_method)
    assert results.to_log_dict()

    # TODO: Increase this bound
    lower_bound = 5
    assert lower_bound <= results.average_online_performance.objective
    assert lower_bound <= results.average_final_performance.objective
@slow
@pytest.mark.timeout(300)
def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]):
    """Applies a Method to the Setting of the RL track of the competition."""
    # NOTE(review): this test file is about `ExampleA2CMethod`, yet the method applied
    # here is the random `DummyMethod` — confirm whether that is intentional.
    results: IncrementalRLSetting.Results = rl_track_setting.apply(DummyMethod())
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    # TODO: get an estimate of the upper bound of the random method on the RL track.
    upper_bound = 1_000  # this is way too large.

    online_perf = results.average_online_performance
    assert 0 < online_perf.objective < upper_bound

    final_perf = results.average_final_performance
    assert 0 < final_perf.objective < upper_bound
================================================
FILE: examples/clcomp21/classifier.py
================================================
""" Example Method for the SL track: Uses a simple classifier, without any CL mechanism.
As you'd expect, this Method exhibits complete forgetting of all previous tasks.
You can use this model and method as a jumping off point for your own submission.
"""
from argparse import Namespace
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Tuple, Type
import gym
import torch
import tqdm
from gym import spaces
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor, nn
from torch.optim.optimizer import Optimizer
from torchvision.models import ResNet, resnet18
from sequoia.common.hparams import HyperParameters, log_uniform
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings import ClassIncrementalSetting
from sequoia.settings.sl import PassiveEnvironment
from sequoia.settings.sl.incremental import Actions, Environment, Observations, Rewards
# Hyper-parameters used by `Classifier` (optimizer settings, exposed there as
# `Classifier.HParams`) and by `ExampleMethod` (training-loop settings), which
# registers them as command-line arguments under the "hparams" destination.
@dataclass
class HParams(HyperParameters):
    """Hyper-parameters of the demo model."""

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
    # L2 regularization coefficient.
    weight_decay: float = log_uniform(1e-9, 1e-3, default=1e-6)
    # Maximum number of training epochs per task.
    max_epochs_per_task: int = 10
    # Number of epochs with increasing validation loss after which we stop training.
    early_stop_patience: int = 2
class Classifier(nn.Module):
    """Plain image classifier, without any continual-learning mechanism.

    An encoder, chosen from the image size (see `create_encoder`), feeds a single
    linear output layer producing one logit per class.
    """

    HParams: ClassVar[Type[HParams]] = HParams

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
        hparams: HParams = None,
    ):
        """Build the encoder, the output layer and the loss for the given spaces.

        `action_space` must be a `spaces.Discrete` equal to `reward_space`. The images
        are described by `observation_space.x`. When `hparams` isn't given, the default
        `HParams()` are used.
        """
        super().__init__()
        self.hparams = hparams or self.HParams()
        image_space: Image = observation_space.x

        # This example is intended for classification / discrete action spaces.
        assert isinstance(action_space, spaces.Discrete)
        assert action_space == reward_space

        self.n_classes = action_space.n
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        encoder, representations_size = self.create_encoder(image_space)
        self.encoder = encoder
        self.representations_size = representations_size
        # The output head needs `representations_size` and `n_classes` to be set.
        self.output = self.create_output_head()
        self.loss = nn.CrossEntropyLoss()

    def create_output_head(self) -> nn.Module:
        """Create a linear layer mapping the representations to the class logits."""
        output_head = nn.Linear(self.representations_size, self.n_classes)
        return output_head.to(self.device)

    def configure_optimizers(self) -> Optimizer:
        """Create an Adam optimizer over all the parameters, using the hparams."""
        hparams = self.hparams
        return torch.optim.Adam(
            self.parameters(),
            lr=hparams.learning_rate,
            weight_decay=hparams.weight_decay,
        )

    def create_encoder(self, image_space: Image) -> Tuple[nn.Module, int]:
        """Create an encoder for the given image space.

        Parameters
        ----------
        image_space : Image
            A subclass of `gym.spaces.Box` for images. Represents the space the images
            will come from during training and testing. Its attributes of interest
            include `c`, `w`, `h`, `shape` and `dtype`.

        Returns
        -------
        Tuple[nn.Module, int]
            The encoder (moved to `self.device`), as well as the size of the
            representations it will produce.

        Raises
        ------
        NotImplementedError
            If no encoder is available for the given image dimensions.
        """
        encoder: nn.Module
        features: int
        if image_space.width == image_space.height == 28:
            # Setup for mnist variants.
            # (not part of the competition, but used for debugging below).
            conv_layers = [
                nn.Conv2d(image_space.channels, 6, 5),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Conv2d(6, 16, 5),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Flatten(),
            ]
            encoder = nn.Sequential(*conv_layers)
            features = 256
        elif image_space.width == image_space.height == 32:
            # Synbols dataset: use a resnet18 by default.
            resnet: ResNet = resnet18(pretrained=False)
            features = resnet.fc.in_features
            # Disable/Remove the last layer, so the resnet outputs its features.
            resnet.fc = nn.Sequential()
            encoder = resnet
        else:
            raise NotImplementedError(
                f"TODO: Add an encoder for the given image space {image_space}"
            )
        encoder = encoder.to(self.device)
        return encoder, features

    def forward(self, observations: Observations) -> Tensor:
        """Return the classification logits for a batch of observations."""
        observations = observations.to(self.device)
        images = observations.x
        # NOTE: here we don't make use of the task labels.
        task_labels = observations.task_labels
        representations = self.encoder(images)
        return self.output(representations)

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, the predictions have to be sent to the environment before the
            Rewards (e.g. image labels) are given back.
        environment : Environment
            The environment we're currently interacting with. Used to obtain the
            rewards when they aren't already part of the batch.

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)

        # A missing reward means that we're expected to give our actions to the
        # environment first, in order to get the rewards back.
        if rewards is None:
            rewards = environment.send(Actions(y_pred))
        assert rewards is not None

        image_labels = rewards.y.to(self.device)
        loss = self.loss(logits, image_labels)

        n_correct = (y_pred == image_labels).sum().float()
        accuracy = n_correct / len(image_labels)
        return loss, {"accuracy": f"{accuracy.cpu().item():3.2%}"}
class ExampleMethod(Method, target_setting=ClassIncrementalSetting):
    """Minimal example of a Method usable only in the SL track of the competition.

    This method trains a model of type `ModelType` (a simple `Classifier` by default).
    """

    ModelType: ClassVar[Type[Classifier]] = Classifier

    def __init__(self, hparams: HParams = None):
        """Create the Method.

        Parameters
        ----------
        hparams : HParams, optional
            Hyper-parameters of the model and of the training loop. Defaults to the
            default hyper-parameters of `ModelType`.
        """
        # Use the hyper-parameter class of the model, which is also the one that
        # `add_argparse_args` registers on the command-line.
        self.hparams: HParams = hparams or self.ModelType.HParams()
        # We will create those when `configure` will be called, before training.
        self.model: Classifier
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: ClassIncrementalSetting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        self.model = self.ModelType(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
            # Without this, the model silently falls back to its default
            # hyper-parameters (learning rate, weight decay).
            hparams=self.hparams,
        )
        self.optimizer = self.model.configure_optimizers()

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Example train loop.

        You can do whatever you want with train_env and valid_env here.

        Trains for at most `hparams.max_epochs_per_task` epochs, and stops early when
        the validation loss hasn't improved for more than
        `hparams.early_stop_patience` epochs.

        NOTE: In the Settings where task boundaries are known (in this case all
        the supervised CL settings), this will be called once per task.
        """
        # configure() will have been called by the setting before we get here.
        best_val_loss = inf
        best_epoch = 0
        for epoch in range(self.hparams.max_epochs_per_task):
            self.model.train()
            print(f"Starting epoch {epoch}")

            # Training loop:
            with tqdm.tqdm(train_env) as train_pbar:
                postfix = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for batch in train_pbar:
                    loss, metrics_dict = self.model.shared_step(batch, environment=train_env)
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            epoch_val_loss = 0.0
            # `no_grad` restores the previous grad mode even if validation raises.
            with torch.no_grad(), tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                for batch in val_pbar:
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch, environment=valid_env
                    )
                    # Accumulate a plain float rather than a tensor.
                    epoch_val_loss += batch_val_loss.item()
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)

            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                best_epoch = epoch
            if epoch - best_epoch > self.hparams.early_stop_patience:
                print(f"Early stopping at epoch {epoch}.")
                # NOTE: You should probably reload the model weights as they were at the
                # best epoch.
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        with torch.no_grad():
            logits = self.model(observations)
        # Get the predicted classes
        y_pred = logits.argmax(dim=-1)
        return self.target_setting.Actions(y_pred)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Adds command-line arguments for this Method to an argument parser."""
        parser.add_arguments(cls.ModelType.HParams, "hparams")

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Creates an instance of this Method from the parsed arguments."""
        hparams: Classifier.HParams = args.hparams
        return cls(hparams=hparams)
if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    from sequoia.common import Config
    from sequoia.settings import ClassIncrementalSetting

    # Create the Method, either:
    # - Manually:
    # method = ExampleMethod()
    # - Or from the command-line:
    parser = ArgumentParser()
    ExampleMethod.add_argparse_args(parser)
    method = ExampleMethod.from_argparse_args(parser.parse_args())

    # Create the Setting, with one of the following difficulties:

    # - "Easy": Domain-Incremental MNIST Setting, useful for quick debugging, but
    #   beware that the action space is different than in class-incremental!
    #   (which is the type of Setting used in the SL track!)
    # from sequoia.settings.sl.class_incremental.domain_incremental import DomainIncrementalSetting
    # setting = DomainIncrementalSetting(
    #     dataset="mnist", nb_tasks=5, monitor_training_performance=True
    # )

    # - "Medium": Class-Incremental MNIST Setting, useful for quick debugging:
    # setting = ClassIncrementalSetting(
    #     dataset="mnist",
    #     nb_tasks=5,
    #     monitor_training_performance=True,
    #     known_task_boundaries_at_test_time=False,
    #     batch_size=32,
    #     num_workers=4,
    # )

    # - "HARD": Class-Incremental Synbols, more challenging.
    # NOTE: This Setting is very similar to the one used for the SL track of the
    # competition.
    synbols_options = dict(
        dataset="synbols",
        nb_tasks=12,
        known_task_boundaries_at_test_time=False,
        monitor_training_performance=True,
        batch_size=32,
        num_workers=4,
    )
    setting = ClassIncrementalSetting(**synbols_options)

    # NOTE: can also pass a `Config` object to `setting.apply`. This object has some
    # configuration options like device, data_dir, etc.
    config = Config(data_dir="data")
    results = setting.apply(method, config=config)
    print(results.summary())
================================================
FILE: examples/clcomp21/classifier_test.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.sl import ClassIncrementalSetting
from .classifier import Classifier, ExampleMethod
@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Train the example classifier for one epoch per task on class-incremental mnist."""
    hparams = Classifier.HParams(max_epochs_per_task=1)
    results: ClassIncrementalSetting.Results = mnist_setting.apply(ExampleMethod(hparams=hparams))
    assert results.to_log_dict()

    online_objective = results.average_online_performance.objective
    assert 0.60 <= online_objective <= 1.00
    final_objective = results.average_final_performance.objective
    assert 0.10 <= final_objective <= 0.30
@slow
@pytest.mark.timeout(300)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Train the example classifier for one epoch per task on the SL track setting."""
    hparams = Classifier.HParams(max_epochs_per_task=1)
    results: ClassIncrementalSetting.Results = sl_track_setting.apply(
        ExampleMethod(hparams=hparams)
    )
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    online_objective = results.average_online_performance.objective
    assert 0.15 <= online_objective <= 0.30

    final_objective = results.average_final_performance.objective
    assert 0.01 <= final_objective <= 0.05
================================================
FILE: examples/clcomp21/conftest.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.settings.rl import IncrementalRLSetting, TraditionalRLSetting
from sequoia.settings.sl import ClassIncrementalSetting, TaskIncrementalSLSetting
@pytest.fixture()
def mnist_setting():
    """Proxy to a class-incremental mnist setting that monitors training performance."""
    options = dict(dataset="mnist", monitor_training_performance=True)
    return SettingProxy(ClassIncrementalSetting, **options)
@pytest.fixture()
def task_incremental_mnist_setting():
    """Proxy to a task-incremental mnist setting that monitors training performance."""
    options = dict(dataset="mnist", monitor_training_performance=True)
    return SettingProxy(TaskIncrementalSLSetting, **options)
@pytest.fixture()
def fashion_mnist_setting():
    """Proxy to a class-incremental fashion-mnist setting that monitors training performance."""
    options = dict(dataset="fashionmnist", monitor_training_performance=True)
    return SettingProxy(ClassIncrementalSetting, **options)
@pytest.fixture()
def sl_track_setting():
    """Proxy to a `ClassIncrementalSetting` created from the "sl_track" preset."""
    # Options that could be passed instead of the preset name:
    # dataset="synbols",
    # nb_tasks=12,
    # class_order=class_order,
    # monitor_training_performance=True,
    return SettingProxy(ClassIncrementalSetting, "sl_track")
@pytest.fixture()
def cartpole_state_setting():
    """Proxy to a single-task `TraditionalRLSetting` on cartpole, with short train/test budgets."""
    options = dict(
        dataset="cartpole",
        train_max_steps=5_000,
        test_max_steps=2_000,
        nb_tasks=1,
    )
    return SettingProxy(TraditionalRLSetting, **options)
@pytest.fixture()
def incremental_cartpole_state_setting():
    """Proxy to a two-task `IncrementalRLSetting` on cartpole, with short train/test budgets."""
    options = dict(
        dataset="cartpole",
        train_max_steps=10_000,
        nb_tasks=2,
        test_max_steps=2_000,
    )
    return SettingProxy(IncrementalRLSetting, **options)
@pytest.fixture()
def rl_track_setting(tmp_path):
    """Proxy to a shortened version of the RL track: 8 monsterkong tasks of 2000 steps."""
    # NOTE: Here instead of loading the `rl_track.yaml`, we instantiate the setting
    # directly, because we want to reduce the length of the task for testing, and it
    # isn't currently possible to both pass a preset yaml file and also pass kwargs to
    # the SettingProxy.
    steps_per_task = 2_000  # Reduced length for testing
    levels = (0, 1, 10, 11, 20, 21, 30, 31)
    task_schedule = {task_index: {"level": level} for task_index, level in enumerate(levels)}

    setting = SettingProxy(
        IncrementalRLSetting,
        dataset="monsterkong",
        train_task_schedule=task_schedule,
        train_steps_per_task=steps_per_task,
        test_steps_per_task=2_000,
        task_labels_at_train_time=True,
    )

    assert setting.steps_per_phase == 2000
    # The keys of the resulting schedule are expected to be the steps at which each
    # task starts.
    expected_task_starts = [2000 * task_index for task_index in range(len(levels))]
    assert sorted(setting.train_task_schedule.keys()) == expected_task_starts
    return setting
================================================
FILE: examples/clcomp21/dummy_method.py
================================================
from typing import Optional
import gym
import numpy as np
import tqdm
from torch import Tensor
from sequoia.methods import Method
from sequoia.settings import Actions, Environment, Observations, Setting
from sequoia.settings.sl import SLSetting
class DummyMethod(Method, target_setting=Setting):
    """Baseline Method that answers each observation with random actions."""

    def __init__(self):
        # Maximum number of passes over the training environment (None: no limit).
        self.max_train_episodes: Optional[int] = None

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        if not isinstance(setting, SLSetting):
            return
        # Being applied in SL, we will only do one "epoch" (a.k.a. "episode").
        self.max_train_episodes = 1

    def fit(self, train_env: Environment, valid_env: Environment):
        """Example train loop, which only sends random actions to `train_env`.

        You can do whatever you want with train_env and valid_env here.

        NOTE: In the Settings where task boundaries are known (in this case all
        the supervised CL settings), this will be called once per task.
        """
        # configure() will have been called by the setting before we get here.
        episodes = 0
        with tqdm.tqdm(desc="training") as train_pbar:
            while not train_env.is_closed():
                for step, batch in enumerate(train_env):
                    # A batch is either just the Observations, or an
                    # (observations, rewards) pair.
                    observations, rewards = (
                        (batch, None) if isinstance(batch, Observations) else batch
                    )

                    batch_size = observations.x.shape[0]
                    y_pred = train_env.action_space.sample()

                    # If we're at the last batch, it might have a different size, so we
                    # give only the required number of values.
                    is_array = isinstance(y_pred, (np.ndarray, Tensor))
                    if is_array and y_pred.shape[0] != batch_size:
                        y_pred = y_pred[:batch_size]

                    if rewards is None:
                        rewards = train_env.send(y_pred)

                    train_pbar.set_postfix({"Episode": episodes, "Step": step})
                    # train as you usually would.

                episodes += 1
                reached_limit = self.max_train_episodes and episodes >= self.max_train_episodes
                if reached_limit:
                    train_env.close()
                    break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Get a batch of predictions (aka actions) for these observations."""
        random_actions = action_space.sample()
        return self.target_setting.Actions(random_actions)
if __name__ == "__main__":
    from sequoia.client import SettingProxy
    from sequoia.common import Config
    from sequoia.settings import ClassIncrementalSetting

    # Create the Method:
    # - Manually:
    method = DummyMethod()

    # NOTE: This Setting is very similar to the one used for the SL track of the
    # competition.
    setting = SettingProxy(ClassIncrementalSetting, "sl_track")
    # setting = SettingProxy(ClassIncrementalSetting,
    #     dataset="synbols",
    #     nb_tasks=12,
    #     known_task_boundaries_at_test_time=False,
    #     monitor_training_performance=True,
    #     batch_size=32,
    #     num_workers=4,
    # )

    # NOTE: can also pass a `Config` object to `setting.apply`. This object has some
    # configuration options like device, data_dir, etc.
    config = Config(data_dir="data")
    results = setting.apply(method, config=config)
    print(results.summary())
================================================
FILE: examples/clcomp21/dummy_method_test.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.rl import IncrementalRLSetting
from sequoia.settings.sl import ClassIncrementalSetting
from .dummy_method import DummyMethod
@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Check that the random method performs at around 10% on class-incremental mnist."""
    results: ClassIncrementalSetting.Results = mnist_setting.apply(DummyMethod())
    assert results.to_log_dict()

    # Accept anything within +/- 50% of the reference value of 10%.
    lower_bound = 0.10 * 0.5
    upper_bound = 0.10 * 1.5
    assert lower_bound <= results.average_online_performance.objective <= upper_bound
    assert lower_bound <= results.average_final_performance.objective <= upper_bound
@slow
@pytest.mark.timeout(300)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Applies the random method to the Setting of the SL track of the competition."""
    results: ClassIncrementalSetting.Results = sl_track_setting.apply(DummyMethod())
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    lower_bound, upper_bound = 0.02, 0.05

    online_perf = results.average_online_performance
    assert lower_bound <= online_perf.objective <= upper_bound

    final_perf = results.average_final_performance
    assert lower_bound <= final_perf.objective <= upper_bound
@slow
@pytest.mark.timeout(300)
def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]):
    """Applies the random method to the Setting of the RL track of the competition."""
    results: IncrementalRLSetting.Results = rl_track_setting.apply(DummyMethod())
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    # TODO: get an estimate of the upper bound of the random method on the RL track.
    upper_bound = 1_000  # this is way too large.

    online_perf = results.average_online_performance
    assert 0 < online_perf.objective < upper_bound

    final_perf = results.average_final_performance
    assert 0 < final_perf.objective < upper_bound
================================================
FILE: examples/clcomp21/multihead_classifier.py
================================================
""" Example Method for the SL track: Multi-Head Classifier with simple task inference.
You can use this model and method as a jumping off point for your own submission.
"""
from dataclasses import dataclass, replace
from logging import getLogger
from typing import ClassVar, Optional, Type
import torch
from gym import Space, spaces
from torch import Tensor, nn
from torch.nn import functional as F
from torch.optim.optimizer import Optimizer
from sequoia.settings.sl.incremental import ClassIncrementalSetting
from sequoia.settings.sl.incremental.objects import Observations
from .classifier import Classifier, ExampleMethod
logger = getLogger(__file__)
class MultiHeadClassifier(Classifier):
# Hyper-parameters of the multi-head classifier: no new field is added here, so
# they are the same as `Classifier.HParams`.
@dataclass
class HParams(Classifier.HParams):
    pass
def __init__(
    self,
    observation_space: Space,
    action_space: spaces.Discrete,
    reward_space: spaces.Discrete,
    hparams: "MultiHeadClassifier.HParams" = None,
):
    """Create the classifier, with a list of per-task output heads.

    The arguments are passed as-is to the constructor of `Classifier`.
    """
    super().__init__(observation_space, action_space, reward_space, hparams=hparams)
    self.current_task_id: int = 0
    # NOTE: The optimizer will be set here (by `configure_optimizers`), so that we
    # can add the parameters of any new output heads to it later.
    self.optimizer: Optional[torch.optim.Optimizer] = None
    # Use one output layer per task, rather than a single layer. The output layer
    # created in the Classifier constructor is used for task 0.
    self.output_heads = nn.ModuleList([self.output])
def configure_optimizers(self) -> Optimizer:
    """Create the optimizer as in the parent class, and keep a reference to it.

    The reference is stored in `self.optimizer`, which `get_or_create_output_head`
    uses to register the parameters of new output heads.
    """
    optimizer = super().configure_optimizers()
    self.optimizer = optimizer
    return optimizer
def create_output_head(self) -> nn.Module:
    """Create a new linear output head (representations -> class logits) on `self.device`."""
    new_head = nn.Linear(self.representations_size, self.n_classes)
    return new_head.to(self.device)
def get_or_create_output_head(self, task_id: int) -> nn.Module:
    """Retrieves or creates the output head for the given task index.

    New heads are stored in `output_heads`, and their parameters are added to the
    optimizer.

    Heads are created for every missing index up to and including `task_id`, so
    that the position of a head in `output_heads` always matches its task id, even
    when task ids aren't encountered in increasing order.

    Parameters
    ----------
    task_id : int
        Index of the task.

    Returns
    -------
    nn.Module
        The output head stored at index `task_id` of `output_heads`.
    """
    while len(self.output_heads) <= task_id:
        # The optimizer is needed in order to train the new head.
        assert self.optimizer, "need to set `optimizer` on the model."
        new_task_id = len(self.output_heads)
        logger.info(f"Creating a new output head for task {new_task_id}.")
        new_head = self.create_output_head()
        self.output_heads.append(new_head)
        self.optimizer.add_param_group({"params": new_head.parameters()})
    return self.output_heads[task_id]
def forward(self, observations: Observations) -> Tensor:
    """Multi-head forward pass, with task inference when task labels are missing.

    What happens depends on the contents of `observations.task_labels`:

    1. Task labels are present, and all items come from the same task (base case):
       the encoder and the output head of that task are applied to the batch.
    2. Task labels are present, and the batch mixes items from different tasks:
       the work is delegated to `split_forward_pass`.
    3. Task labels are *not* present: the work is delegated to
       `task_inference_forward_pass`.

    Parameters
    ----------
    observations : Observations
        Observations from an environment, with (at least) these two attributes:
        - x: Tensor (the images/inputs)
        - task_labels: Optional[Tensor] (The task labels, when available, else None)

    Returns
    -------
    Tensor
        The outputs, which in this case are the classification logits.
    """
    observations = observations.to(self.device)
    task_labels: Optional[Tensor] = observations.task_labels

    # Case 3: no task labels, run the forward pass with task inference turned on.
    if task_labels is None:
        return self.task_inference_forward_pass(observations)

    # Case 2: the batch contains data from more than one task.
    tasks_in_batch = torch.unique(task_labels)
    if len(tasks_in_batch) > 1:
        return self.split_forward_pass(observations)

    # Base case: "normal" forward pass, where all items come from the same task.
    # NOTE: If you want to reuse this multi-headed forward pass in your own model,
    # the lines below are what you'd want to change.
    task_id: int = tasks_in_batch.item()
    # <--------------- Change below ---------------->
    output_head = (
        self.output
        if task_id == self.current_task_id
        else self.get_or_create_output_head(task_id)
    )
    representations = self.encoder(observations.x)
    return output_head(representations)
def split_forward_pass(self, observations: Observations) -> Tensor:
    """Perform a forward pass for a batch of observations from different tasks.

    This is called in `forward` when there is more than one unique task label in the
    batch.

    This will call `forward` for each task id present in the batch, passing it a
    slice of the batch, in which all items are from that task.

    NOTE: This cannot cause recursion problems, because `forward`(d=2) will be
    called with a batch of items, all of which come from the same task. This makes it
    so `split_forward_pass` cannot then be called again.

    Parameters
    ----------
    observations : Observations
        Observations, in which the task labels might not all be the same.

    Returns
    -------
    Tensor
        The outputs/logits from each task, re-assembled into a single batch, with
        the task ordering from `observations` preserved.
    """
    assert observations.task_labels is not None
    # We have task labels.
    task_labels: Tensor = observations.task_labels
    # `inv_indices[k]` is the position (in `unique_task_ids`) of the task of item k.
    unique_task_ids, inv_indices = torch.unique(task_labels, return_inverse=True)

    # There might be more than one task in the batch.
    batch_size = observations.batch_size
    assert batch_size is not None
    all_indices = torch.arange(batch_size, dtype=torch.int64, device=self.device)

    # Placeholder for the predictions for each item in the batch.
    task_outputs = [None for _ in range(batch_size)]

    for task_position in range(len(unique_task_ids)):
        # Boolean 'mask' tensor, that selects entries from this task.
        is_from_this_task = inv_indices == task_position
        # Indices of the batch elements that are from this task.
        task_indices = all_indices[is_from_this_task]
        # Take a slice of the observations, in which all items come from this task.
        task_observations = observations[is_from_this_task]
        # Perform a "normal" forward pass (Base case).
        task_output = self.forward(task_observations)

        # Store the outputs for the items from this task at their original position.
        # NOTE: Plain python ints are used as list indices, and the loop variables
        # don't shadow the index of the outer loop.
        for batch_index, item_output in zip(task_indices.tolist(), task_output):
            task_outputs[batch_index] = item_output

    # Merge the results.
    assert all(item is not None for item in task_outputs)
    logits = torch.stack(task_outputs)
    return logits
def task_inference_forward_pass(self, observations: Observations) -> Tensor:
    """Forward pass for a batch without task labels, using simple task inference.

    The batch is passed through every known output head (by calling `forward` with
    'fake' task labels), and for each item, the logits of the head which produced
    the single most confident class probability are returned.
    """
    # We don't have access to task labels (`task_labels` is None).
    assert observations.task_labels is None
    # NOTE: This assumes that the observations are batched.
    # Shorthands used below to indicate the shape of the different tensors.
    B = observations.x.shape[0]
    T = n_known_tasks = len(self.output_heads)
    N = self.n_classes

    # Tasks encountered previously and for which we have an output head.
    known_task_ids = list(range(n_known_tasks))
    assert known_task_ids

    # Predictions from each output head for each item in the batch: T x [B, N]
    per_head_logits = []
    for head_index in known_task_ids:
        # Use 'fake' task labels so that `self.forward` goes through its
        # single-task code path, rather than recursing back into this method.
        fake_task_labels = torch.full([B], head_index, device=self.device, dtype=int)
        relabelled_observations = replace(observations, task_labels=fake_task_labels)
        per_head_logits.append(self.forward(relabelled_observations))
    assert all(item is not None for item in per_head_logits)

    # Stack the predictions (logits) from each output head.
    logits_from_each_head: Tensor = torch.stack(per_head_logits, dim=1)
    assert logits_from_each_head.shape == (B, T, N)

    # Normalize the logits from each output head with softmax.
    # Illustrative example with batch size of 1, output heads = 2, and classes = 4:
    # logits from each head: [[[123, 456, 123, 123], [1, 1, 2, 1]]]
    # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]]
    probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1)
    assert probs_from_each_head.shape == (B, T, N)

    # For each class, find the highest probability across all output heads, and
    # which head produced it.
    # Example (continued):
    # max probs across heads: [[0.2, 0.6, 0.4, 0.2]]
    # chosen output heads per class: [[1, 0, 1, 1]]
    max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1)
    assert max_probs_across_heads.shape == (B, N)
    assert chosen_head_per_class.shape == (B, N)

    # Then, for each item, find the class with the highest such probability.
    # Example (continued):
    # max_prob_value: [0.6]
    # most_probable_class: [1]
    max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1)
    assert max_prob_value.shape == (B,)
    assert most_probable_class.shape == (B,)

    # For each item, look up the head that gave that most confident prediction.
    # Example (continued):
    # chosen_output_head_per_item: [0]
    chosen_output_head_per_item = chosen_head_per_class.gather(
        1, most_probable_class.unsqueeze(1)
    ).squeeze(1)
    assert chosen_output_head_per_item.shape == (B,)

    # Select, for each item, the logits of its chosen output head.
    item_indices = torch.arange(B, device=chosen_output_head_per_item.device)
    logits = logits_from_each_head[item_indices, chosen_output_head_per_item]
    assert logits.shape == (B, N)
    return logits
def on_task_switch(self, task_id: Optional[int]):
    """Executed when the task switches (to either a known or unknown task).

    When `task_id` is None, nothing changes. Otherwise the id is stored as the
    current task, and the matching output head becomes the active one.
    """
    if task_id is None:
        return
    # Switch the output head.
    self.current_task_id = task_id
    self.output = self.get_or_create_output_head(task_id)
class ExampleTaskInferenceMethod(ExampleMethod):
    """Variant of the ExampleMethod which uses a `MultiHeadClassifier` as its model."""

    ModelType: ClassVar[Type[Classifier]] = MultiHeadClassifier

    def __init__(self, hparams: MultiHeadClassifier.HParams = None):
        # Fall back to the default hyper-parameters when none are given.
        if not hparams:
            hparams = MultiHeadClassifier.HParams()
        super().__init__(hparams=hparams)
        self.hparams: MultiHeadClassifier.HParams

    def configure(self, setting: ClassIncrementalSetting):
        """Called before the method is applied on a setting (before training).

        The model is instantiated here, since this is where the observation, action
        and reward spaces of the setting become available.
        """
        spaces = dict(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
        )
        self.model = MultiHeadClassifier(hparams=self.hparams, **spaces)
        self.optimizer = self.model.configure_optimizers()
        # Share a reference to the Optimizer with the model, so it can add new weights
        # when needed.
        self.model.optimizer = self.optimizer

    def on_task_switch(self, task_id: Optional[int]):
        """Forward the task switch to the model."""
        self.model.on_task_switch(task_id)

    def get_actions(self, observations, action_space):
        """Same as in the parent class."""
        return super().get_actions(observations, action_space)
if __name__ == "__main__":
    # Create the Method, either manually:
    # method = ExampleTaskInferenceMethod()
    # Or, from the command-line:
    from simple_parsing import ArgumentParser

    from sequoia.settings.sl.class_incremental import (
        ClassIncrementalSetting,
        TaskIncrementalSLSetting,
    )

    parser = ArgumentParser(description=__doc__)
    ExampleTaskInferenceMethod.add_argparse_args(parser)
    args = parser.parse_args()
    method = ExampleTaskInferenceMethod.from_argparse_args(args)

    # Create the Setting:

    # Simpler Settings (useful for debugging):
    # setting = TaskIncrementalSLSetting(
    # setting = ClassIncrementalSetting(
    #     dataset="mnist",
    #     nb_tasks=5,
    #     monitor_training_performance=True,
    #     batch_size=32,
    #     num_workers=4,
    # )

    # Very similar setup to the SL Track of the competition:
    setting = ClassIncrementalSetting(
        dataset="synbols",
        nb_tasks=12,
        monitor_training_performance=True,
        known_task_boundaries_at_test_time=False,
        batch_size=32,
        num_workers=4,
    )

    # Run the experiment, and show the results (as the other example scripts do),
    # rather than silently discarding them.
    results = setting.apply(method)
    print(results.summary())
================================================
FILE: examples/clcomp21/multihead_classifier_test.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings import ClassIncrementalSetting, TaskIncrementalSLSetting
from .multihead_classifier import ExampleTaskInferenceMethod, MultiHeadClassifier
@pytest.mark.timeout(120)
def test_task_incremental_mnist(
    task_incremental_mnist_setting: SettingProxy[TaskIncrementalSLSetting],
):
    """Applies this Method to the task-incremental mnist Setting."""
    mnist_setting = task_incremental_mnist_setting
    method = ExampleTaskInferenceMethod(hparams=MultiHeadClassifier.HParams(max_epochs_per_task=1))
    results: ClassIncrementalSetting.Results = mnist_setting.apply(method)
    assert results.to_log_dict()

    # There should be an improvement over the Method in `classifier.py`:
    assert 0.80 <= results.average_online_performance.objective <= 1.00
    assert 0.50 <= results.average_final_performance.objective <= 1.00
@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Runs the multi-head example Method on class-incremental mnist for one epoch
    per task, and checks that its performance falls within the expected bounds.
    """
    hparams = MultiHeadClassifier.HParams(max_epochs_per_task=1)
    method = ExampleTaskInferenceMethod(hparams=hparams)
    results: ClassIncrementalSetting.Results = mnist_setting.apply(method)
    assert results.to_log_dict()

    # There should be an improvement over the Method in `classifier.py`:
    online_objective = results.average_online_performance.objective
    assert 0.80 <= online_objective <= 1.00
    final_objective = results.average_final_performance.objective
    assert 0.50 <= final_objective <= 1.00
@slow
@pytest.mark.timeout(600)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Runs the multi-head example Method on the Setting of the competition's sl
    track, and checks that its performance falls within the expected bounds.
    """
    hparams = MultiHeadClassifier.HParams(max_epochs_per_task=1)
    method = ExampleTaskInferenceMethod(hparams=hparams)
    results: ClassIncrementalSetting.Results = sl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    online_objective = results.average_online_performance.objective
    assert 0.30 <= online_objective <= 0.50
    final_objective = results.average_final_performance.objective
    assert 0.02 <= final_objective <= 0.05
================================================
FILE: examples/clcomp21/regularization_example.py
================================================
""" Example: Defines a new Method based on the ExampleMethod, adding an EWC-like loss to
help prevent the weights from changing too much between tasks.
"""
from copy import deepcopy
from dataclasses import dataclass
from typing import ClassVar, Dict, Optional, Tuple, Type
import gym
import torch
from torch import Tensor
from sequoia.common.hparams import uniform
from sequoia.settings import DomainIncrementalSLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from sequoia.utils.utils import dict_intersection
from sequoia.utils.logging_utils import get_logger
from .multihead_classifier import ExampleTaskInferenceMethod, MultiHeadClassifier
logger = get_logger(__name__)
class RegularizedClassifier(MultiHeadClassifier):
    """Adds an ewc-like penalty to the base classifier, to prevent its weights from
    shifting too much during training.
    """

    @dataclass
    class HParams(MultiHeadClassifier.HParams):
        """Hyperparameters of this improved method.

        Adds the hyper-parameters related the 'ewc-like' regularization to those of the
        ExampleMethod.

        NOTE: These `uniform()` and `log_uniform` and `HyperParameters` are just there
        to make it easier to run HPO sweeps for your Method, which isn't required for
        the competition.
        """

        # Coefficient of the ewc-like loss.
        reg_coefficient: float = uniform(0.0, 10.0, default=1.0)
        # Distance norm used in the regularization loss.
        reg_p_norm: int = 2

    def __init__(
        self,
        observation_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space,
        hparams: "RegularizedClassifier.HParams" = None,
    ):
        super().__init__(
            observation_space,
            action_space,
            reward_space,
            hparams=hparams,
        )
        self.reg_coefficient = self.hparams.reg_coefficient
        self.reg_p_norm = self.hparams.reg_p_norm

        # 'Anchor' weights: snapshot of the parameters taken at the last task switch.
        self.previous_model_weights: Dict[str, Tensor] = {}

        self._previous_task: Optional[int] = None
        self._n_switches: int = 0

    def shared_step(self, batch: Tuple[Observations, Rewards], *args, **kwargs):
        """Adds the (scaled) ewc-like penalty to the loss of the parent's step.

        Returns the total loss, along with the parent's metrics, to which the
        penalty is added under the "ewc_loss" key.
        """
        base_loss, metrics = super().shared_step(batch, *args, **kwargs)
        ewc_loss = self.reg_coefficient * self.ewc_loss()
        metrics["ewc_loss"] = ewc_loss
        return base_loss + ewc_loss, metrics

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Executed when the task switches (to either a known or unknown task)."""
        super().on_task_switch(task_id)
        if self._previous_task is None and self._n_switches == 0:
            logger.debug("Starting the first task, no EWC update.")
        elif task_id is None or task_id != self._previous_task:
            # NOTE: We also switch between unknown tasks.
            logger.info(
                f"Switching tasks: {self._previous_task} -> {task_id}: "
                f"Updating the EWC 'anchor' weights."
            )
            self._previous_task = task_id
            # Snapshot the current weights: detached from the graph, and copied so
            # that later optimizer steps don't modify the anchor.
            self.previous_model_weights.clear()
            self.previous_model_weights.update(
                {name: param.detach().clone() for name, param in self.named_parameters()}
            )
        self._n_switches += 1

    def ewc_loss(self) -> Tensor:
        """Gets an 'ewc-like' regularization loss.

        NOTE: This is a simplified version of EWC where the loss is the P-norm
        between the current weights and the weights as they were on the begining
        of the task.

        Returns
        -------
        Tensor
            The sum of the distances between each current weight and its anchor.
            Always a Tensor: a zero scalar is returned while in the first task, or
            when there are no weights in common with the anchor.
        """
        if self._previous_task is None:
            # We're in the first task: no penalty.
            return torch.zeros((), device=self.device)

        old_weights: Dict[str, Tensor] = self.previous_model_weights
        new_weights: Dict[str, Tensor] = dict(self.named_parameters())

        penalties = [
            torch.dist(new_w, old_w.type_as(new_w), p=self.reg_p_norm)
            for _, (new_w, old_w) in dict_intersection(new_weights, old_weights)
        ]
        if not penalties:
            return torch.zeros((), device=self.device)
        return sum(penalties)
class ExampleRegMethod(ExampleTaskInferenceMethod):
    """Improved version of the ExampleMethod that uses a `RegularizedClassifier`."""

    HParams: ClassVar[Type[HParams]] = RegularizedClassifier.HParams

    def __init__(self, hparams: HParams = None):
        super().__init__(hparams=hparams or self.HParams.from_args())

    def configure(self, setting: DomainIncrementalSLSetting):
        """Creates the regularized model and its optimizer for the given setting."""
        # Use the improved model, with the added EWC-like term.
        self.model = RegularizedClassifier(
            observation_space=setting.observation_space,
            action_space=setting.action_space,
            reward_space=setting.reward_space,
            hparams=self.hparams,
        )
        self.optimizer = self.model.configure_optimizers()
        # Share a reference to the Optimizer with the model, so it can add new weights
        # when needed (same as in the parent's `configure`).
        self.model.optimizer = self.optimizer

    def on_task_switch(self, task_id: Optional[int]):
        """Forward the task switch to the model."""
        self.model.on_task_switch(task_id)
if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    from sequoia.common import Config
    from sequoia.settings import ClassIncrementalSetting

    # Create the Method:
    # - Manually:
    # method = ExampleRegMethod()
    # - From the command-line:
    parser = ArgumentParser()
    ExampleRegMethod.add_argparse_args(parser)
    method = ExampleRegMethod.from_argparse_args(parser.parse_args())

    # Create the Setting:

    # - "Easy": Domain-Incremental MNIST Setting, useful for quick debugging, but
    #   beware that the action space is different than in class-incremental!
    #   (which is the type of Setting used in the SL track!)
    # from sequoia.settings.sl.class_incremental.domain_incremental import DomainIncrementalSLSetting
    # setting = DomainIncrementalSLSetting(
    #     dataset="mnist", nb_tasks=5, monitor_training_performance=True
    # )

    # - "Medium": Class-Incremental MNIST Setting, useful for quick debugging:
    # setting = ClassIncrementalSetting(
    #     dataset="mnist",
    #     nb_tasks=5,
    #     monitor_training_performance=True,
    #     known_task_boundaries_at_test_time=False,
    #     batch_size=32,
    #     num_workers=4,
    # )

    # - "HARD": Class-Incremental Synbols, more challenging.
    # NOTE: This Setting is very similar to the one used for the SL track of the
    # competition.
    setting = ClassIncrementalSetting(
        dataset="synbols",
        nb_tasks=12,
        known_task_boundaries_at_test_time=False,
        monitor_training_performance=True,
        batch_size=32,
        num_workers=4,
    )

    # Run the experiment:
    config = Config(debug=True, data_dir="./data")
    results = setting.apply(method, config=config)
    print(results.summary())
================================================
FILE: examples/clcomp21/regularization_example_test.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings import ClassIncrementalSetting
from .regularization_example import ExampleRegMethod, RegularizedClassifier
@pytest.mark.timeout(120)
def test_mnist(mnist_setting: SettingProxy[ClassIncrementalSetting]):
    """Runs the regularized example Method on class-incremental mnist for one epoch
    per task, and checks that its performance falls within the expected bounds.
    """
    hparams = RegularizedClassifier.HParams(max_epochs_per_task=1)
    method = ExampleRegMethod(hparams=hparams)
    results: ClassIncrementalSetting.Results = mnist_setting.apply(method)
    assert results.to_log_dict()

    # There should be an improvement over the Method in `multihead_classifier.py`:
    online_objective = results.average_online_performance.objective
    assert 0.80 <= online_objective <= 1.00
    final_objective = results.average_final_performance.objective
    assert 0.30 <= final_objective <= 0.50
@slow
@pytest.mark.timeout(600)
def test_SL_track(sl_track_setting: SettingProxy[ClassIncrementalSetting]):
    """Runs the regularized example Method on the Setting of the competition's sl
    track, and checks that its performance falls within the expected bounds.
    """
    hparams = RegularizedClassifier.HParams(max_epochs_per_task=1)
    method = ExampleRegMethod(hparams=hparams)
    results: ClassIncrementalSetting.Results = sl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    online_objective = results.average_online_performance.objective
    assert 0.30 <= online_objective <= 0.50
    final_objective = results.average_final_performance.objective
    assert 0.02 <= final_objective <= 0.05
================================================
FILE: examples/clcomp21/sb3_example.py
================================================
""" Example where we start from a Method from stable-baselines3 to solve the rl track.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Mapping, Optional, Type, Union
import gym
from gym import spaces
from simple_parsing import mutable_field
from sequoia.methods.stable_baselines3_methods.ppo import PPOMethod, PPOModel
from sequoia.settings.rl import ContinualRLSetting
# from stable_baselines3.ppo.policies import ActorCriticCnnPolicy, ActorCriticPolicy
class CustomPPOModel(PPOModel):
    # Subclass of the PPO model, with its own (for now identical) hyper-parameters.

    @dataclass
    class HParams(PPOModel.HParams):
        """Hyper-parameters of the PPO Model."""
@dataclass
class CustomPPOMethod(PPOMethod):
    # Type of model to create. Uses the customizable `CustomPPOModel` of this example
    # (rather than the base `PPOModel`), so that changes made to it take effect.
    Model: ClassVar[Type[PPOModel]] = CustomPPOModel

    # Hyper-parameters of the PPO Model.
    hparams: CustomPPOModel.HParams = mutable_field(CustomPPOModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """Called before the method is applied on a setting. Defers to the parent."""
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel:
        """Creates the model for `train_env`, using the hyper-parameters as kwargs."""
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Trains on the given environments. Defers to the parent."""
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Returns the actions for the given observations. Defers to the parent."""
        return super().get_actions(
            observations=observations,
            action_space=action_space,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        """Returns the hyper-parameter search space. Defers to the parent."""
        return super().get_search_space(setting)
if __name__ == "__main__":
    # Create the Setting. Only one of the alternatives below should be active, so
    # that no Setting is created just to be immediately overwritten.

    # CartPole-state for debugging:
    # from sequoia.settings.rl import RLSetting
    # setting = RLSetting(dataset="CartPole-v0")

    # OR: Incremental CartPole-state:
    from sequoia.settings.rl import IncrementalRLSetting

    setting = IncrementalRLSetting(
        dataset="CartPole-v0",
        monitor_training_performance=True,
        nb_tasks=1,
        train_steps_per_task=1_000,
        test_max_steps=2000,
    )

    # OR: Setting of the RL Track of the competition:
    # setting = IncrementalRLSetting.load_benchmark("rl_track")

    # Create the Method:
    method = CustomPPOMethod()

    # Apply the Method onto the Setting to get Results.
    results = setting.apply(method)
    print(results.summary())

    # BONUS: Running a hyper-parameter sweep:
    # method.hparam_sweep(setting)
================================================
FILE: examples/clcomp21/sb3_example_test.py
================================================
import pytest
from sequoia.client.setting_proxy import SettingProxy
from sequoia.conftest import slow
from sequoia.settings.rl import IncrementalRLSetting, RLSetting
from sequoia.settings.sl import ClassIncrementalSetting
from .sb3_example import CustomPPOMethod, CustomPPOModel
@pytest.mark.timeout(120)
def test_cartpole_state(cartpole_state_setting: SettingProxy[RLSetting]):
    """Runs the custom PPO Method (with `n_steps=64`) on a simple cartpole-state
    setting, and checks the mean episode length at the end of training.
    """
    hparams = CustomPPOModel.HParams(n_steps=64)
    method = CustomPPOMethod(hparams=hparams)
    results: RLSetting.Results = cartpole_state_setting.apply(method)
    assert results.to_log_dict()

    # TODO: BUG: The SB3 method uses more than the number of steps allowed, probably
    # while filling up its buffer.
    mean_episode_length = results.average_final_performance.mean_episode_length
    assert 150 < mean_episode_length
@pytest.mark.timeout(120)
def test_incremental_cartpole_state(
    incremental_cartpole_state_setting: SettingProxy[IncrementalRLSetting],
):
    """Applies this Method to the incremental cartpole-state Setting."""
    method = CustomPPOMethod()
    results: IncrementalRLSetting.Results = incremental_cartpole_state_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Increase this bound
    assert 5 <= results.average_online_performance.objective
    assert 5 <= results.average_final_performance.objective
@pytest.mark.timeout(300)
def test_RL_track(rl_track_setting: SettingProxy[IncrementalRLSetting]):
    """Applies this Method to the Setting of the rl track of the competition."""
    method = CustomPPOMethod()
    results: IncrementalRLSetting.Results = rl_track_setting.apply(method)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    online_perf = results.average_online_performance
    # TODO: get an estimate of the upper bound of the random method on the RL track.
    assert 0 < online_perf.objective
    final_perf = results.average_final_performance
    assert 0 < final_perf.objective
================================================
FILE: examples/demo_utils.py
================================================
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Type
import pandas as pd
from simple_parsing import ArgumentParser
from sequoia.common.config import Config
from sequoia.settings import Method, Results, RLSetting, Setting, SLSetting
def demo_all_settings(
    MethodType: Type[Method],
    datasets: List[str] = ["mnist", "fashionmnist"],
    **setting_kwargs,
):
    """Evaluates the given Method on all its applicable settings.

    NOTE: Only evaluates on the mnist/fashion-mnist datasets for this demo.

    Parameters
    ----------
    MethodType : Type[Method]
        Type of Method to evaluate. A new instance is created for each run.
    datasets : List[str]
        Names of the datasets to consider. When empty, no dataset is filtered out.
        (The default list is only ever read, never modified.)
    **setting_kwargs
        Additional keyword arguments passed to the constructor of each Setting.

    Returns
    -------
    Dict[Type[Setting], Dict[str, Results]]
        The results, for each type of setting and each dataset.
        Also saved as a csv file and as a LaTeX table in "examples/results".
    """
    # Iterate over all the applicable evaluation settings, using the default
    # options for each setting, and store the results inside this dictionary.
    all_results: Dict[Type[Setting], Dict[str, Results]] = defaultdict(dict)

    # Loop over all the types of settings this method is applicable on, i.e.
    # all the nodes in the tree below its target Setting).
    for setting_type in MethodType.get_applicable_settings():
        # Loop over all the available dataset for each setting:
        for dataset in setting_type.get_available_datasets():
            if datasets and dataset not in datasets:
                print(f"Skipping {setting_type} / {dataset} for now.")
                continue

            if issubclass(setting_type, RLSetting):
                print(f"Skipping {setting_type} (not considering RL settings for this demo).")
                continue

            # 1. Create a Method of the provided type, so we start fresh every time.
            method = MethodType()
            # 2. Create the setting
            setting = setting_type(dataset=dataset, **setting_kwargs)
            # 3. Apply the method on the setting.
            results: Results = setting.apply(method)

            print(f"Results on setting {setting_type}, dataset {dataset}:")
            print(results.summary())
            # Save the results in the dict defined above.
            all_results[setting_type][dataset] = results

    # Create a pandas dataframe with all the results:
    result_df: pd.DataFrame = make_result_dataframe(all_results)

    # NOTE: The names are taken from the Method *type*, rather than from the last
    # `method` instance created in the loop above, which doesn't exist when every
    # setting/dataset combination was skipped.
    method_name = MethodType.get_name()

    csv_path = Path(f"examples/results/results_{method_name}.csv")
    csv_path.parent.mkdir(exist_ok=True, parents=True)
    result_df.to_csv(csv_path)
    print(f"Saved dataframe with results to path {csv_path}")

    # BONUS: Display the results in a LaTeX-formatted table!
    latex_table_path = Path(f"examples/results/table_{method_name}.tex")
    caption = f"Results for method {MethodType.__name__} settings."
    result_df.to_latex(
        buf=latex_table_path,
        caption=caption,
        na_rep="N/A",
        multicolumn=True,
    )
    print(f"Saved LaTeX table with results to path {latex_table_path}")
    return all_results
def make_result_dataframe(all_results):
    """Arranges the results of a single Method into a pandas dataframe.

    Parameters
    ----------
    all_results
        Mapping from setting type to a mapping from dataset name to the results
        obtained on that setting and dataset.

    Returns
    -------
    pd.DataFrame
        Dataframe with one row per setting (indexed by the setting's name) and one
        column per dataset (sorted by name), containing the `objective` of the
        corresponding results. Entries without results are left empty (NaN).
    """
    import pandas as pd

    setting_names = [setting_type.get_name() for setting_type in all_results]
    # Sorted, so that the order of the columns is the same on every run.
    dataset_names = sorted(
        {
            dataset
            for dataset_to_results in all_results.values()
            for dataset in dataset_to_results
        }
    )

    df = pd.DataFrame(index=setting_names, columns=dataset_names)
    for setting_type, dataset_to_results in all_results.items():
        setting_name = setting_type.get_name()
        for dataset, result in dataset_to_results.items():
            # NOTE: A single `.loc` assignment is used, since chained indexing
            # (`df[col][row] = value`) may write to a temporary copy of the column.
            df.loc[setting_name, dataset] = result.objective
    return df
def compare_results(
    all_results: Dict[Type[Method], Dict[Type[Setting], Dict[str, Results]]]
) -> None:
    """Helper function, compares the results of the different methods by
    arranging them in a table (pandas dataframe).

    The table is printed, and saved both as a csv file and as a LaTeX table in the
    "examples/results" directory (which is created if needed).
    """
    # NOTE: `make_comparison_dataframe` is defined in this same module, so there is
    # no need to import it.
    comparison_df = make_comparison_dataframe(all_results)

    print("----- All Results -------")
    print(comparison_df)

    csv_path = Path("examples/results/comparison.csv")
    latex_path = Path("examples/results/table_comparison.tex")
    # Make sure that the output directory exists before writing into it.
    csv_path.parent.mkdir(exist_ok=True, parents=True)

    comparison_df.to_csv(csv_path)
    print(f"Saved dataframe with results to path {csv_path}")

    caption = "Comparison of different methods on their applicable settings."
    comparison_df.to_latex(latex_path, caption=caption, multicolumn=False, multirow=False)
    print(f"Saved LaTeX table with results to path {latex_path}")
def make_comparison_dataframe(
    all_results: Dict[Type[Method], Dict[Type[Setting], Dict[str, Results]]]
) -> pd.DataFrame:
    """Helper function: takes in the dictionary with all the results and
    re-arranges it into a pandas dataframe.

    Parameters
    ----------
    all_results
        Mapping from method type, to setting type, to dataset name, to the results
        of that method on that setting and dataset.

    Returns
    -------
    pd.DataFrame
        Dataframe with one column per method (named after the method), and one row
        per (setting name, dataset) pair for which at least one method has results
        (sorted). Each entry is the `objective` of the corresponding results, and
        entries without results are left empty (NaN).
    """
    # Get all the method names.
    all_method_names: List[str] = [method_class.get_name() for method_class in all_results]

    # Create the row index using only the (setting, dataset) pairs that apply, so
    # we can later index df.loc[(setting, dataset), method].
    tuples = sorted(
        {
            (setting.get_name(), dataset)
            for setting_to_dataset_to_results in all_results.values()
            for setting, dataset_to_results in setting_to_dataset_to_results.items()
            for dataset in dataset_to_results
        }
    )
    multi_index = pd.MultiIndex.from_tuples(tuples, names=["setting", "dataset"])
    single_index = pd.Index(all_method_names, name="Method")
    df = pd.DataFrame(index=multi_index, columns=single_index)

    for method_class, setting_to_dataset_to_results in all_results.items():
        method_name = method_class.get_name()
        for setting, dataset_to_results in setting_to_dataset_to_results.items():
            setting_name = setting.get_name()
            for dataset, result in dataset_to_results.items():
                # NOTE: A single `.loc` assignment is used, since chained indexing
                # (`df[col][row] = value`) may write to a temporary copy of the column.
                df.loc[(setting_name, dataset), method_name] = result.objective
    return df
================================================
FILE: examples/prerequisites/dataclasses_example.py
================================================
""" Example describing dataclasses and how simple-parsing can be used to create
command-line arguments from them.
"""
from dataclasses import dataclass
@dataclass
class Point:
    """A point with two float coordinates, each of which has a default value."""

    x: float = 1.2
    y: float = 4.5

    # The `dataclass` decorator generates, among other methods, this constructor:
    # def __init__(self, x: float = 1.2, y: float = 4.5):
    #     self.x = x
    #     self.y = y
if __name__ == "__main__":
    # Dataclasses also get a generated `__repr__`, which is what gets printed here.
    p1 = Point(0, 0)
    print(p1)
    # Text that this example is expected to print (extended further below).
    expected = "Point(x=0, y=0)"
#
# Second example: HyperParameters with simple-parsing:
#
from simple_parsing import ArgumentParser
from simple_parsing.helpers import choice
@dataclass
class HParams:
    """Hyper-Parameters of my model."""

    # Learning rate.
    learning_rate: float = 3e-4

    # L2 regularization coefficient.
    weight_decay: float = 1e-6

    # Choice of optimizer
    optimizer: str = choice("adam", "sgd", "rmsprop", default="sgd")
if __name__ == "__main__":
# Add the fields of the `HParams` dataclass to the parser, under the "hparams"
# destination, then show the resulting help text.
parser = ArgumentParser()
parser.add_arguments(HParams, "hparams")
parser.print_help()
import textwrap
# Help text that the call above is expected to print.
expected += textwrap.dedent(
"""\
usage: dataclasses_example.py [-h] [--learning_rate float]
[--weight_decay float]
[--optimizer {adam,sgd,rmsprop}]
optional arguments:
-h, --help show this help message and exit
HParams ['hparams']:
Hyper-Parameters of my model.
--learning_rate float, --hparams.learning_rate float
Learning rate. (default: 0.0003)
--weight_decay float, --hparams.weight_decay float
L2 regularization coefficient. (default: 1e-06)
--optimizer {adam,sgd,rmsprop}, --hparams.optimizer {adam,sgd,rmsprop}
Choice of optimizer (default: sgd)
"""
)
# Parse an empty argument string, and read the parsed dataclass from the
# "hparams" destination given above.
args = parser.parse_args("")
hparams: HParams = args.hparams
print(hparams)
# Expected output of the `print` above.
expected += """\
HParams(learning_rate=0.0003, weight_decay=1e-06, optimizer='sgd')
"""
================================================
FILE: mypy.ini
================================================
# Global options:
[mypy]
python_version = 3.7
warn_return_any = True
warn_unused_configs = True
follow_imports = normal
================================================
FILE: pytest.ini
================================================
[pytest]
timeout = 30
testpaths =
sequoia
examples
addopts =
--doctest-modules
norecursedirs =
methods/d3rlpy_methods
settings/offline_rl
examples/advanced/procgen_example
================================================
FILE: requirements.txt
================================================
# Fork of gym with more flexible utility functions.
gym @ git+https://www.github.com/openai/gym@8819d561132082f6130d4a2388c68a963f41ec4f#egg=gym
# nngeometry module used in the EWC method
nngeometry @ git+https://github.com/oleksost/nngeometry.git#egg=nngeometry
# Temporary fix for issue#128
pyyaml!=5.4.*,>=5.1
simple_parsing==0.1.2.post1
# matplotlib==3.2.2
matplotlib
# NOTE: @lebrice: PyTorch suddenly got really picky about type annotations in 1.9.0 for
# some reason, and they really don't do a great job at evaluating them, so removing it
# for now.
torch==1.8.1
torchvision==0.9.1
scikit-learn
tqdm
continuum==1.0.19
# Only required for the current demo:
wandb
plotly
pandas
# Only for python < 3.8
singledispatchmethod;python_version<'3.8'
# NOTE: PyTorch-Lightning version 1.4.0 is "working" but raises lots of warnings.
pytorch-lightning==1.5.9
lightning-bolts==0.5.0
# Requirements for running tests:
pytest-timeout
pytest-xdist
pytest-xvfb # Prevents the gym popups from displaying during tests.
# Required for the RL methods
pyvirtualdisplay
# Required for the synbols dataset to work.
h5py
================================================
FILE: scripts/eai/cancel_all_queuing.sh
================================================
# Kills every EAI job that is currently in the "queuing" state.
# The first positional argument is forwarded to the `-c` option of `eai job ls`.
all_ids=$(eai job ls --state queuing -c "$1" --fields id --no-header)
# $all_ids is left unquoted so that the shell splits it into one word per id.
for id in $all_ids
do
eai job kill $id
done
================================================
FILE: scripts/eai/cancel_all_running.sh
================================================
# Kills every EAI job that is currently in the "running" state.
# The first positional argument is forwarded to the `-c` option of `eai job ls`.
all_ids=$(eai job ls --state running -c "$1" --fields id --no-header)
# $all_ids is left unquoted so that the shell splits it into one word per id.
for id in $all_ids
do
eai job kill $id
done
================================================
FILE: scripts/eai/job.sh
================================================
#!/bin/bash
# Submits a restartable EAI job that runs the given command inside the
# sequoia_eai:$BRANCH image.
# Usage: scripts/eai/job.sh <command> [args...]  (all arguments are passed after `--`)
# Environment variables read: WANDB_API_KEY (required), BRANCH, EAI_Registry and
# NO_BUILD (optional).
set -o errexit # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
# set -o nounset # Exposes unset variables
# Get organization name
ORG_NAME=$(eai organization get --field name)
# Get account name
ACCOUNT_NAME=$(eai account get --field name)
ACCOUNT_ID=$ORG_NAME.$ACCOUNT_NAME
# The registry can be overridden by setting EAI_Registry in the environment.
EAI_Registry=${EAI_Registry:-"registry.console.elementai.com/$ACCOUNT_ID"}
echo "Using registry $EAI_Registry"
# Default to the branch that is currently checked out, unless BRANCH is set.
CURRENT_BRANCH="`git branch --show-current`"
BRANCH=${BRANCH:-$CURRENT_BRANCH}
# `${VAR?message}` aborts the script with that message when VAR is unset.
export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}
echo "Building eai-specific container for branch $BRANCH"
# Any non-empty NO_BUILD value skips the image build.
if [ "$NO_BUILD" ]; then
echo "skipping build."
else
echo "building"
# TODO: There is something wrong here: How can they possibly build their job, if
# they don't have the eai dockerfile?
source dockers/eai/build.sh
fi
# The image we're using is going to be called sequoia_eai:$BRANCH, and will have been
# pushed to the user's eai registry.
eai job submit \
--restartable \
--data $ACCOUNT_ID.home:/mnt/home \
--data $ACCOUNT_ID.data:/mnt/data \
--data $ACCOUNT_ID.results:/mnt/results \
--env WANDB_API_KEY="$WANDB_API_KEY" \
--env HOME=/home/toolkit \
--image $EAI_Registry/sequoia_eai:$BRANCH \
--gpu 1 --cpu 8 --mem 12 \
-- "$@"
# Same submission with `--gpu-model-filter 12gb` added (kept for reference):
# eai job submit \
# --restartable \
# --data $ACCOUNT_ID.home:/mnt/home \
# --data $ACCOUNT_ID.data:/mnt/data \
# --data $ACCOUNT_ID.results:/mnt/results \
# --env WANDB_API_KEY="$WANDB_API_KEY" \
# --env HOME=/home/toolkit \
# --image $EAI_Registry/sequoia_eai:$BRANCH \
# --gpu 1 --cpu 8 --mem 12 --gpu-model-filter 12gb \
# -- "$@"
================================================
FILE: scripts/eai/rl_sweep.sh
================================================
#!/bin/bash
# Submits one `sequoia_sweep` job (through scripts/eai/job.sh) for every
# (method, setting, benchmark) combination listed below.
# Extra command-line arguments are appended to every `sequoia_sweep` command.
set -o errexit # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
set -o nounset # Exposes unset variables
export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}
# Build the image once here; NO_BUILD makes job.sh skip its own build step.
source dockers/eai/build.sh
export NO_BUILD=1
# Number of runs per combination.
MAX_RUNS=20
PROJECT="crl_study"
SETTINGS=(
"continual_rl"
"discrete_task_agnostic_rl"
"incremental_rl"
"task_incremental_rl"
"multi_task_rl"
"traditional_rl"
)
METHODS=(
"ppo"
"a2c"
"dqn"
"ddpg"
"sac"
"td3"
"baseline"
"methods.ewc"
)
BENCHMARKS=(
"cartpole"
"monsterkong_mix"
"mountaincar_continuous"
)
# "half_cheetah"
for METHOD in "${METHODS[@]}"; do
for SETTING in "${SETTINGS[@]}"; do
for BENCHMARK in "${BENCHMARKS[@]}"; do
# The database file depends only on the setting and the method, so every
# benchmark of a given (setting, method) pair uses the same file.
# Share the trials from different datasets, hopefully reusing something?
DATABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl"
scripts/eai/job.sh sequoia_sweep \
--max_runs $MAX_RUNS --database_path $DATABASE_PATH \
--setting $SETTING --benchmark $BENCHMARK --project $PROJECT \
--method $METHOD \
"$@"
done
done
done
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar10 --project csl_study --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar100 --project csl_study --nb_tasks 20 --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset synbols --project csl_study --nb_tasks 12 --method baseline
================================================
FILE: scripts/eai/shell_job.sh
================================================
#!/bin/bash
# Starts an interactive EAI job running the sequoia_eai:$BRANCH image, after
# killing any interactive job that is still alive.
# Environment variables read: BRANCH, NO_BUILD and WANDB_API_KEY (all optional here).
set -o errexit   # Used to exit upon error, avoiding cascading errors
set -o errtrace  # Show error trace
# set -o pipefail  # Unveils hidden failures
# NOTE: enabling pipefail would make the `grep true` pipeline below abort the
# script (because of errexit) whenever there is no interactive job to find.
# set -o nounset   # Exposes unset variables

# Get organization name
ORG_NAME=$(eai organization get --field name)
# Get account name
ACCOUNT_NAME=$(eai account get --field name)
ACCOUNT_ID=$ORG_NAME.$ACCOUNT_NAME
EAI_Registry=registry.console.elementai.com/$ACCOUNT_ID

CURRENT_BRANCH="`git branch --show-current`"
BRANCH=${BRANCH:-$CURRENT_BRANCH}

# Ids (one per line) of the alive jobs whose `interactive` field is true.
existing_interactive_job_id=`eai job ls --state alive --fields id,interactive | grep true | awk '{print $1}'`
# The value is quoted in the test so that several ids (or none) cannot break `[`.
if [ -n "$existing_interactive_job_id" ]; then
    # Left unquoted in the `for` so that each id becomes its own word.
    for job_id in $existing_interactive_job_id; do
        echo "Found existing interactive job, with id $job_id"
        eai job kill "$job_id"
    done
    echo "Sleeping for 5 seconds, just to give the job a chance to change its status."
    sleep 5
fi

# Any non-empty NO_BUILD value skips the image build.
if [ "$NO_BUILD" ]; then
    echo "skipping build."
else
    echo "building"
    # TODO: There is something wrong here: How can they possibly build their job, if
    # they don't have the eai dockerfile?
    source dockers/eai/build.sh
fi

# The image we're using is going to be called sequoia_eai:$BRANCH, and will have been
# pushed to the user's eai registry.
eai job submit \
    --interactive \
    --data "$ACCOUNT_ID.home:/mnt/home" \
    --data "$ACCOUNT_ID.data:/mnt/data" \
    --data "$ACCOUNT_ID.results:/mnt/results" \
    --env WANDB_API_KEY="$WANDB_API_KEY" \
    --env HOME=/home/toolkit \
    --image "$EAI_Registry/sequoia_eai:$BRANCH" \
    --gpu 1 --cpu 8 --mem 12 --gpu-model-filter 12gb
================================================
FILE: scripts/eai/sl_sweep.sh
================================================
#!/bin/bash
# Submits one `sequoia_sweep` job (through scripts/eai/job.sh) for every
# (method, setting, dataset) combination listed below.
# Extra command-line arguments are appended to every `sequoia_sweep` command.
set -o errexit   # Used to exit upon error, avoiding cascading errors
set -o errtrace  # Show error trace
set -o pipefail  # Unveils hidden failures
set -o nounset   # Exposes unset variables

export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}

# Build the image once here; NO_BUILD makes job.sh skip its own build step.
source dockers/eai/build.sh
export NO_BUILD=1

# Number of runs per combination.
MAX_RUNS=20
PROJECT="csl_study"

SETTINGS=(
    "continual_sl"
    "discrete_task_agnostic_sl"
    "incremental_sl"
    "task_incremental_sl"
    "multi_task_sl"
    "traditional_sl"
)
METHODS=(
    # "random_baseline"
    "gdumb"
    "agem"
    "ar1"
    "cwr_star"
    "gem"
    "lwf"
    "replay"
    "synaptic_intelligence"
    "avalanche.ewc"
    "baseline"
    "methods.ewc"
    "experience_replay"
    "hat"
    "pnn"
)
# An entry may carry extra options after the dataset name.
DATASETS=(
    "synbols --nb_tasks 12"
    "cifar10"
    "cifar100 --nb_tasks 10"
    "mnist"
)

for METHOD in "${METHODS[@]}"; do
    for SETTING in "${SETTINGS[@]}"; do
        for DATASET in "${DATASETS[@]}"; do
            # Share the trials from different datasets, hopefully reusing something?
            DATABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl"
            # NOTE: $DATASET must stay unquoted: entries such as
            # "synbols --nb_tasks 12" have to be split into separate arguments.
            scripts/eai/job.sh sequoia_sweep \
                --max_runs "$MAX_RUNS" --database_path "$DATABASE_PATH" \
                --setting "$SETTING" --dataset $DATASET --project "$PROJECT" \
                --method "$METHOD" --monitor_training_performance True \
                "$@"
        done
    done
done

# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar10 --project csl_study --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset cifar100 --project csl_study --nb_tasks 20 --method baseline
# source scripts/eai/job.sh sequoia_sweep --max_runs 20 --database_path /mnt/home/orion_db.pkl --setting class_incremental --dataset synbols --project csl_study --nb_tasks 12 --method baseline
================================================
FILE: scripts/slurm/launch_many_sweeps.sh
================================================
#!/bin/bash
# Runs scripts/slurm/sweep.sh once for every (method, setting, dataset)
# combination listed below.
# Extra command-line arguments are appended to every sweep.sh invocation.
set -o errexit   # Used to exit upon error, avoiding cascading errors
set -o errtrace  # Show error trace
set -o pipefail  # Unveils hidden failures
set -o nounset   # Exposes unset variables

export WANDB_API_KEY=${WANDB_API_KEY?"Need to pass the wandb api key or have it set in the environment variables."}

module load anaconda/3
conda activate sequoia
cd ~/Sequoia
# Quoted so that the shell cannot treat the brackets as a glob pattern.
pip install -e ".[hpo,monsterkong]"

# Number of runs per combination.
MAX_RUNS=20
PROJECT="csl_study"

SETTINGS=("class_incremental" "task_incremental" "multi_task" "iid")
# Each method appears exactly once, so that no sweep is launched twice.
METHODS=(
    "gdumb" "random_baseline" "pnn" "agem"
    "ar1" "cwr_star" "gem" "lwf" "replay" "synaptic_intelligence"
    "avalanche.ewc" "methods.ewc" "experience_replay" "hat" "baseline"
)
# An entry may carry extra options after the dataset name.
DATASETS=(
    "synbols --nb_tasks 12"
    "cifar10"
    "cifar100 --nb_tasks 10"
    "mnist"
)

for METHOD in "${METHODS[@]}"; do
    for SETTING in "${SETTINGS[@]}"; do
        for DATASET in "${DATASETS[@]}"; do
            # Share the trials from different datasets, hopefully reusing something?
            DATABASE_PATH="/mnt/home/${SETTING}_${METHOD}.pkl"
            # NOTE(review): sweep.sh carries #SBATCH directives but is executed
            # directly here rather than through `sbatch` - confirm this is intended.
            # NOTE: $DATASET must stay unquoted: entries such as
            # "synbols --nb_tasks 12" have to be split into separate arguments.
            scripts/slurm/sweep.sh \
                --max_runs "$MAX_RUNS" --database_path "$DATABASE_PATH" \
                --setting "$SETTING" --dataset $DATASET --project "$PROJECT" \
                --WANDB_API_KEY "$WANDB_API_KEY" \
                --method "$METHOD" \
                "$@"
        done
    done
done
================================================
FILE: scripts/slurm/run.sh
================================================
#!/bin/bash
#SBATCH --array=0-3%2
#SBATCH --cpus-per-task=2
#SBATCH --gres=gpu:1
#SBATCH --mem=10GB
#SBATCH --time=11:59:00
# Slurm batch script: runs `sequoia` with the arguments given to this script.
# `--array=0-3%2` requests array tasks 0 to 3, with at most 2 running at once.
module load anaconda/3
conda activate sequoia
cd ~/Sequoia
pip install -e .[hpo,monsterkong,avalanche]
# $SLURM_TMPDIR is passed as the data directory.
sequoia --data_dir $SLURM_TMPDIR "$@"
================================================
FILE: scripts/slurm/sweep.sh
================================================
#!/bin/bash
#SBATCH --array=0-10%2
#SBATCH --cpus-per-task=2
#SBATCH --gres=gpu:1
#SBATCH --mem=10GB
#SBATCH --time=11:59:00
# Slurm batch script: runs `sequoia_sweep` with the arguments given to this script.
# `--array=0-10%2` requests array tasks 0 to 10, with at most 2 running at once.
set -o errexit # Used to exit upon error, avoiding cascading errors
set -o errtrace # Show error trace
set -o pipefail # Unveils hidden failures
module load anaconda/3
conda activate sequoia
cd ~/Sequoia
# TODO: Set data_dir in Config to `DATA_DIR` as a priority, and then as SLURM_TMPDIR/DATA (not just SLURM_TMPDIR!)
# Copy the repository's `data` folder into $SLURM_TMPDIR and point DATA_DIR at the copy.
cp -r data $SLURM_TMPDIR/
export DATA_DIR=$SLURM_TMPDIR/data
#pip install -e .[hpo,monsterkong,avalanche]
# TODO: Change the setting, the number of tasks, the method, etc.
# NOTE(review): absolute path into one user's conda environment - as written, the
# script only works where that path exists.
/home/mila/n/normandf/.conda/envs/sequoia/bin/sequoia_sweep --data_dir $SLURM_TMPDIR/data "$@"
================================================
FILE: sequoia/README.md
================================================
# sequoia
## Packages:
- [settings](settings): definitions for the settings (machine learning problems).
- [methods](methods): Contains the methods (which can be applied to settings).
- [common](common): utilities such as metrics, transforms, layers, gym wrappers, configuration classes, etc. that are used by Settings and Methods.
- [utils](utils): miscellaneous utility functions (logging, command-line parsing, etc.)
- [experiments](experiments): Command-line interface entry-points, via the `Experiment` class.
- [client (wip)](client): defines a proxy to a Setting and its environments, in order to further isolate the Method and Setting from each other (used for the CLVision competition).
================================================
FILE: sequoia/__init__.py
================================================
""" Sequoia - The Research Tree """
from ._version import get_versions
from .settings import Environment, Method, Setting
# from .experiments import Experiment
# Package version string, taken from the "version" entry returned by get_versions().
__version__ = get_versions()["version"]
# Remove the helper from the package namespace once the version has been read.
del get_versions
================================================
FILE: sequoia/_version.py
================================================
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer)
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
def get_keywords():
"""Get the keywords needed to look up the version information."""
# Returns a dict with the keys "refnames", "full" and "date".
# NOTE: keep the three assignments below on their own lines (see the comment
# that follows); do not fold them into the dict literal.
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
git_date = "$Format:%ci$"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
class VersioneerConfig:
    """Plain attribute container for the Versioneer configuration parameters."""


def get_config():
    """Build and return the VersioneerConfig used by this module.

    Returns:
        A VersioneerConfig instance carrying the attributes ``VCS``, ``style``,
        ``tag_prefix``, ``parentdir_prefix``, ``versionfile_source`` and
        ``verbose``.
    """
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    settings = {
        "VCS": "git",
        "style": "pep440-post",
        "tag_prefix": "v",
        "parentdir_prefix": "sequoia-",
        "versionfile_source": "sequoia/_version.py",
        "verbose": False,
    }
    config = VersioneerConfig()
    for attribute, value in settings.items():
        setattr(config, attribute, value)
    return config
class NotThisMethod(Exception):
    """Raised when a version-lookup method is not valid for the current scenario."""


LONG_VERSION_PY = {}
# Registry of handlers, indexed as HANDLERS[vcs][method].
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Create a decorator that registers a function as the handler of a VCS.

    Args:
        vcs: name of the version control system (first-level key in HANDLERS).
        method: name of the handled operation (second-level key in HANDLERS).

    Returns:
        A decorator that stores the decorated function in
        ``HANDLERS[vcs][method]`` and returns it unchanged.
    """

    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        # setdefault creates the per-VCS dict the first time this VCS is seen.
        HANDLERS.setdefault(vcs, {})[method] = f
        return f

    return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
    """Run the first executable of `commands` that can be started, with `args`.

    Args:
        commands: list of candidate executable names, tried in order.
        args: list of arguments appended to the chosen executable.
        cwd: working directory handed to the child process.
        verbose: print diagnostics when the command cannot be run or fails.
        hide_stderr: capture the child's stderr in a pipe instead of inheriting it.
        env: environment mapping handed to the child process.

    Returns:
        ``(stdout, returncode)`` when the command exits with status 0, where
        stdout is stripped and decoded; ``(None, returncode)`` when it exits
        with another status; ``(None, None)`` when no candidate could be started.
    """
    assert isinstance(commands, list)
    stderr_target = subprocess.PIPE if hide_stderr else None
    process = None
    for executable in commands:
        dispcmd = str([executable] + args)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(
                [executable] + args,
                cwd=cwd,
                env=env,
                stdout=subprocess.PIPE,
                stderr=stderr_target,
            )
        except EnvironmentError as error:
            # A missing executable is not fatal: try the next candidate.
            if error.errno == errno.ENOENT:
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(error)
            return None, None
        # The process was started: stop looking for other candidates.
        break
    else:
        # Loop exhausted without a successful start.
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    stdout = process.communicate()[0].strip().decode()
    if process.returncode == 0:
        return stdout, process.returncode
    if verbose:
        print("unable to run %s (error)" % dispcmd)
        print("stdout was %s" % stdout)
    return None, process.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the name of a parent directory.

    Source tarballs conventionally unpack into a directory whose name holds both
    the project name and a version string. `root` itself and the two directory
    levels above it are examined.

    Args:
        parentdir_prefix: prefix the directory name must start with; what
            follows the prefix is used as the version.
        root: path where the search starts.
        verbose: print the directories that were tried when none matches.

    Returns:
        A version dict with the keys "version", "full-revisionid", "dirty",
        "error" and "date".

    Raises:
        NotThisMethod: if none of the three directory names starts with the prefix.
    """
    tried = []
    candidate = root
    for _ in range(3):
        dirname = os.path.basename(candidate)
        if dirname.startswith(parentdir_prefix):
            return {
                "version": dirname[len(parentdir_prefix):],
                "full-revisionid": None,
                "dirty": False,
                "error": None,
                "date": None,
            }
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print(
            "Tried directories %s but none started with prefix %s"
            % (str(tried), parentdir_prefix)
        )
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    Args:
        versionfile_abs: path of the version file to scan.

    Returns:
        A dict holding whichever of the keys "refnames", "full" and "date"
        could be extracted; empty when the file cannot be read.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    variable_to_key = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    keywords = {}
    try:
        # The context manager closes the file even when reading it fails.
        with open(versionfile_abs, "r") as f:
            lines = f.readlines()
        for line in lines:
            stripped = line.strip()
            for prefix, key in variable_to_key:
                if stripped.startswith(prefix):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords[key] = mo.group(1)
    except EnvironmentError:
        # Best effort: an unreadable file yields an empty result.
        pass
    return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    Args:
        keywords: dict with the keys "refnames" and "full", and optionally "date".
        tag_prefix: prefix a tag must start with to be used as a version; it is
            stripped from the returned version.
        verbose: print progress information.

    Returns:
        A version dict with the keys "version", "full-revisionid", "dirty",
        "error" and "date".

    Raises:
        NotThisMethod: if the keywords are missing or were not expanded.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords:
        # Without refnames there is nothing to look tags up in: let the caller
        # fall back to another method instead of failing with a KeyError.
        raise NotThisMethod("no refnames keyword")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r"\d", r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {
                "version": r,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False,
                "error": None,
                "date": date,
            }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {
        "version": "0+unknown",
        "full-revisionid": keywords["full"].strip(),
        "dirty": False,
        "error": "no suitable tags",
        "date": None,
    }
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Args:
        tag_prefix: prefix that version tags start with.
        root: directory in which the git commands are run.
        verbose: print diagnostics.
        run_command: callable with the signature of `run_command`, used to
            execute git.

    Returns:
        A dict with the keys "long", "short", "error" and "dirty" and, unless
        "error" is set, "closest-tag", "distance" and "date".

    Raises:
        NotThisMethod: if `root` is not under git control or a git command fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(
        GITS,
        ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix],
        cwd=root,
    )
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[: git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
        # run_command returns None as output when the command fails.
        if count_out is None:
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0]
    # A failed command (None) or an empty output cannot be parsed as a date.
    if date_out is None or not date_out.strip():
        raise NotThisMethod("'git show' failed")
    date = date_out.strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    Args:
        pieces: dict that may hold a "closest-tag" entry (a string or None).

    Returns:
        "." when the closest tag already contains a "+", otherwise "+".
    """
    # "closest-tag" may be present with the value None (no tag found), in which
    # case `.get(key, "")` would hand back None and the `in` test would raise.
    closest_tag = pieces.get("closest-tag") or ""
    return "." if "+" in closest_tag else "+"
def render_pep440(pieces):
    """Render the version with a post-release "local version identifier".

    The result has the form TAG[+DISTANCE.gHEX[.dirty]]. A tagged build that is
    then dirtied gives TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. The result is 0+untagged.DISTANCE.gHEX[.dirty]

    Args:
        pieces: dict with the keys "closest-tag", "distance", "short" and "dirty".

    Returns:
        The rendered version string.
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        version = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
    elif pieces["distance"] or pieces["dirty"]:
        separator = plus_or_dot(pieces)
        version = tag + separator + "%d.g%s" % (pieces["distance"], pieces["short"])
    else:
        # Clean build exactly on the tag: nothing to append.
        return tag
    if pieces["dirty"]:
        version += ".dirty"
    return version
def render_pep440_pre(pieces):
    """Render the version as TAG[.post0.devDISTANCE], ignoring the dirty flag.

    Exceptions:
    1: no tags. The result is 0.post0.devDISTANCE

    Args:
        pieces: dict with the keys "closest-tag" and "distance".

    Returns:
        The rendered version string.
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post0.dev%d" % pieces["distance"]
    if pieces["distance"]:
        return tag + ".post0.dev%d" % pieces["distance"]
    return tag
def render_pep440_post(pieces):
    """Render the version as TAG[.postDISTANCE[.dev0]+gHEX].

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]+gHEX

    Args:
        pieces: dict with the keys "closest-tag", "distance", "short" and "dirty".

    Returns:
        The rendered version string.
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # Clean build exactly on the tag: nothing to append.
            return tag
        parts = [tag, ".post%d" % pieces["distance"]]
        if pieces["dirty"]:
            parts.append(".dev0")
        parts.append(plus_or_dot(pieces))
        parts.append("g%s" % pieces["short"])
    else:
        # exception #1
        parts = ["0.post%d" % pieces["distance"]]
        if pieces["dirty"]:
            parts.append(".dev0")
        parts.append("+g%s" % pieces["short"])
    return "".join(parts)
def render_pep440_old(pieces):
    """Render the version as TAG[.postDISTANCE[.dev0]].

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]

    Args:
        pieces: dict with the keys "closest-tag", "distance" and "dirty".

    Returns:
        The rendered version string.
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # Clean build exactly on the tag: nothing to append.
            return tag
        base = tag
    else:
        # exception #1
        base = "0"
    version = base + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version
def render_git_describe(pieces):
    """Render the version as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)

    Args:
        pieces: dict with the keys "closest-tag", "distance", "short" and "dirty".

    Returns:
        The rendered version string.
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag
    return (described + "-dirty") if pieces["dirty"] else described
def render_git_describe_long(pieces):
    """Render the version as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)

    Args:
        pieces: dict with the keys "closest-tag", "distance", "short" and "dirty".

    Returns:
        The rendered version string.
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]
    return (described + "-dirty") if pieces["dirty"] else described
def render(pieces, style):
    """Render the given version pieces into the requested style.

    Args:
        pieces: dict of version pieces; must hold "error", and "long" and
            "dirty" when "error" is falsy.
        style: one of "pep440", "pep440-pre", "pep440-post", "pep440-old",
            "git-describe" or "git-describe-long"; a falsy value or "default"
            selects "pep440".

    Returns:
        A version dict with the keys "version", "full-revisionid", "dirty",
        "error" and "date".

    Raises:
        ValueError: if `style` is not one of the known styles.
    """
    if pieces["error"]:
        # The pieces could not be computed: report the error instead of a version.
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": pieces["error"],
            "date": None,
        }

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    renderer = renderers.get(style)
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)

    return {
        "version": renderer(pieces),
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
        "date": pieces.get("date"),
    }
def get_versions():
    """Get version information or return default if unable to do so.

    The lookups are tried in this order: expanded git-archive keywords,
    'git describe' run from the root of the source tree, then the name of the
    parent directory.

    Returns:
        A version dict with the keys "version", "full-revisionid", "dirty",
        "error" and "date".
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    def unknown_version(reason):
        """Build the fallback result carrying `reason` as the error."""
        return {
            "version": "0+unknown",
            "full-revisionid": None,
            "dirty": None,
            "error": reason,
            "date": None,
        }

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split("/"):
            root = os.path.dirname(root)
    except NameError:
        return unknown_version("unable to find root of source tree")

    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose), cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return unknown_version("unable to compute version")
================================================
FILE: sequoia/client/README.md
================================================
# (WIP) Sequoia Client
This is currently only used for the competition. The idea is that the setting (and its environments) is isolated from the user (the 'client'), in order to prevent any modification / hacking of the environment.
================================================
FILE: sequoia/client/__init__.py
================================================
from .env_proxy import EnvironmentProxy
from .setting_proxy import SettingProxy
================================================
FILE: sequoia/client/__main__.py
================================================
""" TODO: launch the 'sequoia gRPC server' at a given address / port. """
import argparse
from .server import server
if __name__ == "__main__":
# Read the host ip and the port from the command line, then pass them to `server`.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# The ip defaults to the empty string.
parser.add_argument("--ip", type=str, help="gRPC host ip", default="")
parser.add_argument("-p", "--port", type=int, help="gRPC port", default=13337)
args = parser.parse_args()
server(
grpc_host=args.ip,
grpc_port=args.port,
)
================================================
FILE: sequoia/client/env.proto
================================================
syntax = "proto3";
// Adapted from https://github.com/AppliedDeepLearning/gymx/blob/master/gymx/env.proto
// Kinds of settings.
enum SettingType {
CLASS_INCREMENTAL = 0;
TASK_INCREMENTAL = 1;
CONTINUAL_RL = 2;
INCREMENTAL_RL = 3;
}
// RPC interface to an environment.
service Environment {
// Takes a Name and returns the corresponding Info.
rpc Make (Name) returns (Info) {};
// Takes no input and returns an Observation.
rpc Reset (Empty) returns (Observation) {};
// Takes an Action and returns the resulting Transition.
rpc Step (Action) returns (Transition) {};
}
// Wrapper around a single string value.
message Name {
string value = 1;
}
// Description of an environment, as returned by Make.
message Info {
repeated int32 observation_shape = 1;
int32 num_actions = 2;
int32 max_episode_steps = 3;
}
// A single integer action.
message Action {
int32 value = 1;
}
// An observation: a list of floats and a shape
// (presumably the flattened array and its dimensions - TODO confirm).
message Observation {
repeated float data = 1;
repeated int32 shape = 2;
}
// Result of a Step call.
message Transition {
Observation observation = 1;
float reward = 2;
// Presumably set when an episode ends, holding the first observation of the
// next episode - TODO confirm.
Observation next_episode = 3;
}
// Message without any field, used by RPCs that take no input.
message Empty {}
================================================
FILE: sequoia/client/env_proxy.py
================================================
"""TODO: Create an 'environment proxy' that relays observations / actions etc from a remote environment via gRPC.
For now this simply holds the 'remote' environment in memory.
"""
from typing import Any, Dict, List, Optional, Sequence, Tuple, Type, Union
import numpy as np
from torch import Tensor
from sequoia.common.metrics import Metrics
from sequoia.settings import (
Actions,
ActionType,
Environment,
Observations,
ObservationType,
Results,
Rewards,
RewardType,
Setting,
)
MISSING = object()


class EnvironmentProxy(Environment[ObservationType, ActionType, RewardType]):
    """Stand-in for an environment that would eventually live on a remote process.

    For now the wrapped environment is created locally by calling `env_fn`, and every
    operation is forwarded straight to it.
    """

    def __init__(self, env_fn, setting_type: Type[Setting]):
        # TODO: Actually interact with a given environment of the remote Setting
        # TODO: env_fn is just a callable that returns the actual env now, but the idea
        # is that it would perhaps be a handle/address/whatever which we could contact?
        self.__environment = env_fn()
        # TODO: Remove this if possible
        self._environment_type = type(self.__environment)
        self._setting_type = setting_type

        self.observation_space = self.get_attribute("observation_space")
        self.action_space = self.get_attribute("action_space")

        # NOTE: `reward_space` and `batch_size` are only mirrored on the proxy when the
        # underlying env has them. A private sentinel is needed here, since passing
        # `MISSING` to `get_attribute` means "no default".
        absent = object()
        remote_reward_space = self.get_attribute("reward_space", default=absent)
        if remote_reward_space is not absent:
            self.reward_space = remote_reward_space
        # TODO: Double check this also works for RL
        remote_batch_size = self.get_attribute("batch_size", default=absent)
        if remote_batch_size is not absent:
            self.batch_size: Optional[int] = remote_batch_size

    def get_attribute(self, name: str, default: Any = MISSING) -> Any:
        """Read attribute `name` from the wrapped env, falling back to `default` if given."""
        target = self.__environment
        if default is not MISSING:
            return getattr(target, name, default)
        # TODO: actually get the value from the 'remote' env.
        return getattr(target, name)

    def reset(self) -> ObservationType:
        """Reset the wrapped env and return what it returned."""
        return self.__environment.reset()

    def __len__(self) -> int:
        return self.__environment.__len__()

    def step(
        self, actions: ActionType
    ) -> Tuple[
        ObservationType,
        RewardType,
        Union[bool, Sequence[bool]],
        Union[Dict, Sequence[Dict]],
    ]:
        """Forward `actions` to the wrapped env and re-wrap what comes back."""
        # Simulate converting things to a pickleable object?
        outgoing = actions.numpy() if isinstance(actions, Actions) else actions
        # TODO: Use some kind of gRPC endpoint.
        raw_obs, raw_rewards, raw_done, raw_info = self.__environment.step(outgoing)

        # Mapping-like results are rebuilt as the setting's own Observations / Rewards.
        observations = raw_obs
        if isinstance(raw_obs, (Observations, dict)):
            observations = self._setting_type.Observations(**raw_obs)
        rewards = raw_rewards
        if isinstance(raw_rewards, (Rewards, dict)):
            rewards = self._setting_type.Rewards(**raw_rewards)

        return observations, rewards, np.array(raw_done), np.array(raw_info)

    def __iter__(self):
        return self.__environment.__iter__()

    def __next__(self) -> ObservationType:
        return self.__environment.__next__()

    def send(self, actions: ActionType):
        """Send `actions` to the wrapped env and return the (re-wrapped) rewards."""
        if isinstance(actions, Actions):
            actions = actions.y_pred
        if isinstance(actions, Tensor):
            actions = actions.cpu().numpy()
        raw_rewards = self.__environment.send(actions)
        if not isinstance(raw_rewards, (Rewards, dict)):
            return raw_rewards
        return self._setting_type.Rewards(**raw_rewards)

    def close(self):
        self.__environment.close()

    @property
    def is_closed(self) -> bool:
        return self.get_attribute("is_closed")

    def render(self, *args, **kwargs):
        return self.__environment.render(*args, **kwargs)

    def get_results(self) -> Results:
        return self.__environment.get_results()

    def get_online_performance(self) -> List[Metrics]:
        return self.__environment.get_online_performance()

    def get_average_online_performance(self) -> Metrics:
        return self.__environment.get_average_online_performance()

    def __getattr__(self, name: str):
        # Only reached when normal lookup fails: public names are fetched from the
        # wrapped env, private ones are refused.
        if not name.startswith("_"):
            return self.get_attribute(name)
        raise AttributeError(f"attempted to get missing private attribute '{name}'")
================================================
FILE: sequoia/client/env_proxy_test.py
================================================
import platform
from functools import partial
from typing import ClassVar, Iterable, Tuple, Type, TypeVar
import gym
import numpy as np
import psutil
import pytest
from torch import Tensor
from torchvision.datasets import MNIST
from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from sequoia.common.gym_wrappers.env_dataset_test import TestEnvDataset as _TestEnvDataset
from sequoia.common.gym_wrappers.utils import is_proxy_to
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from sequoia.settings.assumptions import IncrementalAssumption
from sequoia.settings.rl.continual.environment import GymDataLoader
from sequoia.settings.rl.continual.environment_test import TestGymDataLoader as _TestGymDataLoader
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.environment_test import TestPassiveEnvironment as _TestPassiveEnvironment
from .env_proxy import EnvironmentProxy
# Note: import with underscores so we don't re-run those tests again.
EnvType = TypeVar("EnvType", bound=gym.Env, covariant=True)


def wrap_type_with_proxy(env_type: Type[EnvType]) -> EnvType:
    """Return an `EnvironmentProxy` subclass that builds and wraps an `env_type` env.

    The returned class takes the same constructor arguments as `env_type`; they are
    bound into the `env_fn` that is handed to `EnvironmentProxy`.
    """

    class _EnvProxy(EnvironmentProxy):
        def __init__(self, *env_args, **env_kwargs):
            make_env = partial(env_type, *env_args, **env_kwargs)
            super().__init__(make_env, setting_type=IncrementalAssumption)

    return _EnvProxy


# Proxied counterparts of the environment types whose test-suites are reused below.
ProxyEnvDataset = wrap_type_with_proxy(EnvDataset)
ProxyPassiveEnvironment = wrap_type_with_proxy(PassiveEnvironment)
ProxyGymDataLoader = wrap_type_with_proxy(GymDataLoader)
class TestEnvironmentProxy(_TestEnvDataset, _TestPassiveEnvironment, _TestGymDataLoader):
    """Re-runs the inherited environment test-suites through an `EnvironmentProxy`.

    IDEA: each class attribute below swaps an environment type for its proxied
    counterpart (presumably the parent test classes read these attributes).
    """

    # Reuse the tests for the EnvDataset, using a proxy to the environment instead.
    EnvDataset: ClassVar[Type[EnvDataset]] = ProxyEnvDataset

    # Reuse the tests for the PassiveEnvironment, using a proxy to the env instead.
    PassiveEnvironment: ClassVar[Type[PassiveEnvironment]] = ProxyPassiveEnvironment

    # Reuse the tests for the GymDataLoader, using a proxy to the loader instead.
    GymDataLoader: ClassVar[Type[GymDataLoader]] = ProxyGymDataLoader
def test_sanity_check():
    """An instance of a generated proxy class is an `EnvironmentProxy`."""
    proxy = ProxyEnvDataset(gym.make("CartPole-v0"))
    assert isinstance(proxy, EnvironmentProxy)
    assert issubclass(type(proxy), EnvironmentProxy)
@pytest.mark.parametrize("use_wrapper", [False, True])
def test_is_proxy_to(use_wrapper: bool):
    """`is_proxy_to` is true for a proxied PassiveEnvironment and false for a plain one."""
    pipeline = Compose([Transforms.to_tensor, Transforms.three_channels])
    mnist = MNIST("data", transform=pipeline)
    obs_space = pipeline(Image(0, 255, (1, 28, 28), np.uint8))

    env_cls = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
    env: Iterable[Tuple[Tensor, Tensor]] = env_cls(
        mnist,
        batch_size=32,
        n_classes=10,
        observation_space=obs_space,
    )

    if not use_wrapper:
        assert not is_proxy_to(env, PassiveEnvironment)
        return

    assert isinstance(env, EnvironmentProxy)
    assert issubclass(type(env), EnvironmentProxy)
    assert is_proxy_to(env, PassiveEnvironment)
# TODO: Write a test that first reproduces issue #204 and then check that removing
# `self.__environment.reset()` from __iter__ fixed it.
# Only runs on Linux (see the skip reason below).
@pytest.mark.skipif(
platform.system() != "Linux",
reason="Not sure this would work the same on non-Linux systems.",
)
def test_issue_204():
"""Test that reproduces the issue #204, which was that some zombie processes
appeared to be created when iterating using an EnvironmentProxy.
The issue appears to have been caused by calling `self.__environment.reset()` in
`__iter__`, which I think caused another dataloader iterator to be created?
"""
transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
batch_size = 2048
num_workers = 12
dataset = MNIST("data", transform=transforms)
obs_space = Image(0, 255, (1, 28, 28), np.uint8)
obs_space = transforms(obs_space)
# Handle on the current (test) process, used to count its threads and children.
current_process = psutil.Process()
print(
f"Current process is using {current_process.num_threads()} threads, with "
f" {len(current_process.children(recursive=True))} child processes."
)
# Baseline counts that every later measurement is compared against.
starting_threads = current_process.num_threads()
starting_processes = len(current_process.children(recursive=True))
# Same scenario for the plain environment first, then for the proxied one.
for use_wrapper in [False, True]:
# Nothing may be left over from a previous run.
threads = current_process.num_threads()
processes = len(current_process.children(recursive=True))
assert threads == starting_threads
assert processes == starting_processes
env_type = ProxyPassiveEnvironment if use_wrapper else PassiveEnvironment
env: Iterable[Tuple[Tensor, Tensor]] = env_type(
dataset,
batch_size=batch_size,
n_classes=10,
observation_space=obs_space,
num_workers=num_workers,
persistent_workers=True,
)
# First pass over the env: exactly `num_workers` extra threads and child processes
# are expected while iterating.
for i, _ in enumerate(env):
threads = current_process.num_threads()
processes = len(current_process.children(recursive=True))
assert threads == starting_threads + num_workers
assert processes == starting_processes + num_workers
print(
f"Current process is using {threads} threads, with "
f" {processes} child processes."
)
# Second pass over the same env: the counts must still be baseline + `num_workers`.
for i, _ in enumerate(env):
threads = current_process.num_threads()
processes = len(current_process.children(recursive=True))
assert threads == starting_threads + num_workers
assert processes == starting_processes + num_workers
print(
f"Current process is using {threads} threads, with "
f" {processes} child processes."
)
# Same check when driving the env through the reset() / step() interface.
obs = env.reset()
done = False
while not done:
obs, reward, done, info = env.step(env.action_space.sample())
# env.render(mode="human")
threads = current_process.num_threads()
processes = len(current_process.children(recursive=True))
# The counts are only checked while `done` is still False.
if not done:
assert threads == starting_threads + num_workers
assert processes == starting_processes + num_workers
print(
f"Current process is using {threads} threads, with "
f" {processes} child processes."
)
env.close()
import time
# Need to give it a second (or so) to cleanup.
time.sleep(1)
# After closing, the counts must be back at the baseline.
threads = current_process.num_threads()
processes = len(current_process.children(recursive=True))
assert threads == starting_threads
assert processes == starting_processes
def test_interaction_with_test_environment():
    # Placeholder, nothing is checked yet.
    # IDEA: Maybe write tests for the 'test' environments, and see that they work even
    # through the proxy?
    return None
================================================
FILE: sequoia/client/server.py
================================================
def server(grpc_host: str, grpc_port: int):
    """Placeholder for launching the Sequoia gRPC server on `grpc_host`:`grpc_port`.

    Not implemented yet: always raises `NotImplementedError`.
    """
    reason = "TODO"
    raise NotImplementedError(reason)
================================================
FILE: sequoia/client/setting_proxy.py
================================================
import time
import warnings
from functools import partial
from logging import getLogger
from pathlib import Path
from typing import Any, Callable, Dict, Generic, List, Optional, Type, TypeVar
import gym
import numpy as np
from sequoia.common.config import Config
from sequoia.methods import Method
from sequoia.settings import ClassIncrementalSetting, IncrementalRLSetting, Results, Setting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import SettingABC
from .env_proxy import EnvironmentProxy
# Module-level logger. NOTE: it is named after this file's path (`__file__`), not after
# the module name.
logger = getLogger(__file__)
# IDEA: Dict that indicates for each setting, which attributes are *NOT* writeable.
# Consulted by `SettingProxy._is_writeable`.
_readonly_attributes: Dict[Type[Setting], List[str]] = {
ClassIncrementalSetting: ["test_transforms"],
IncrementalRLSetting: ["test_transforms"],
}
# IDEA: Dict that indicates for each setting, which attributes are *NOT* readable.
# Consulted by `SettingProxy._is_readable`.
_hidden_attributes: Dict[Type[Setting], List[str]] = {
ClassIncrementalSetting: ["test_class_order"],
IncrementalRLSetting: ["test_task_schedule", "test_wrappers"],
}
# Type of the Setting that a `SettingProxy` stands in for.
SettingType = TypeVar("SettingType", bound=Setting)
class SettingProxy(SettingABC, Generic[SettingType]):
    """Proxy for a Setting.

    TODO: Creating the Setting locally for now, but we'd spin-up or contact a "gRPC
    service" that would have at least the following endpoints:

    - get_attribute(name: str) -> Any:
        returns the attribute from the setting, if that attribute can be read.
    - set_attribute(name: str, value: Any) -> bool:
        Sets the given attribute to the given value, if that is allowed.
    - train_dataloader()
    - val_dataloader()
    - test_dataloader()
    """

    # NOTE: Using __slots__ so we can detect errors if Method tries to set non-existent
    # attribute on the SettingProxy.
    # TODO: I don't think this has any effect, because we subclass SettingABC which
    # doesn't use __slots__.
    __slots__ = ["__setting", "_setting_type", "_train_env", "_val_env", "_test_env"]

    def __init__(
        self,
        setting_type: Type[SettingType],
        setting_config_path: Optional[Path] = None,
        **setting_kwargs,
    ):
        """Create the wrapped setting, from a benchmark file or from keyword arguments.

        Args:
            setting_type: Type of the setting to create and stand in for.
            setting_config_path: When given, the setting is loaded with
                `setting_type.load_benchmark(setting_config_path)`.
            **setting_kwargs: Constructor arguments for `setting_type`, used only when
                no `setting_config_path` is given.

        Raises:
            RuntimeError: If both a config path and keyword arguments are given.
        """
        self._setting_type = setting_type
        self.__setting: SettingType
        if setting_config_path:
            # Reject the invalid combination before doing any loading work.
            if setting_kwargs:
                raise RuntimeError(
                    "Can't use keyword arguments when passing a path to a yaml file!"
                )
            self.__setting = setting_type.load_benchmark(setting_config_path)
        else:
            self.__setting = setting_type(**setting_kwargs)
        self.__setting.monitor_training_performance = True
        super().__init__()

        self._train_env = None
        self._val_env = None
        self._test_env = None

    # ------------------------------------------------------------------ spaces

    @property
    def observation_space(self) -> gym.Space:
        # Push the current train transforms to the setting before reading the space.
        self.set_attribute("train_transforms", self.train_transforms)
        return self.get_attribute("observation_space")

    @property
    def action_space(self) -> gym.Space:
        return self.get_attribute("action_space")

    @property
    def reward_space(self) -> gym.Space:
        return self.get_attribute("reward_space")

    # -------------------------------------------------------------------- envs

    @property
    def train_env(self) -> EnvironmentProxy:
        # NOTE: `train_dataloader` currently doesn't store the env it creates, so this
        # stays at the `None` set in `__init__`.
        return self._train_env

    @property
    def val_env(self) -> EnvironmentProxy:
        # NOTE: same remark as for `train_env`.
        return self._val_env

    @property
    def test_env(self) -> EnvironmentProxy:
        if not self._is_readable("test_env"):
            raise RuntimeError("You don't have access to the test_env attribute!")
        # NOTE(review): if `test_env` is a property on the setting class, this call on
        # the class attribute would need `.fget(self)` instead -- confirm.
        return self._setting_type.test_env(self)

    @test_env.setter
    def test_env(self, value) -> None:
        if not self._is_writeable("test_env"):
            raise RuntimeError("You don't have access to the test_env attribute!")
        self.__setting.test_env = value

    def _temp_make_readable(self, attribute: str) -> None:
        """Temporarily makes an attribute readable.

        TODO: Not implemented yet; currently does nothing.
        """

    # ------------------------------------------------- forwarded configuration

    @property
    def config(self) -> Config:
        return self.get_attribute("config")

    @config.setter
    def config(self, value: Config) -> None:
        self.set_attribute("config", value)

    def prepare_data(self, *args, **kwargs):
        self.__setting.prepare_data(*args, **kwargs)

    def setup(self, stage: str = None):
        self.__setting.setup(stage=stage)

    def get_name(self):
        return self.__setting.get_name()

    # -------------------------------------------------------- access control

    def _restricted_names(self, table: Dict[Type[Setting], List[str]]) -> List[str]:
        """Return the attribute names that `table` restricts for this setting type.

        The entry of the setting type itself takes precedence; otherwise the entry of
        the first of its parents (in `get_parents()` order) that has one is used. An
        empty list is returned when there is no entry at all.
        """
        if self._setting_type in table:
            return table[self._setting_type]
        for parent_setting_type in self._setting_type.get_parents():
            if parent_setting_type in table:
                return table[parent_setting_type]
        return []

    def _is_readable(self, attribute: str) -> bool:
        """Whether `attribute` may be read (it isn't listed in `_hidden_attributes`)."""
        return attribute not in self._restricted_names(_hidden_attributes)

    def _is_writeable(self, attribute: str) -> bool:
        """Whether `attribute` may be set (it isn't listed in `_readonly_attributes`)."""
        return attribute not in self._restricted_names(_readonly_attributes)

    # -------------------------------------------- batch size and transforms

    @property
    def batch_size(self) -> Optional[int]:
        return self.get_attribute("batch_size")

    @batch_size.setter
    def batch_size(self, value: Optional[int]) -> None:
        self.set_attribute("batch_size", value)

    # NOTE: These getters used to read misspelled attributes (`train_tansforms`, ...)
    # and only worked because the resulting AttributeError fell through to
    # `__getattr__`.
    @property
    def train_transforms(self) -> List[Callable]:
        return self.__setting.train_transforms

    @train_transforms.setter
    def train_transforms(self, value: List[Callable]):
        self.__setting.train_transforms = value

    @property
    def val_transforms(self) -> List[Callable]:
        return self.__setting.val_transforms

    @val_transforms.setter
    def val_transforms(self, value: List[Callable]):
        self.__setting.val_transforms = value

    @property
    def test_transforms(self) -> List[Callable]:
        return self.__setting.test_transforms

    @test_transforms.setter
    def test_transforms(self, value: List[Callable]):
        self.__setting.test_transforms = value

    # --------------------------------------------------------------- running

    def apply(self, method: Method, config: Config = None) -> Results:
        """Configure `method` on this proxy, run the main loop and return the results.

        NOTE: `config` is currently not used here; the config that gets stored comes
        from `self._setup_config(method)`.
        """
        # TODO: Figure out where the 'config' should be defined?
        method.configure(setting=self)
        self.config = self._setup_config(method)
        # TODO: Not sure if the method is changing the train_transforms.

        # Expose the setting type's Observations / Actions / Rewards on the proxy.
        self.Observations = self._setting_type.Observations
        self.Actions = self._setting_type.Actions
        self.Rewards = self._setting_type.Rewards
        if hasattr(self._setting_type, "TestEnvironment"):
            self.TestEnvironment = self._setting_type.TestEnvironment

        # Run the Main loop.
        results: Results = self.main_loop(method)

        logger.info(f"Results objective: {results.objective}")
        logger.info(results.summary())
        method.receive_results(self, results=results)
        return results

    def get_attribute(self, name: str) -> Any:
        """Return attribute `name` of the wrapped setting.

        Emits a `RuntimeWarning` when the value is not `None` and not of a simple type
        (int, str, bool, ndarray, gym.Space, list).
        """
        value = getattr(self.__setting, name)
        if value is None:
            return value
        if not isinstance(value, (int, str, bool, np.ndarray, gym.Space, list)):
            warnings.warn(
                RuntimeWarning(
                    f"TODO: Attribute {name} has a value of type {type(value)}, which "
                    f"wouldn't necessarily be easy to transfer with gRPC. This could "
                    f"mean that we need to implement this on the proxy itself. "
                )
            )
        return value

    def set_attribute(self, name: str, value: Any) -> None:
        """Set attribute `name` of the wrapped setting to `value`."""
        setattr(self.__setting, name, value)

    # ----------------------------------------------------------- dataloaders

    def train_dataloader(
        self, batch_size: Optional[int] = None, num_workers: Optional[int] = None
    ) -> EnvironmentProxy:
        """Return a new proxy around the wrapped setting's training environment.

        `batch_size` and `num_workers` are passed through unchanged (including `None`).
        The returned env is not stored on `self`.
        """
        # TODO: Faking this 'remote-ness' for now:
        return EnvironmentProxy(
            env_fn=partial(
                self.__setting.train_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )

    def val_dataloader(
        self, batch_size: Optional[int] = None, num_workers: Optional[int] = None
    ) -> EnvironmentProxy:
        """Return a new proxy around the wrapped setting's validation environment.

        `batch_size` and `num_workers` are passed through unchanged (including `None`).
        The returned env is not stored on `self`.
        """
        return EnvironmentProxy(
            env_fn=partial(
                self.__setting.val_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )

    def test_dataloader(
        self, batch_size: Optional[int] = None, num_workers: Optional[int] = None
    ):
        """Return a new proxy around the wrapped setting's test environment."""
        # TODO: Get the caller, and if it's 'internal' to sequoia then let it through.
        # raise RuntimeError("You don't have access to the test_dataloader method!")
        return EnvironmentProxy(
            env_fn=partial(
                self.__setting.test_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )

    def __test_dataloader(
        self, batch_size: Optional[int] = None, num_workers: Optional[int] = None
    ) -> EnvironmentProxy:
        """Create, store (in `self._test_env`) and return a proxy to the test env.

        Unlike `test_dataloader`, missing arguments default to the setting's
        `batch_size` / `num_workers`, and a previously stored test env is closed first.
        """
        batch_size = batch_size if batch_size is not None else self.get_attribute("batch_size")
        num_workers = num_workers if num_workers is not None else self.get_attribute("num_workers")
        if self._test_env:
            self._test_env.close()
            del self._test_env
        self._test_env = EnvironmentProxy(
            env_fn=partial(
                self.__setting.test_dataloader,
                batch_size=batch_size,
                num_workers=num_workers,
            ),
            setting_type=self._setting_type,
        )
        return self._test_env

    # ----------------------------------------------------------------- loops

    def main_loop(self, method: Method) -> Results:
        """Run the setting type's main loop with this proxy as `self`.

        The measured CPU time (`time.process_time`) of that loop is stored on the
        results as `_runtime`.
        """
        # TODO: Implement the 'remote' equivalent of the main loop of the
        # IncrementalAssumption, instead of borrowing the local implementation.
        method.set_training()

        # Send the train / val transforms to the 'remote' env.
        self.set_attribute("train_transforms", self.train_transforms)
        self.set_attribute("val_transforms", self.val_transforms)
        self.Results = self._setting_type.Results

        # The clock has to start *before* the loop runs, otherwise the runtime is ~0.
        start_time = time.process_time()
        # TODO: Can we avoid duplicating the main loop here?
        test_results = self._setting_type.main_loop(self, method=method)
        test_results._runtime = time.process_time() - start_time
        return test_results

    def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
        """(WIP): Runs an incremental test loop and returns the Results.

        The idea is that this loop should be exactly the same, regardless of if
        you're on the RL or the CL side of the tree.

        For now this delegates to the `test_loop` of the wrapped setting. A
        `RuntimeWarning` is emitted when the setting has `task_labels_at_test_time`
        set.
        """
        # TODO: Always setting this to False for now.
        if self.get_attribute("task_labels_at_test_time"):
            warnings.warn(
                RuntimeWarning("no task labels at test time for now when using a SettingProxy")
            )
        # TODO: Avoid duplicating the test loop here? A 'remote' version would need a
        # way to tell the Method about task boundaries (e.g. through `info`).
        return self.__setting.test_loop(method=method)

    # NOTE: Was experimenting with the idea of allowing the regular getattr and setattr
    # to forward calls to the remote. In the end I think it's better to explicitly
    # prevent any of these from happening.
    def __getattr__(self, name: str):
        # NOTE: This only ever gets called if the attribute was not found through the
        # normal lookup.
        if name in ("_setting_type", "_SettingProxy__setting"):
            # These are needed by the lookup below: when they are missing (e.g. before
            # `__init__` has set them), forwarding would recurse forever.
            raise AttributeError(name)
        if self._is_readable(name):
            print(f"Accessing missing attribute {name} from the 'remote' setting.")
            return self.get_attribute(name)
        raise AttributeError(
            f"Attribute {name} is either not present on the setting, or not marked as " f"readable!"
        )
# def __setattr__(self, name: str, value: Any) -> None:
# # Weird pytorch-lightning stuff:
# logger.debug(f"__setattr__ called for attribute {name}")
# if name in {"_setting_type", "__setting"}:
# assert name not in self.__dict__, f"Can't change attribute {name}"
# object.__setattr__(self, name, value)
# elif self._is_writeable(name):
# logger.info(f"Setting attribute {name} on the 'remote' setting.")
# self.set_attribute(name, value)
# else:
# raise AttributeError(f"Attribute {name} is marked as read-only!")
================================================
FILE: sequoia/client/setting_proxy_test.py
================================================
"""TODO: Tests for the SettingProxy.
"""
from functools import partial
from typing import ClassVar, Type
import numpy as np
import pytest
from gym import spaces
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.common.spaces import Image, Sparse
from sequoia.common.transforms import Transforms
from sequoia.conftest import slow
from sequoia.methods.base_method import BaseMethod
from sequoia.methods.method_test import key_fn
from sequoia.methods.random_baseline import RandomBaselineMethod
from sequoia.settings import Setting, all_settings
from sequoia.settings.rl import IncrementalRLSetting, TaskIncrementalRLSetting
from sequoia.settings.rl.continual.setting import ContinualRLSetting
from sequoia.settings.rl.continual.setting_test import (
TestContinualRLSetting as ContinualRLSettingTests,
)
from sequoia.settings.sl import ClassIncrementalSetting, DomainIncrementalSLSetting
from sequoia.settings.sl.continual.setting import ContinualSLSetting
from sequoia.settings.sl.continual.setting_test import (
TestContinualSLSetting as ContinualSLSettingTests,
)
from .setting_proxy import SettingProxy
@pytest.mark.parametrize("setting_type", sorted(all_settings, key=key_fn))
def test_spaces_match(setting_type: Type[Setting]):
    """A proxy reports the same spaces as a default-constructed setting of its type."""
    reference = setting_type()
    proxy = SettingProxy(setting_type)
    for space_name in ("observation_space", "action_space", "reward_space"):
        assert getattr(proxy, space_name) == getattr(reference, space_name)
def test_transforms_get_propagated():
    """Appended transforms show up in the observation space and in the train env.

    Checked on both the real setting and on a proxy to it.
    """
    candidates = [
        TaskIncrementalRLSetting(dataset="MetaMonsterKong-v0"),
        SettingProxy(TaskIncrementalRLSetting, dataset="MetaMonsterKong-v0"),
    ]
    for candidate in candidates:
        assert candidate.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)

        for extra_transform in (Transforms.to_tensor, Transforms.resize_32x32):
            candidate.transforms.append(extra_transform)

        # TODO: The observation space doesn't update directly in RL whenever the
        # transforms are changed.
        assert candidate.observation_space.x == Image(0, 1, shape=(3, 32, 32))
        first_obs = candidate.train_dataloader().reset()
        assert first_obs.x.shape == (3, 32, 32)
class TestContinualSLSettingProxy(ContinualSLSettingTests):
    """Runs the ContinualSLSetting tests with a `SettingProxy` in place of the setting."""

    Setting: ClassVar[Type[Setting]] = partial(SettingProxy, ContinualSLSetting)
class TestContinualRLSettingProxy(ContinualRLSettingTests):
    """Runs the ContinualRLSetting tests with a `SettingProxy` in place of the setting."""

    Setting: ClassVar[Type[Setting]] = partial(SettingProxy, ContinualRLSetting)
@pytest.mark.timeout(30)
def test_random_baseline(config):
    """The random baseline scores around chance level through a proxy."""
    method = RandomBaselineMethod()
    proxy = SettingProxy(DomainIncrementalSLSetting, config=config)
    results = proxy.apply(method, config=config)
    # domain incremental mnist: 2 classes per task -> chance accuracy of 50%.
    lower, upper = 0.45, 0.55
    assert lower <= results.objective <= upper
@pytest.mark.timeout(180)
def test_random_baseline_rl():
    """Applies the random baseline to a proxied IncrementalRLSetting on monsterkong."""
    method = RandomBaselineMethod()
    # One schedule entry per key 0..4, each selecting a level.
    levels = [0, 1, 10, 11, 0]
    schedule = {key: {"level": level} for key, level in enumerate(levels)}
    setting = SettingProxy(
        IncrementalRLSetting,
        dataset="monsterkong",
        monitor_training_performance=True,
        # observe_state_directly=False, ## TODO: Make sure this doesn't change anything.
        train_steps_per_task=1_000,
        test_steps_per_task=1_000,
        train_task_schedule=schedule,
        # Interesting problem: Will it always do at least an entire episode here per
        # env?
        # batch_size=2,
        # num_workers=0,
    )
    for budget_name in ("train_max_steps", "test_max_steps"):
        assert getattr(setting, budget_name) == 4_000

    results: IncrementalRLSetting.Results[EpisodeMetrics] = setting.apply(method)
    assert 20 <= results.average_final_performance.mean_reward_per_episode
@pytest.mark.timeout(120)
def test_random_baseline_SL_track():
    """The random baseline lands within 50% of 1/48 on the proxied synbols setting."""
    method = RandomBaselineMethod()
    proxy = SettingProxy(ClassIncrementalSetting, dataset="synbols", nb_tasks=12)
    results = proxy.apply(method)
    chance = 1 / 48
    assert chance * 0.5 <= results.objective <= chance * 1.5
@slow
@pytest.mark.timeout(300)
def test_baseline_SL_track(config):
    """Applies the BaseMethod on something resembling the SL track of the
    competition.
    """
    method = BaseMethod(max_epochs=1)
    shuffled_classes = np.random.permutation(48).tolist()
    proxy = SettingProxy(
        ClassIncrementalSetting,
        dataset="synbols",
        nb_tasks=12,
        class_order=shuffled_classes,
    )
    results: ClassIncrementalSetting.Results = proxy.apply(method, config)
    assert results.to_log_dict()

    # TODO: Add tests for having a different ordering of test tasks vs train tasks.
    online_objective = results.average_online_performance.objective
    assert 0.30 <= online_objective <= 0.65
    final_objective = results.average_final_performance.objective
    assert 0.02 <= final_objective <= 0.06
def test_rl_track_setting_is_correct():
    """Checks the attributes, spaces and envs of a proxy created from "rl_track"."""
    setting = SettingProxy(IncrementalRLSetting, "rl_track")

    def expected_env_space():
        # Expected observation space of the train / valid envs.
        return spaces.Dict(
            x=Image(0, 1, (3, 64, 64), dtype=np.float32),
            task_labels=spaces.Discrete(8),
        )

    assert setting.nb_tasks == 8
    assert setting.dataset == "MetaMonsterKong-v0"
    assert setting.observation_space == spaces.Dict(
        x=Image(0, 1, (3, 64, 64), dtype=np.float32),
        task_labels=Sparse(spaces.Discrete(8)),
    )
    assert setting.action_space == spaces.Discrete(6)
    # TODO: The reward range of the MetaMonsterKongEnv is (0, 50), which seems wrong.
    # This isn't really a big deal though.
    # assert setting.reward_space == spaces.Box(0, 100, shape=(), dtype=np.float32)
    assert setting.steps_per_task == 200_000
    assert setting.test_steps_per_task == 10_000
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True

    expected_transforms = [Transforms.to_tensor, Transforms.three_channels]
    assert setting.train_transforms == expected_transforms
    assert setting.val_transforms == expected_transforms
    assert setting.test_transforms == expected_transforms

    train_env = setting.train_dataloader()
    assert train_env.observation_space == expected_env_space()
    assert train_env.reset() in train_env.observation_space

    valid_env = setting.val_dataloader()
    assert valid_env.observation_space == expected_env_space()

    # IDEA: Prevent submissions from calling the test_dataloader method or accessing the
    # test_env / test_dataset property?
    with pytest.raises(RuntimeError):
        forbidden_env = setting.test_dataloader()
        forbidden_env.reset()

    with pytest.raises(RuntimeError):
        forbidden_env = setting.test_env
        forbidden_env.reset()
def test_sl_track_setting_is_correct():
    """Check the configuration of the "sl_track" preset of `ClassIncrementalSetting`."""
    setting = SettingProxy(ClassIncrementalSetting, "sl_track")
    expected_transforms = [Transforms.to_tensor, Transforms.three_channels]

    assert setting.nb_tasks == 12
    assert setting.dataset == "synbols"
    assert setting.observation_space == spaces.Dict(
        x=Image(0, 1, (3, 32, 32), dtype=np.float32),
        task_labels=spaces.Discrete(12),
    )
    assert setting.n_classes_per_task == 4
    assert setting.action_space == spaces.Discrete(48)
    assert setting.reward_space == spaces.Discrete(48)
    assert setting.known_task_boundaries_at_train_time is True
    assert setting.known_task_boundaries_at_test_time is False
    assert setting.monitor_training_performance is True
    assert setting.train_transforms == expected_transforms
    assert setting.val_transforms == expected_transforms
    assert setting.test_transforms == expected_transforms
================================================
FILE: sequoia/common/__init__.py
================================================
from .batch import Batch
from .config import Config
from .loss import Loss
from .metrics import ClassificationMetrics, Metrics, RegressionMetrics, get_metrics
from .spaces import Sparse
================================================
FILE: sequoia/common/batch.py
================================================
""" WIP (@lebrice): Playing around with the idea of using a typed object to
represent the different forms of "batches" that settings produce and that
different models expect.
"""
import dataclasses
import itertools
from abc import ABC
from collections import namedtuple
from dataclasses import dataclass
from functools import partial, singledispatch
from typing import (
Any,
Callable,
ClassVar,
Dict,
Iterable,
Iterator,
KeysView,
List,
Mapping,
NamedTuple,
Optional,
Tuple,
Type,
TypeVar,
Union,
)
import gym
import numpy as np
import torch
from torch import Tensor
from sequoia.utils.logging_utils import get_logger
try:
from functools import singledispatchmethod # type: ignore
except ImportError:
from singledispatchmethod import singledispatchmethod # type: ignore
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Type variable bound to `Batch`; used to annotate methods that return an object of
# the same type as `self` (or `cls`).
B = TypeVar("B", bound="Batch", covariant=True)
# Type of the values held by a `Batch`: a Tensor, a numpy array, or a nested `Batch`.
T = TypeVar("T", Tensor, np.ndarray, "Batch")
# Unconstrained type variable.
V = TypeVar("V")
def hasmethod(obj: Any, method_name: str) -> bool:
    """Return True when `obj` has an attribute named `method_name` that is callable."""
    # A missing attribute falls back to `None`, which is not callable.
    candidate = getattr(obj, method_name, None)
    return callable(candidate)
@dataclass(frozen=True, eq=False)
class Batch(ABC, Mapping[str, T]):
"""Abstract base class for typed, immutable objects holding tensors.
Can be used as an immutable dictionary mapping from strings to tensors, or
as a tuple if you index with an integer.
Also has some Tensor-like helper methods like `to()`, `numpy()`, `detach()`,
etc.
Other features:
- numpy-style indexing/slicing/masking
- moving all items between devices
- changing the dtype of all tensors
- detaching all tensors
- Converting all tensors to numpy arrays
- convertible to a tuple or a dict
NOTE: Using dataclasses rather than namedtuples, because those aren't really
meant to be subclassed, so we couldn't use them to make the 'Observations'
hierarchy, for instance.
Dataclasses work better for that purpose.
Examples:
>>> import torch
>>> from typing import Optional
>>> from dataclasses import dataclass
>>> @dataclass(frozen=True)
... class MyBatch(Batch):
... x: Tensor
... y: Tensor = None
>>> batch = MyBatch(x=torch.ones([10, 3, 32, 32]), y=torch.arange(10))
>>> batch.shapes
{'x': torch.Size([10, 3, 32, 32]), 'y': torch.Size([10])}
>>> batch.batch_size
10
>>> batch.dtypes
{'x': torch.float32, 'y': torch.int64}
>>> batch.dtype # No shared dtype, so dtype returns None.
>>> batch.float().dtype # Converting all the items to float dtype:
torch.float32
Device-related methods:
>>> from dataclasses import dataclass
>>> import torch
>>> from torch import Tensor
>>> @dataclass(frozen=True)
... class Observations(Batch):
... x: Tensor
... task_labels: Tensor
... done: Tensor
...
>>> # Example: observations from two gym environments (e.g. VectorEnv)
>>> observations = Observations(
... x = torch.arange(10).reshape([2, 5]),
... task_labels = torch.arange(2, dtype=int),
... done = torch.zeros(2, dtype=bool),
... )
>>> observations.shapes
{'x': torch.Size([2, 5]), 'task_labels': torch.Size([2]), 'done': torch.Size([2])}
>>> observations.batch_size
2
Datatypes:
>>> observations.dtypes
{'x': torch.int64, 'task_labels': torch.int64, 'done': torch.bool}
>>> observations.dtype # No shared dtype, so dtype returns None.
>>> observations.float().dtype # Converting all the items to float dtype:
torch.float32
Returns the device common to all items, or None:
>>> observations.device
device(type='cpu')
>>> # observations.to("cuda").device
>>> # device(type='cuda', index=0)
>>> observations[0]
tensor([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
Additionally, when slicing a Batch across the first dimension, you get
other typed objects as a result! For example:
>>> observations[:, 0]
Observations(x=tensor([0, 1, 2, 3, 4]), task_labels=tensor(0), done=tensor(False))
>>> observations[:, 1]
Observations(x=tensor([5, 6, 7, 8, 9]), task_labels=tensor(1), done=tensor(False))
"""
# TODO: Would it make sense to add a gym Space class variable here?
# Declared only (no value is assigned in this class body).
space: ClassVar[Optional[gym.Space]]
# TODO: Remove these:
# Both attributes below are set on the concrete class by `__post_init__`, i.e. when
# an instance gets created, not at class-definition time.
# Names of the dataclass fields of the concrete class.
field_names: ClassVar[List[str]]
# Namedtuple type with one entry per field, used by `as_namedtuple`.
_namedtuple: ClassVar[Type[NamedTuple]]
def __init_subclass__(cls, *args, **kwargs):
# IDEA: By not marking 'Batch' a dataclass, we would let the subclass
# decide it if wants to be frozen or not!
# Subclasses of `Batch` should be dataclasses!
if not dataclasses.is_dataclass(cls):
raise RuntimeError(f"{__class__} subclass {cls} must be a dataclass!")
super().__init_subclass__(*args, **kwargs)
def __post_init__(self):
# Create some class attributes, if they don't already exist.
# TODO: We have to set these here because __init_subclass__ is called
# before the dataclasses package sets the 'fields' attribute, it seems.
cls = type(self)
if "field_names" not in cls.__dict__:
type(self).field_names = [f.name for f in dataclasses.fields(self)]
# Create a NamedTuple type for this new subclass.
if "_named_tuple" not in cls.__dict__:
type(self)._namedtuple = namedtuple(type(self).__name__ + "Tuple", self.field_names)
def __iter__(self) -> Iterator[str]:
"""Yield the 'keys' of this object, i.e. the names of the fields."""
return iter(self.field_names)
def __len__(self) -> int:
"""Returns the number of fields."""
return len(self.field_names)
def __eq__(self, other: Union["Batch", Any]) -> bool:
# Not sure this is useful.
return NotImplemented
if not isinstance(other, Batch):
return NotImplemented
if type(self) != type(other):
# Not allowing these sorts of comparisons.
return NotImplemented
items_equal = {k: v == other[k] for k, v in self.items()}
return all(
is_equal.all() if isinstance(is_equal, (Tensor, np.ndarray)) else is_equal
for is_equal in items_equal.values()
)
@singledispatchmethod
def __getitem__(self, index: Any) -> T:
"""Select a subset of the fields of this object. Can also be indexed
with tuples, boolean numpy arrays or tensors, as well as None.
"""
raise KeyError(index)
@__getitem__.register(type(None))
def _getitem_none(self, index: None) -> "Batch":
"""Indexing with 'None' gives back a copy with all the items having an
extra batch dimension.
"""
return self.with_batch_dimension()
return getattr(self, index)
@__getitem__.register
def _getitem_by_name(self, index: str) -> Union[Tensor, Any]:
return getattr(self, index)
@__getitem__.register
def _getitem_by_index(self, index: int) -> Union[Tensor, Any]:
return getattr(self, self.field_names[index])
@__getitem__.register(slice)
def _getitem_with_slice(self, index: slice) -> "Batch":
# NOTE: I don't think it would be a good idea to support slice indexing,
# as it could be confusing and give the user the impression that it
# is slicing into the tensors, rather than into the fields.
# I guess this might be doable, but is it really useful?
raise NotImplementedError("Batch objects don't support indexing with (just) slices atm.")
if index == slice(None, None, None) or index == slice(0, len(self), 1):
return self
@__getitem__.register(type(Ellipsis))
def _(self: B, index) -> B:
return self
@__getitem__.register(np.ndarray)
@__getitem__.register(Tensor)
def _getitem_with_array(self, index: np.ndarray) -> B:
"""
NOTE: Indexing with just an array uses the array as a 'mask' on all
fields, instead of indexing the "keys" of this object.
"""
assert len(index) == self.batch_size
return self[:, index]
@__getitem__.register(tuple)
def _getitem_with_tuple(self, index: Tuple[Union[slice, Tensor, np.ndarray, int], ...]):
"""When slicing with a tuple, if the first item is an integer, we get
the attribute at that index and slice it with the rest.
For now, the first item in the tuple can only be either an int or an
empty slice.
"""
if len(index) <= 1:
raise IndexError(
f"Invalid index {index}: When indexing with "
f"tuples or lists, they need to have len > 1."
)
field_index = index[0]
item_index = index[1:]
# if len(item_index) == 1:
# item_index = item_index[0]
if isinstance(field_index, int):
# logger.debug(f"Getting the {field_index}'th field, with slice {index[1:]}")
return self[field_index][item_index]
# e.g: forward_pass[:, 1]
if isinstance(field_index, slice):
if field_index == slice(None):
# logger.debug(f"Indexing all fields {field_index} with index: {item_index}")
return type(self)(
**{
key: (
value[index]
if isinstance(value, Batch)
else value[item_index]
if value is not None
else None
)
for key, value in self.items()
}
)
# batch[..., 0] : Not sure this would really be that helpful.
if field_index == Ellipsis:
logger.debug(f"Using ellipsis (...) as the field index?")
return type(self)(
**{
key: value[Ellipsis, item_index] if value is not None else None
for key, value in self.items()
}
)
raise NotImplementedError(
f"Only support tuple indexing with emptyslices or int as first "
f"tuple item for now. (index={index})"
)
def slice(self: B, index: Union[int, slice, np.ndarray, Tensor]) -> B:
"""Gets a slice across the first (batch) dimension.
Raises an error if there is no batch size.
Always returns an object with a batch dimension, even when `index` has len of 1.
"""
if not isinstance(index, (int, slice, np.ndarray, Tensor)):
raise NotImplementedError(f"can't slice with index {index}")
# BUG: By putting a 'None' value in the ForwardPass
def getitem_if_val_is_not_none(val, index):
if val is None:
return None
return val[index]
sliced_value = self._map(partial(getitem_if_val_is_not_none, index=index), recursive=True)
if isinstance(index, int):
sliced_value = sliced_value.with_batch_dimension()
return sliced_value
# return type(self)(**{
# k: v.slice(index) if isinstance(v, Batch) else
# v[index] if v is not None else None
# for k, v in self.items()
# })
def __setitem__(self, index: Union[int, str], value: Any):
"""Set a value in slices of one or more of the fields.
NOTE: Since this class is marked as frozen, we can't change the
attributes, so the index should be a tuple (to change parts of the
tensors, for instance.
"""
if not isinstance(index, tuple) or len(index) < 2:
raise NotImplementedError("index needs to be tuple with len >= 2")
# Get which keys/fields were selected:
selected_fields = np.array(self.field_names)[index[0]]
for selected_field in selected_fields:
item = self[selected_field]
if item is not None:
item[index[1:]] = value
def keys(self) -> KeysView[str]:
return KeysView(self.field_names)
def values(self) -> Tuple[T, ...]:
return self.as_namedtuple()
def items(self) -> Iterable[Tuple[str, T]]:
for name in self.field_names:
yield name, getattr(self, name)
@property
def devices(self) -> Dict[str, Union[Optional[torch.device], Dict]]:
"""Dict from field names to their device if they have one, else None.
If `self` has `Batch` fields, the values for those will be dicts.
"""
return {
k: v.devices if isinstance(v, Batch) else getattr(v, "device", None)
for k, v in self.items()
}
@property
def device(self) -> Optional[torch.device]:
"""Returns the device common to all items, or `None`.
Returns
-------
Tuple[Optional[torch.device]]
None if the devices are unknown/different, or the common device.
"""
device: Optional[torch.device] = None
# TODO: These kinds of methods can't discriminate between a child item
# having all all None tensors and it having different devices atm.
for key, value in self.items():
if isinstance(value, Batch):
item_device = value.device
if item_device is None:
# Child item doesn't have a 'device', so `self` also doesnt.
return None
else:
item_device = getattr(value, "device", None)
if item_device is None:
continue
if device is None:
device = item_device
elif item_device != device:
return None
return device
@property
def dtypes(self) -> Dict[str, Union[Optional[torch.dtype], Dict]]:
"""Dict from field names to their dtypes if they have one, else None.
If `self` has `Batch` fields, the values for those will be dicts.
"""
return {
k: v.dtypes if isinstance(v, Batch) else getattr(v, "dtype", None)
for k, v in self.items()
}
@property
def dtype(self) -> Tuple[Optional[torch.dtype]]:
"""Returns the dtype common to all tensors, or None.
Returns
-------
Dict[Optional[torch.dtype]]
The common dtype, or `None` if the dtypes are unknown/different.
"""
dtype: Optional[torch.dtype] = None
for key, value in self.items():
item_dtype = getattr(value, "dtype", None)
if item_dtype is None:
continue
if dtype is None:
dtype = item_dtype
elif item_dtype != dtype:
return None
return dtype
def as_namedtuple(self) -> Tuple[T, ...]:
return self._namedtuple(**{k: v for k, v in self.items()})
def as_list_of_tuples(self) -> Iterable[Tuple[T, ...]]:
"""Returns an iterable of the items in the 'batch', each item as a
namedtuple (list of tuples).
"""
# If one of the fields is None, then we convert it into a list of Nones,
# so we can zip all the fields to create a list of tuples.
field_items = [
[items for _ in range(self.batch_size)]
if items is None or items is {}
else [item for item in items]
for items in self.as_tuple()
]
assert all([len(items) == self.batch_size for items in field_items])
return list(itertools.starmap(self._namedtuple, zip(*field_items)))
def as_tuple(self) -> Tuple[T, ...]:
"""Returns a namedtuple containing the 'batched' attributes of this
object (tuple of lists).
"""
# TODO: Turning on the namedtuple return value by default.
# return tuple(
# getattr(self, f.name) for f in dataclasses.fields(self)
# )
return self.as_namedtuple()
# def as_dict(self) -> Dict[str, T]:
# # NOTE: dicts are ordered since python 3.7
# return {
# field_name: getattr(self, field_name)
# for field_name in self.field_names
# }
def to(self, *args, **kwargs):
def _to(item, *args_, **kwargs_):
if hasattr(item, "to") and callable(item.to):
return item.to(*args_, **kwargs_)
return item
return self._map(_to, *args, **kwargs, recursive=True)
def float(self, dtype=torch.float):
return self.to(dtype=dtype)
def float32(self, dtype=torch.float32):
return self.to(dtype=dtype)
def int(self, dtype=torch.int):
return self.to(dtype=dtype)
def double(self, dtype=torch.double):
return self.to(dtype=dtype)
def numpy(self):
"""Returns a new Batch object of the same type, with all Tensors
converted to numpy arrays.
Returns
-------
[type]
[description]
"""
def _numpy(v):
if isinstance(v, (Tensor, Batch)):
return v.detach().cpu().numpy()
return v
return self._map(_numpy, recursive=True)
# return type(self)(**{
# k: v.detach().cpu().numpy() if isinstance(v, (Tensor, Batch)) else v
# for k, v in self.items()
# })
def detach(self):
    """Returns a new Batch object of the same type, with all Tensors
    detached.

    Returns
    -------
    Batch
        New object of the same type, where the generic `detach` function was
        applied to every value.
    """
    from sequoia.utils.generic_functions import detach as detach_value

    return self._map(detach_value)
def cpu(self, **kwargs):
"""Returns a new Batch object of the same type, with all Tensors
moved to cpu.
Returns
-------
Batch
New object of the same type, but with all tensors moved to CPU.
"""
return self.to(device="cpu", **kwargs)
def cuda(self, device=None, **kwargs):
"""Returns a new Batch object of the same type, with all Tensors
moved to cuda device.
Returns
-------
Batch
New object of the same type, but with all tensors moved to cuda.
"""
return self.to(device=(device or "cuda"), **kwargs)
@property
def shapes(self) -> Dict[str, Union[torch.Size, Dict]]:
"""Dict from field names to their shapes if they have one, else None.
If `self` has `Batch` fields, the values for those will be dicts.
"""
return {
k: v.shapes if isinstance(v, Batch) else getattr(v, "shape", None)
for k, v in self.items()
}
@property
def batch_size(self) -> Optional[int]:
"""Returns the length of the first dimension if it is common to all
tensors in this object, else None.
"""
# NOTE: If all tensors have just one dimension and are all the same
# length, then this would give back that length.
batch_size: Optional[int] = None
for k, v in self.items():
if isinstance(v, Batch):
v_batch_size = v.batch_size
if v_batch_size is None:
# child item doesn't have a batch size, so we dont either.
return None
elif batch_size is None:
batch_size = v_batch_size
elif v_batch_size != batch_size:
return None
else:
item_shape = getattr(v, "shape", None)
if item_shape is None:
continue
if not item_shape:
return None
v_batch_size = item_shape[0]
if batch_size is None:
batch_size = v_batch_size
elif v_batch_size != batch_size:
return None
return batch_size
def with_batch_dimension(self: B) -> B:
    """Returns a copy of `self` where all numpy arrays / tensors have an
    extra `batch` dimension of size 1.

    `None` values are kept as-is, and any other value is wrapped in a numpy array of
    length 1.
    """
    # TODO: Do we 'wrap' the `None` values? or keep them as-is?
    from sequoia.utils.categorical import Categorical

    @singledispatch
    def add_batch_dim(value: Any) -> Any:
        return value if value is None else np.asarray([value])

    def _index_with_none(
        value: Union[np.ndarray, Tensor, Categorical]
    ) -> Union[np.ndarray, Tensor, Categorical]:
        return value[None]

    # Array-like types get their new leading dimension through `value[None]`.
    for array_type in (Tensor, np.ndarray, Categorical):
        add_batch_dim.register(array_type, _index_with_none)

    return self._map(add_batch_dim)
def remove_batch_dimension(self: B) -> B:
"""Returns a copy of `self` where all numpy arrays / tensors have an
the extra `batch` dimension removed.
Raises an error if any non-None value doesn't have a batch dimension of
size 1.
"""
return self[:, 0]
def split(self: B) -> List[B]:
"""Returns an iterable of the items in the 'batch', each item as a
object of the same type as `self`.
"""
# If one of the fields is None, then we convert it into a list of Nones,
# so we can zip all the fields to create a list of tuples.
return [self[:, i] for i in range(self.batch_size)]
@classmethod
def stack(cls: Type[B], items: List[B]) -> B:
    """Stack `items` into a single object, using the generic `stack` function.

    `items` can be any iterable: it gets converted to a list first.
    """
    from sequoia.utils.generic_functions import stack as stack_items

    batch_items = list(items)
    # Just to make sure that the returned item will be of the type `cls`.
    assert isinstance(batch_items[0], cls)
    return stack_items(batch_items)
@classmethod
def concatenate(cls: Type[B], items: List[B], **kwargs) -> B:
    """Concatenate `items` into a single object, using the generic `concatenate`
    function (which also receives `kwargs`).

    `items` can be any iterable: it gets converted to a list first.
    """
    from sequoia.utils.generic_functions import concatenate as concatenate_items

    batch_items = list(items)
    assert isinstance(batch_items[0], cls)
    return concatenate_items(batch_items, **kwargs)
def torch(self, device: Union[str, torch.device] = None, dtype: torch.dtype = None):
"""Converts any ndarrays to Tensors if possible and returns a new
object of the same type.
NOTE: This is the opposite of `self.numpy()`
"""
def _from_numpy(v: Union[np.ndarray, Any]) -> Union[Tensor, Any]:
try:
return torch.as_tensor(v, device=device, dtype=dtype)
except (TypeError, RuntimeError):
return v
return self._map(_from_numpy, recursive=True)
def _map(self: B, func: Callable, *args, recursive: bool = True, **kwargs) -> B:
"""Returns an object of the same type as `self`, where function `func`
has been applied (with positional args `args` and keyword-arguments
`kwargs`) to all its values, (inluding the values of nested `Batch`
objects if `recursive` is True).
"""
new_items = {}
for key, value in self.items():
if isinstance(value, Batch):
if not recursive:
# don't apply the function to nested Batch objects unless
# `recursive` is True.
new_items[key] = value
else:
new_items[key] = value._map(func, *args, recursive=recursive, **kwargs)
else:
new_items[key] = func(value, *args, **kwargs) # type: ignore
return type(self)(**new_items)
def _apply(
self: B, func: Callable[[T, Any], None], *args, recursive: bool = True, **kwargs
) -> None:
"""Applies function `func` to all the values in `self`, and optionally
to all its nested values when `recursive` is True.
Returns None, as this assumes that `func` modifies the values in-place.
"""
for key, value in self.items():
if isinstance(value, Batch) and not recursive:
# Skip any Batch objects if `recursive` is False.
continue
func(value, *args, **kwargs) # type: ignore
from sequoia.utils.generic_functions.replace import replace
@replace.register(Batch)
def _replace_batch_items(obj: Batch, **items) -> Batch:
    """`replace` handler for `Batch` objects: delegates to `dataclasses.replace`."""
    updated = dataclasses.replace(obj, **items)
    return updated
from typing import Sequence
from sequoia.utils.generic_functions import get_slice, set_slice
@get_slice.register(Batch)
def _get_batch_slice(value: Batch, indices: Sequence[int]) -> Batch:
    """`get_slice` handler for `Batch` objects: delegates to `Batch.slice`.

    NOTE(review): `Batch.slice` only accepts ints, slices, numpy arrays and Tensors,
    so a plain list of indices would be rejected there.
    """
    sliced = value.slice(indices)
    return sliced
# assert False, f"Removing this in favor of just doing Batch[:, indices]. "
# return type(value)(**{
# field_name: get_slice(field_value, indices) if field_value is not None else None
# for field_name, field_value in value.as_dict().items()
# })
@set_slice.register(Batch)
def set_batch_slice(target: Batch, indices: Sequence[int], values: Batch) -> None:
    """`set_slice` handler for `Batch` objects.

    Calls `set_slice` on each field of `target`, with the value of the field of the
    same name in `values`.
    """
    for field_name, target_field in target.items():
        source_field = values[field_name]
        set_slice(target_field, indices, source_field)
# Run the doctests found in this module's docstrings when it is executed as a script.
if __name__ == "__main__":
import doctest
doctest.testmod()
================================================
FILE: sequoia/common/batch_test.py
================================================
""" Tests for the `Batch` class.
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Type
import numpy as np
import pytest
import torch
from torch import Tensor
from sequoia.utils.categorical import Categorical
from .batch import Batch
# Batch type used in these tests: an `x` value and optional task labels.
@dataclass(frozen=True)
class Observations(Batch):
x: Tensor
# Defaults to None, i.e. no task labels.
task_labels: Optional[Tensor] = None
# Batch type used in these tests, with a single `y_pred` field.
@dataclass(frozen=True)
class Actions(Batch):
y_pred: Tensor
# `Actions` subclass with an additional `Categorical` distribution field; exercises
# subclassing of a `Batch` subclass.
@dataclass(frozen=True)
class RLActions(Actions):
action_dist: Categorical
# Batch type with a single `y` field.
@dataclass(frozen=True)
class Rewards(Batch):
y: Tensor
@pytest.mark.parametrize(
    "batch_type, items_dict",
    [
        (Observations, dict(x=torch.arange(10), task_labels=torch.arange(10) + 1)),
    ],
)
def test_batch_behaves_like_a_dict(batch_type, items_dict):
    """A Batch can be read like a dict (by field name) or like a tuple (by position)."""
    obj = batch_type(**items_dict)
    # NOTE: dicts, along with their .keys() and .values() are ordered as of py37
    expected_keys = list(items_dict.keys())
    for position, (name, value) in enumerate(obj.items()):
        source_value = items_dict[name]
        assert name == expected_keys[position]  # key order is the same.
        assert (value == source_value).all()
        if isinstance(source_value, Tensor):
            assert value is source_value  # Tensors shouldn't be cloned or copied
        assert (obj[name] == value).all()  # values are the same.
        assert (obj[name] == getattr(obj, name)).all()  # getattr same as __getitem__
        assert (obj[position] == value).all()  # can also be indexed with ints like a tuple.
@pytest.mark.parametrize(
    "batch_type, items_dict",
    [
        (Observations, dict(x=torch.arange(10), task_labels=torch.arange(10) + 1)),
    ],
)
def test_to(batch_type: Type[Batch], items_dict: Dict[str, Tensor]):
    """Test that the 'to' method behaves like `torch.Tensor.to`, so that we
    can move all the items in a `Batch` between devices or dtypes.
    """
    original_devices: Dict[str, torch.device] = {
        name: tensor.device for name, tensor in items_dict.items()
    }
    original_dtypes: Dict[str, torch.dtype] = {
        name: tensor.dtype for name, tensor in items_dict.items()
    }
    obj = batch_type(**items_dict)

    # The devices and dtypes remain the same when creating the Batch with the
    # given items.
    for name, value in obj.items():
        source_value = items_dict[name]
        assert value.device == source_value.device == original_devices[name]
        assert value.dtype == source_value.dtype == original_dtypes[name]

    # The 'devices' and 'dtypes' attributes give the devices and dtypes of all
    # items.
    assert obj.devices == original_devices
    assert obj.dtypes == original_dtypes

    devices = list(original_devices.values())
    dtypes = list(original_dtypes.values())
    if len(set(devices)) == 1:
        # If they all share the same device, then the `device` attribute on the
        # `batch` is this shared device.
        assert obj.device == devices[0]
    if len(set(dtypes)) == 1:
        # If all tensors have the same dtype, then the `dtype` attribute on the
        # `batch` is this shared dtype.
        assert obj.dtype == dtypes[0]

    # Test moving to another device, if possible.
    if torch.cuda.is_available():
        cuda_obj = obj.to("cuda")
        for _, value in cuda_obj.items():
            assert value.device.type == "cuda"

    float_obj = obj.to(dtype=torch.float32)
    for name, value in float_obj.items():
        source_value = items_dict[name]
        assert value.device == source_value.device
        assert value.dtype == torch.float32
        assert (value == source_value.to(dtype=torch.float32)).all()
@pytest.mark.parametrize(
    "batch_type, items_dict",
    [
        (
            Observations,
            dict(
                x=torch.arange(25).reshape([5, 5]),
                task_labels=torch.arange(25).reshape([5, 5]) + 1,
            ),
        ),
    ],
)
@pytest.mark.parametrize(
    "index",
    [
        (0, 0),  # obj[0, 0]
        (0, ..., 0),  # obj[0, ..., 0]
        (slice(None), 0),  # obj[:, 0]
        (slice(None), slice(3)),  # obj[:, :3]
        (slice(None), slice(None, -3)),  # obj[:, :-3]
        (slice(None), slice(None, None, 2)),  # obj[:, ::2]
        (slice(None), np.arange(5) % 2 == 0),  # obj[:, even_mask]
        # NOTE(review): same index as the previous entry.
        (slice(None), np.arange(5) % 2 == 0),  # obj[:, even_mask]
    ],
)
def test_tuple_indexing(
    batch_type: Type[Batch], items_dict: Dict[str, Tensor], index: Tuple[Any, ...]
):
    """Test that we can index into the object in the same style as an ndarray"""
    obj = batch_type(**items_dict)
    keys = list(items_dict.keys())
    print(f"Expected keys: {keys}")

    field_index, item_index = index[0], index[1:]
    expected_items = {k: items_dict[k][item_index] for k in np.array(keys)[field_index]}
    print(f"expected sliced items:")
    for key, value in expected_items.items():
        print(key, value)

    actual_slice = obj[index]

    if field_index == slice(None):
        # actual_slice: Batch
        assert isinstance(actual_slice, batch_type)
        assert list(actual_slice.keys()) == keys
        for k, sliced_value in actual_slice.items():
            print(f"key {k}, index {index}")
            print(f"Sliced value: {sliced_value}")
            expected_value = expected_items[k]
            print(f"Expected value: {expected_value}")
            assert (sliced_value == expected_value).all()

    if isinstance(field_index, int):
        # e.g. Observations[0, <...>]
        key = keys[field_index]
        expected_value = expected_items[key]
        assert (actual_slice == expected_value).all()
def test_masking():
    """Test indexing or changing values in the item using a mask array."""
    bob = Observations(x=torch.arange(25).reshape([5, 5]))
    odd_rows = np.arange(5) % 2 == 1

    # Writes into the `x` tensor in-place; the `None` task labels are left alone.
    bob[:, odd_rows] = False

    expected_x = torch.as_tensor(
        [
            [0, 1, 2, 3, 4],
            [0, 0, 0, 0, 0],
            [10, 11, 12, 13, 14],
            [0, 0, 0, 0, 0],
            [20, 21, 22, 23, 24],
        ]
    )
    expected = Observations(x=expected_x, task_labels=None)
    assert (expected.x == bob.x).all()
    assert expected.task_labels == bob.task_labels
def test_newaxis():
    """WIP: Trying out np.newaxis as a way to add an extra batch dimension."""
    x = Observations(x=torch.arange(5), task_labels=1)
    # Test out different ways of 'unsqueezing' the object.
    candidates = [x[np.newaxis], x.with_batch_dimension()]
    for expanded in candidates:
        expected = Observations(
            x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int),
            task_labels=np.array([1]),
        )
        assert str(expanded) == str(expected)
def test_single_index():
    """observations[0] should give the first field."""
    obs = Observations(x=torch.arange(5), task_labels=1)
    first_field = obs[0]
    assert first_field is obs.x
def test_remove_batch_dim():
    """Removing an extra batch dimension."""
    # First with task labels in an array of length 1, then with no task labels.
    cases = [
        (np.array([1]), 1),
        (None, None),
    ]
    for batched_labels, expected_labels in cases:
        bob = Observations(
            x=torch.tensor([[0, 1, 2, 3, 4]], dtype=int),
            task_labels=batched_labels,
        )
        expected = Observations(x=torch.arange(5), task_labels=expected_labels)
        for expanded in [bob.remove_batch_dimension(), bob[:, 0]]:
            assert str(expanded) == str(expected)
def test_remove_batch_dim_with_nested_objects():
    """The batch dimension is also removed from the values of nested Batch objects."""
    batched = ForwardPass(
        observations=Observations(x=torch.arange(5).reshape([1, 5]), task_labels=None),
        h_x=torch.arange(4).reshape([1, 4]),
        actions=Actions(y_pred=torch.tensor(1).reshape([1])),
    )
    expected = ForwardPass(
        observations=Observations(x=torch.arange(5), task_labels=None),
        h_x=torch.arange(4),
        actions=Actions(y_pred=torch.tensor(1)),
    )
    actual = batched.remove_batch_dimension()
    assert str(actual) == str(expected)
def test_split():
    """Split a batch into a list of Batch objects"""
    bob = Observations(
        x=torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
        task_labels=np.array([0, 1]),
    )
    expected = []
    for i in range(2):
        expected.append(Observations(x=torch.arange(5) + i * 5, task_labels=i))
    assert str(bob.split()) == str(expected)
@pytest.mark.parametrize(
    "items, expected",
    [
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=np.array(0),
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=np.array(1),
                ),
            ],
            Observations(
                x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=np.array([0, 1]),
            ),
        ),
        (
            [
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
            ],
            RLActions(
                y_pred=torch.as_tensor([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], dtype=int),
                action_dist=Categorical(logits=torch.ones([2, 5, 5], dtype=float) / 5),
            ),
        ),
    ],
)
def test_stack(items: List[Batch], expected: Batch):
    """Stack a list of Batch objects into a single Batch object."""
    batch_type = type(items[0])
    assert str(batch_type.stack(items)) == str(expected)
    # Same test, but with only numpy arrays as items:
    numpy_items = map(lambda i: i.numpy(), items)
    assert str(batch_type.stack(numpy_items)) == str(expected.numpy())
    # Same test, but with Tensor items:
    torch_items = map(lambda i: i.torch(), items)
    assert str(batch_type.stack(torch_items)) == str(expected.torch())
@pytest.mark.parametrize(
    "items, expected",
    [
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=None,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=None,
                ),
            ],
            Observations(
                x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=None,
            ),
        ),
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=None,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=1,
                ),
            ],
            Observations(
                x=torch.as_tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=np.array([None, 1]),
            ),
        ),
    ],
)
def test_stack_with_none_values(items: List[Batch], expected: Batch):
    """Test that if all values are None, a single None is produced, but if only some
    values are None, then an ndarray of dtype `object` is created instead.
    """
    batch_type = type(items[0])
    assert str(batch_type.stack(items)) == str(expected)
    # Same test, but with only numpy arrays as items:
    numpy_items = [item.numpy() for item in items]
    assert str(batch_type.stack(numpy_items)) == str(expected.numpy())
@pytest.mark.parametrize(
    "items, expected",
    [
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=0,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=1,
                ),
            ],
            Observations(
                x=torch.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int),
                task_labels=np.array([0, 1]),
            ),
        ),
        (
            [
                Observations(
                    x=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    task_labels=None,
                ),
                Observations(
                    x=torch.as_tensor([5, 6, 7, 8, 9], dtype=int),
                    task_labels=None,
                ),
            ],
            Observations(
                x=torch.as_tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int),
                task_labels=None,
            ),
        ),
        (
            [
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
                RLActions(
                    y_pred=torch.as_tensor([0, 1, 2, 3, 4], dtype=int),
                    action_dist=Categorical(logits=torch.ones([5, 5], dtype=float) / 5),
                ),
            ],
            RLActions(
                y_pred=torch.as_tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int),
                action_dist=Categorical(logits=torch.ones([10, 5], dtype=float) / 5),
            ),
        ),
    ],
)
def test_concatenate(items: List[Batch], expected: Batch):
    """Concatenate a list of Batch objects into a single Batch object."""
    batch_type = type(items[0])
    assert str(batch_type.concatenate(items)) == str(expected)
    # Same test, but with only numpy arrays as items:
    numpy_items = map(lambda i: i.numpy(), items)
    assert str(batch_type.concatenate(numpy_items)) == str(expected.numpy())
    # Same test, but with Tensor items:
    torch_items = map(lambda i: i.torch(), items)
    assert str(batch_type.concatenate(torch_items)) == str(expected.torch())
@pytest.mark.parametrize(
    "numpy_batch, torch_batch",
    [
        (
            Observations(
                x=np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]),
                task_labels=np.array([None, None]),
            ),
            Observations(
                x=torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], dtype=int),
                task_labels=np.array([None, None]),
            ),
        ),
    ],
)
def test_convert_between_ndarrays_and_tensors(numpy_batch: Batch, torch_batch: Batch):
    """Check round trips between the numpy and the torch version of a batch.

    Comparisons are done on the string representations. When CUDA is available,
    the same round trips are repeated with the tensors placed on the GPU.
    """
    # numpy -> torch (-> numpy)
    assert str(numpy_batch.torch()) == str(torch_batch)
    assert str(numpy_batch.torch().numpy()) == str(numpy_batch)
    # torch -> numpy (-> torch)
    assert str(torch_batch.numpy()) == str(numpy_batch)
    assert str(torch_batch.numpy().torch()) == str(torch_batch)

    if not torch.cuda.is_available():
        return

    cuda_batch = torch_batch.cuda()
    assert cuda_batch.device.type == "cuda"
    assert str(numpy_batch.torch(device="cuda")) == str(cuda_batch)
    assert str(numpy_batch.torch(device="cuda").numpy()) == str(numpy_batch)
    assert str(cuda_batch.numpy()) == str(numpy_batch)
    assert str(cuda_batch.numpy().torch(device="cuda")) == str(cuda_batch)
@dataclass(frozen=True)
class ForwardPass(Batch):
    """Batch made of other batches, used by `test_nesting` below.

    Combines an `Observations` batch, a plain tensor and an `Actions` batch in a
    single frozen dataclass.
    """

    # Nested batch of observations.
    observations: Observations
    # Plain tensor field sitting between the two nested batches.
    h_x: Tensor
    # Nested batch of actions.
    actions: Actions
def test_nesting():
    """Check batch size, tuple indexing and slicing on a Batch of Batches."""
    nested_observations = Observations(
        x=torch.arange(10).reshape([2, 5]),
        task_labels=torch.arange(2, dtype=int),
    )
    nested_actions = Actions(
        y_pred=torch.arange(2, dtype=int),
    )
    obj = ForwardPass(
        observations=nested_observations,
        h_x=torch.arange(8).reshape([2, 4]),
        actions=nested_actions,
    )

    assert obj.batch_size == 2
    # Index path: field 0 (observations) -> field 1 (task_labels) -> item 0.
    assert obj[0, 1, 0] == obj.observations.task_labels[0]

    sliced = obj.slice(0)
    expected_slice = ForwardPass(
        observations=Observations(
            x=torch.as_tensor([[0, 1, 2, 3, 4]]),
            task_labels=torch.as_tensor([0]),
        ),
        h_x=torch.as_tensor([[0, 1, 2, 3]]),
        actions=Actions(y_pred=torch.as_tensor([0])),
    )
    assert str(sliced) == str(expected_slice)
def test_slicing_with_one_item():
    """Slicing with a one-element index tensor keeps a batch dimension of 1."""
    batch = Observations(
        x=torch.arange(10).reshape([2, 5]),
        task_labels=torch.arange(2, dtype=int),
    )
    single_index = torch.as_tensor([0])
    expected_shapes = {
        "x": torch.Size([1, 5]),
        "task_labels": torch.Size([1]),
    }
    assert batch.slice(single_index).shapes == expected_shapes
================================================
FILE: sequoia/common/callbacks/__init__.py
================================================
"""
TODO: Migrate the addons to Pytorch-Lightning, maybe in the form of callbacks
or as optional extensions to be added to Classifier?
"""
# from .knn_callback import KnnCallback
# from .vae_callback import SaveVaeSamplesCallback
================================================
FILE: sequoia/common/callbacks/knn_callback.py
================================================
""" Callback that evaluates representations with a KNN after each epoch.
TODO: The code here is split into too many functions and it's a bit confusing.
Will Need to rework that at some point.
NOTE: Currently unused.
"""
import math
from dataclasses import asdict, dataclass
from typing import List, Optional, Tuple
import numpy as np
import torch
from pytorch_lightning import Callback, LightningModule, Trainer
from simple_parsing import field, mutable_field
from sklearn.metrics import log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from torch import Tensor
from torch.utils.data import DataLoader
from sequoia.common.loss import Loss
# from sequoia.methods.models.base_model.model import LightningModule
from sequoia.settings import Setting
from sequoia.settings.sl import ClassIncrementalSetting
from sequoia.utils.logging_utils import get_logger, pbar
from sequoia.utils.utils import roundrobin, take
logger = get_logger(__name__)
@dataclass
class KnnClassifierOptions:
    """Set of options for configuring the KnnClassifier.

    The fields are passed as keyword arguments to sklearn's
    `KNeighborsClassifier` (see `fit_knn`).
    """

    n_neighbors: int = field(default=5, alias="n_neighbours")  # Number of neighbours.
    metric: str = "cosine"
    algorithm: str = "auto"  # See the sklearn docs
    leaf_size: int = 30  # See the sklearn docs
    p: int = 2  # see the sklearn docs
    n_jobs: Optional[int] = -1  # see the sklearn docs.
@dataclass
class KnnCallback(Callback):
    """Addon that adds the option of evaluating representations with a KNN.

    At the end of an epoch, a KNN classifier is fitted on the hidden codes of the
    training data and then evaluated on the validation and test dataloaders. The
    resulting losses are logged and, when a "loss_object" is found in the
    trainer's callback metrics, stored on it as "knn/valid" and "knn/test".

    TODO: Perform the KNN evaluations in different processes using multiprocessing.
    TODO: We could even evaluate the representations of a DIFFERENT dataset with
    the KNN, if the shapes were compatible with the model! For example, we could
    train the model on some CL/RL/etc task, like Omniglot or something, and at
    the same time, evaluate how good the model's representations are at
    disentangling the classes from MNIST or Fashion-MNIST or something else
    entirely! This could be nice when trying to argue about better generalization
    in the model's representations.
    """

    # Options for the KNN classifier
    knn_options: KnnClassifierOptions = mutable_field(KnnClassifierOptions)
    # Maximum number of examples to take from the dataloaders. Defaults to 0.
    # NOTE: `evaluate_knn` raises this value to at least `num_classes ** 2`.
    knn_samples: int = 0

    def __post_init__(self):
        # Number of batches taken from each dataloader (0 means "no limit").
        self.max_num_batches: int = 0
        self.model: LightningModule
        self.trainer: Trainer

    def on_train_start(self, trainer, pl_module):
        """Called when the train begins."""
        self.trainer = trainer
        self.model = pl_module
        self.setting: ClassIncrementalSetting

    def setup(self, trainer, pl_module, stage: str):
        """Called when fit or test begins"""
        super().setup(trainer, pl_module, stage)

    def on_epoch_end(self, trainer: Trainer, pl_module: LightningModule):
        """Run the KNN evaluation and attach the results to the current loss object.

        Nothing is evaluated when `knn_samples` is 0.
        """
        self.trainer = trainer
        self.model = pl_module
        self.setting = self.model.setting
        config = self.model.config

        if self.knn_samples > 0:
            batch_size = pl_module.batch_size
            # We round this up so we always take at least one batch_size of
            # samples from each dataloader.
            self.max_num_batches = math.ceil(self.knn_samples / batch_size)
            logger.debug(
                f"Taking a maximum of {self.max_num_batches} batches from each dataloader."
            )

            if config.debug:
                self.knn_samples = min(self.knn_samples, 100)

            valid_knn_loss, test_knn_loss = self.evaluate_knn(pl_module)

            # assert False, trainer.callback_metrics.keys()
            loss: Optional[Loss] = trainer.callback_metrics.get("loss_object")
            if loss:
                assert "knn/valid" not in loss.losses
                assert "knn/test" not in loss.losses
                loss.losses["knn/valid"] = valid_knn_loss
                loss.losses["knn/test"] = test_knn_loss

    def log(self, loss_object: Loss):
        """Send the metrics of `loss_object` to the trainer's logger, if there is one."""
        if self.trainer.logger:
            self.trainer.logger.log_metrics(loss_object.to_log_dict())

    def get_dataloaders(self, model: LightningModule, mode: str) -> List[DataLoader]:
        """Retrieve the train/val/test dataloaders for all 'tasks'.

        Args:
            model: The module whose `datamodule` attribute provides the dataloaders.
            mode: Prefix of the method to call ("train", "val" or "test" in this file).

        Returns:
            A list of dataloaders; a single dataloader is wrapped in a list.
        """
        setting = model.datamodule
        assert setting, "The LightningModule must have its 'datamodule' attribute set for now."
        # if the setting defines a dataloaders() method, those are for each of the tasks, which is what we want!
        # The single-dataloader fallback is only looked up when the plural method
        # is missing, so a setting that defines just the plural one still works.
        fn = getattr(setting, f"{mode}_dataloaders", None)
        if fn is None:
            fn = getattr(setting, f"{mode}_dataloader")
        loaders = fn()
        if isinstance(loaders, DataLoader):
            return [loaders]
        assert isinstance(loaders, list)
        return loaders

    def evaluate_knn(self, model: LightningModule) -> Tuple[Loss, Loss]:
        """Evaluate the representations with a KNN in the context of CL.

        We shorten the train dataloaders to take only the first
        `knn_samples` samples in order to save some compute.
        TODO: Figure out a way to cleanly add the metrics from the callback to
        the ``log dict'' which is returned by the model. Right now they are
        only printed / logged to wandb directly from here.

        Returns:
            The total validation loss and the total test loss.
        """
        setting = model.datamodule
        assert isinstance(setting, Setting)
        # TODO: Remove this if we want to use this for something else than a
        # Continual setting in the future.
        assert isinstance(setting, ClassIncrementalSetting)
        num_classes = setting.num_classes

        # Check whether the method has access to the task labels at train/test time.
        task_labels_at_test_time: bool = False
        from sequoia.settings import TaskIncrementalSLSetting

        if isinstance(setting, TaskIncrementalSLSetting):
            if setting.task_labels_at_test_time:
                task_labels_at_test_time = True

        # TODO: Figure out a way to make sure that we get at least one example
        # of each class to fit the KNN.
        self.knn_samples = max(self.knn_samples, num_classes**2)
        self.max_num_batches = math.ceil(self.knn_samples / model.batch_size)
        logger.info(f"number of classes: {num_classes}")
        logger.info(f"Number of KNN samples: {self.knn_samples}")
        logger.debug(f"Taking a maximum of {self.max_num_batches} batches from each dataloader.")

        train_loaders: List[DataLoader] = self.get_dataloaders(model, mode="train")
        valid_loaders: List[DataLoader] = self.get_dataloaders(model, mode="val")
        test_loaders: List[DataLoader] = self.get_dataloaders(model, mode="test")

        # Only take the first `knn_samples` samples from each dataloader.
        def shorten(dataloader: DataLoader):
            return take(dataloader, n=self.max_num_batches)

        if self.max_num_batches:
            train_loaders = list(map(shorten, train_loaders))
            valid_loaders = list(map(shorten, valid_loaders))
            test_loaders = list(map(shorten, test_loaders))

        # Create an iterator that alternates between each of the train dataloaders.
        # NOTE: The dataloaders were shortened above, so each one contributes at
        # most `max_num_batches` batches.
        train_loader = roundrobin(*train_loaders)

        h_x, y = get_hidden_codes_array(
            model=model, dataloader=train_loader, description="KNN (Train)"
        )
        train_loss, scaler, knn_classifier = fit_knn(
            x=h_x, y=y, options=self.knn_options, num_classes=num_classes, loss_name="knn/train"
        )
        logger.info(f"KNN Train Acc: {train_loss.accuracy:.2%}")
        self.log(train_loss)

        total_valid_loss = Loss("knn/valid")
        # Save the current task ID so we can reset it after testing.
        starting_task_id = model.setting.current_task_id

        for i, dataloader in enumerate(valid_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)
            loss_i = evaluate(
                model=model,
                dataloader=dataloader,
                loss_name=f"[{i}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=setting.num_classes_in_task(i),
            )
            # We use `.absorb(loss_i)` here so that the metrics get merged.
            # That way, if we access `total_valid_loss.accuracy`, this gives the
            # accuracy over all the validation tasks.
            # If we instead used `+= loss_i`, then loss_i would become a subloss
            # of `total_valid_loss`, since they have different names.
            # TODO: Explain this in more detail somewhere else.
            total_valid_loss.absorb(loss_i)
            logger.info(f"KNN Valid[{i}] Acc: {loss_i.accuracy:.2%}")
            self.log(loss_i)

        logger.info(f"KNN Average Valid Acc: {total_valid_loss.accuracy:.2%}")
        self.log(total_valid_loss)

        total_test_loss = Loss("knn/test")
        for i, dataloader in enumerate(test_loaders):
            if task_labels_at_test_time:
                model.on_task_switch(i, training=False)
            # TODO Should we set the number of classes to be the number of
            # classes in the current task?
            loss_i = evaluate(
                model=model,
                dataloader=dataloader,
                loss_name=f"[{i}]",
                scaler=scaler,
                knn_classifier=knn_classifier,
                num_classes=num_classes,
            )
            total_test_loss.absorb(loss_i)
            logger.info(f"KNN Test[{i}] Acc: {loss_i.accuracy:.2%}")
            self.log(loss_i)

        # Put the model back on the task it was on before the evaluation.
        if task_labels_at_test_time:
            model.on_task_switch(starting_task_id, training=False)

        logger.info(f"KNN Average Test Acc: {total_test_loss.accuracy:.2%}")
        self.log(total_test_loss)
        return total_valid_loss, total_test_loss
def evaluate(
    model: LightningModule,
    dataloader: DataLoader,
    loss_name: str,
    scaler: StandardScaler,
    knn_classifier: KNeighborsClassifier,
    num_classes: int,
) -> Loss:
    """Score the model's representations on a dataloader with a fitted KNN.

    The classes found in the dataloader must be a subset of the classes the KNN
    classifier was fitted on.

    Args:
        model (Classifier): a Classifier model to use to encode samples.
        dataloader (DataLoader): a dataloader.
        loss_name (str): name to give to the resulting loss.
        scaler (StandardScaler): the scaler used during fitting.
        knn_classifier (KNeighborsClassifier): The KNN classifier.
        num_classes (int): number of classes, forwarded to `get_knn_performance`.

    Returns:
        Loss: The loss object containing metrics and a 'total loss', which is
        converted to a tensor here (passing through the KNN isn't a
        differentiable operation).
    """
    codes, labels = get_hidden_codes_array(
        model,
        dataloader,
        description=f"KNN ({loss_name})",
    )

    # Check that the same classes were used.
    fitted_classes = set(knn_classifier.classes_)
    seen_classes = set(labels)
    assert seen_classes <= fitted_classes, (
        f"y and y_test should contain the same classes: "
        f"(train classes: {fitted_classes}, "
        f"test classes: {seen_classes})."
    )

    knn_loss = get_knn_performance(
        x_t=codes,
        y_t=labels,
        loss_name=loss_name,
        scaler=scaler,
        knn_classifier=knn_classifier,
        num_classes=num_classes,
    )
    knn_loss.loss = torch.as_tensor(knn_loss.loss)
    logger.info(f"{loss_name} Acc: {knn_loss.accuracy:.2%}")
    return knn_loss
def get_hidden_codes_array(
    model: LightningModule, dataloader: DataLoader, description: str = "KNN"
) -> Tuple[np.ndarray, np.ndarray]:
    """Gets the hidden vectors and corresponding labels.

    Args:
        model: Model whose `encode` method produces the hidden vectors.
        dataloader: Iterable of `(x, y)` batches.
        description: Text passed to the progress bar.

    Returns:
        The hidden vectors, flattened to two dimensions, and the labels, both as
        numpy arrays concatenated over all batches.
    """
    h_x_list: List[np.ndarray] = []
    y_list: List[np.ndarray] = []

    for batch in pbar(dataloader, description, leave=False):
        # TODO: Debug this, make sure this callback still works.
        x, y = batch
        assert isinstance(x, Tensor), type(x)
        # We only do KNN with examples that have a label.
        assert y is not None, f"Should have a 'y' for now! {x}, {y}"
        if y is not None:
            # TODO: There will probably be some issues with trying to use
            # the model's encoder to encode stuff when using DataParallel or
            # DistributedDataParallel, as PL might be interfering somehow.
            # The codes are only used as numpy arrays, so there is no need to
            # record the operations for autograd.
            with torch.no_grad():
                h_x = model.encode(x.to(model.device))
            h_x_list.append(h_x.detach().cpu().numpy())
            y_list.append(y.detach().cpu().numpy())

    codes = np.concatenate(h_x_list)
    labels = np.concatenate(y_list)
    return codes.reshape(codes.shape[0], -1), labels
def fit_knn(
    x: np.ndarray,
    y: np.ndarray,
    num_classes: int,
    options: Optional[KnnClassifierOptions] = None,
    loss_name: str = "knn",
) -> Tuple[Loss, StandardScaler, KNeighborsClassifier]:
    """Fit a scaler and a KNN classifier on `x` / `y` and score them on that data.

    Args:
        x: Training inputs; must be 2-dimensional for the scaler.
        y: Training labels.
        num_classes: Number of classes, forwarded to `get_knn_performance`.
        options: Options for the KNN classifier. Defaults to
            `KnnClassifierOptions()` when not given.
        loss_name: Name given to the returned training loss.

    Returns:
        The loss on the training data, the fitted scaler and the fitted classifier.
    """
    options = options or KnnClassifierOptions()

    scaler = StandardScaler()
    x_s = scaler.fit_transform(x)
    # Create and train the Knn Classifier using the options as the kwargs
    knn_classifier = KNeighborsClassifier(**asdict(options)).fit(x_s, y)

    # NOTE: `get_knn_performance` applies the scaler itself, so it is given the
    # unscaled inputs.
    train_loss = get_knn_performance(
        x_t=x,
        y_t=y,
        scaler=scaler,
        knn_classifier=knn_classifier,
        num_classes=num_classes,
        loss_name=loss_name,
    )
    return train_loss, scaler, knn_classifier
def get_knn_performance(
    x_t: np.ndarray,
    y_t: np.ndarray,
    scaler: StandardScaler,
    knn_classifier: KNeighborsClassifier,
    num_classes: int,
    loss_name: str = "KNN",
) -> Loss:
    """Score a fitted KNN classifier on the given inputs and labels.

    Args:
        x_t: Inputs; flattened to two dimensions before being scaled.
        y_t: Labels corresponding to `x_t`.
        scaler: Fitted scaler applied to the inputs.
        knn_classifier: Fitted KNN classifier.
        num_classes: Expected number of columns of the predictions.
        loss_name: Name given to the returned loss.

    Returns:
        A `Loss` whose loss value is the log loss computed by scikit-learn, created
        with the (possibly expanded) predicted probabilities and the labels.
    """
    # Flatten the inputs to two dimensions only.
    x_t = x_t.reshape(x_t.shape[0], -1)
    assert len(x_t.shape) == 2
    x_t = scaler.transform(x_t)
    y_t_prob = knn_classifier.predict_proba(x_t)

    classes = knn_classifier.classes_
    # make sure the classes are sorted:
    assert np.array_equal(sorted(classes), classes)

    if y_t_prob.shape[-1] == num_classes:
        y_t_logits = y_t_prob
    else:
        # Not all classes were encountered, so we need to 'expand' the predicted
        # logits to the right shape.
        logger.info(f"{y_t_prob.shape} {num_classes}")
        class_indices = np.asarray(classes, dtype=int)
        # The expanded array needs one column per label value, so it must be
        # wide enough to hold the largest label.
        num_classes = max(num_classes, y_t_prob.shape[-1], int(class_indices.max()) + 1)
        y_t_logits = np.zeros([y_t_prob.shape[0], num_classes], dtype=y_t_prob.dtype)
        # Column `j` of `y_t_prob` is the probability of `classes[j]`; it is put
        # in the column whose index is that label, so that column indices match
        # the label values found in `y_t`.
        y_t_logits[:, class_indices] = y_t_prob

    # We get the Negative Cross Entropy using the scikit-learn function, but we
    # could instead get it using pytorch's function (maybe even inside the
    # Loss object!
    nce_t = log_loss(y_true=y_t, y_pred=y_t_prob, labels=classes)
    # BUG: There is sometimes a case where some classes aren't present in
    # `classes_`, and as such the ClassificationMetrics object created in the
    # Loss constructor has an error.
    test_loss = Loss(loss_name, loss=nce_t, y_pred=y_t_logits, y=y_t)
    return test_loss
from simple_parsing.helpers.serialization import register_decoding_fn

# Register an identity function as the decoding function for `KnnCallback` in
# simple_parsing's serialization helpers.
# NOTE(review): presumably this lets values of that type pass through unchanged
# when a dataclass is deserialized - confirm against simple_parsing.
register_decoding_fn(KnnCallback, lambda v: v)
================================================
FILE: sequoia/common/callbacks/vae_callback.py
================================================
from dataclasses import dataclass
from typing import Optional
import torch
from pytorch_lightning import Callback, Trainer
from torch import Tensor
from torchvision.utils import save_image
from sequoia.methods.aux_tasks.reconstruction import AEReconstructionTask, VAEReconstructionTask
from sequoia.methods.models import BaseModel
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
@dataclass
class SaveVaeSamplesCallback(Callback):
    """Callback that writes generated and/or reconstructed samples to image files.

    The tasks used to produce the images are looked up on the model when training
    starts, among the autoencoder / VAE reconstruction auxiliary tasks.
    """

    def __post_init__(self, *args, **kwargs):
        self.reconstruction_task: Optional[AEReconstructionTask] = None
        self.generation_task: Optional[VAEReconstructionTask] = None
        # Latent vectors given to the generation task each time samples are generated.
        self.latents_batch: Optional[Tensor] = None
        self.model: BaseModel
        self.trainer: Trainer

    def setup(self, trainer, pl_module, stage: str):
        """Called when fit or test begins"""
        super().setup(trainer, pl_module, stage)

    def on_train_start(self, trainer, pl_module):
        """Called when the train begins: store the trainer / model and find the tasks."""
        self.trainer = trainer
        self.model = pl_module

        from sequoia.methods.models.base_model.self_supervised_model import SelfSupervisedModel

        # Only self-supervised models have auxiliary tasks.
        if not isinstance(pl_module, SelfSupervisedModel):
            return

        aux_tasks = pl_module.tasks
        if VAEReconstructionTask.name in aux_tasks:
            # The VAE task is used for both reconstruction and generation.
            vae_task = aux_tasks[VAEReconstructionTask.name]
            self.reconstruction_task = vae_task
            self.generation_task = vae_task
            self.latents_batch = torch.randn(64, pl_module.hp.hidden_size)
        elif AEReconstructionTask.name in aux_tasks:
            self.reconstruction_task = aux_tasks[AEReconstructionTask.name]
            self.generation_task = None

    def on_train_epoch_end(self, trainer: Trainer, pl_module: BaseModel):
        """Generate samples after each training epoch when a generation task is set."""
        if self.generation_task:
            # Save a batch of fake images after each epoch.
            self.generate_samples()

        ## Reconstruct some samples after each epoch.
        # TODO: change this to use an interval instead.
        x_batch = None
        if x_batch is not None:
            self.reconstruct_samples(x_batch)

    @torch.no_grad()
    def reconstruct_samples(self, data: Tensor):
        """Save up to 16 originals from `data` next to their reconstructions."""
        task = self.reconstruction_task
        if not (task and task.enabled):
            return

        n = min(data.size(0), 16)
        originals = data[:n]
        reconstructed = task.reconstruct(originals)
        comparison = torch.cat([originals, reconstructed])

        out_dir = self.model.config.log_dir / "reconstruction"
        out_dir.mkdir(parents=True, exist_ok=True)
        file_name = out_dir / f"step_{self.trainer.global_step:08d}.png"

        comparison = comparison.cpu().detach()
        # TODO: Debug this:
        # import wandb
        # if self.trainer.logger:
        #     self.trainer.logger.log({"reconstruction": wandb.Image(comparison)})
        save_image(comparison, file_name, nrow=n)

    @torch.no_grad()
    def generate_samples(self):
        """Generate samples from the stored latents and save them as one image."""
        task = self.generation_task
        if not (task and task.enabled):
            return

        n = 64
        fake_samples = task.generate(self.latents_batch)
        image_dims = reversed(self.model.setting.dims)
        fake_samples = fake_samples.cpu().reshape(n, *image_dims)
        # fake_samples = (fake_samples * 255).astype(np.uint8)

        out_dir = self.model.config.log_dir / "generated_samples"
        out_dir.mkdir(parents=True, exist_ok=True)
        file_name = out_dir / f"step_{self.trainer.global_step:08d}.png"
        # import wandb
        # if self.model.logger:
        #     self.model.logger.experiment.log({"generated": wandb.Image(fake_samples)})
        save_image(fake_samples, file_name, normalize=True)
        logger.debug(f"saved image at path {file_name}")
================================================
FILE: sequoia/common/config/__init__.py
================================================
from .config import Config
from .wandb_config import WandbConfig
================================================
FILE: sequoia/common/config/config.py
================================================
""" Config dataclasses for use with pytorch lightning.
@author Fabrice Normandin (@lebrice)
"""
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import numpy as np
import torch
from pytorch_lightning import seed_everything
from pyvirtualdisplay import Display
from simple_parsing import Serializable, flag
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable
# from .trainer_config import TrainerConfig
logger = get_logger(__name__)
virtual_display = None
@dataclass
class Config(Serializable, Parseable):
    """Configuration options for an experiment.

    TODO: This should contain configuration options that are not specific to
    either the Setting or the Method, or common to both. For instance, the
    random seed, or the log directory, whether CUDA is to be used, etc.
    """

    # Directory containing the datasets.
    data_dir: Path = Path(os.environ.get("SLURM_TMPDIR", os.environ.get("DATA_DIR", "data")))
    # Directory containing the results of an experiment.
    log_dir: Path = Path(os.environ.get("RESULTS_DIR", "results"))

    # Run in Debug mode: no wandb logging, extra output.
    debug: bool = flag(False)
    # Whether to render the environment observations. Slows down training.
    render: bool = flag(False)
    # Enables more verbose logging.
    verbose: bool = flag(False)
    # Number of workers for the dataloaders.
    num_workers: Optional[int] = None
    # Random seed.
    seed: Optional[int] = None
    # Which device to use. Defaults to 'cuda' if available.
    device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def __post_init__(self):
        """Seed everything, create the rng, and make sure the directories are Paths."""
        self.seed_everything()
        self._display: Optional[Display] = None
        self.rng = np.random.default_rng(self.seed)
        self.log_dir = Path(self.log_dir)
        self.data_dir = Path(self.data_dir)

    def __del__(self):
        # `_display` doesn't exist if `__post_init__` did not get far enough to set
        # it, so it is read with a default to avoid an AttributeError here.
        display = getattr(self, "_display", None)
        if display:
            display.stop()

    def get_display(self) -> Optional[Display]:
        """Return the virtual display, starting one first if needed.

        A virtual display is only created when `render` is False. When it can't be
        started, a warning is logged and None is returned.
        """
        if self._display:
            return self._display

        if not self.render:
            # If `--render` isn't set, then try to create a virtual display.
            # This has the same effect as running the script with xvfb-run
            try:
                virtual_display = Display(visible=False, size=(1366, 768))
                virtual_display.start()
                self._display = virtual_display
            except Exception as e:
                logger.warning(
                    RuntimeWarning(
                        f"Rendering is disabled, but we were unable to start the "
                        f"virtual display! {e}\n"
                        f"Make sure that xvfb is installed on your machine if you "
                        f"want to prevent rendering the environment's observations."
                    )
                )
        return self._display

    def seed_everything(self) -> None:
        """Call pytorch-lightning's `seed_everything` with `seed`, when one is set."""
        if self.seed is not None:
            seed_everything(self.seed)
================================================
FILE: sequoia/common/config/wandb_config.py
================================================
"""TODO: Re-enable the wandb stuff (disabled for now).
"""
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import *
from pytorch_lightning.loggers import WandbLogger
from simple_parsing import field, list_field
import wandb
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import Serializable
def patched_monitor():
    """Patch gym's video recorder so that closed videos get logged to wandb.

    `ImageEncoder.close` is replaced with a version that calls the original
    `close` and then logs the video at `output_path` with `wandb.log`. The original
    method is kept as `ImageEncoder.orig_close`, which is also how a previous call
    to this function is detected.
    """
    vcr = wandb.util.get_module(
        "gym.wrappers.monitoring.video_recorder",
        required="Couldn't import the gym python package, install with pip install gym",
    )
    print(f"Using patched version of `wandb.gym.monitor()`")

    # Guard: don't wrap `close` a second time.
    if hasattr(vcr.ImageEncoder, "orig_close"):
        print(f"wandb.gym.monitor() has already been called.")
        return
    vcr.ImageEncoder.orig_close = vcr.ImageEncoder.close

    def close(self):
        vcr.ImageEncoder.orig_close(self)
        # Use the "video.<number>" part of the path as the key, when there is one.
        found = re.match(r".+(video\.\d+).+", self.output_path)
        key = found.group(1) if found else "videos"
        wandb.log({key: wandb.Video(self.output_path)})

    vcr.ImageEncoder.close = close
    patched_entry = ["gym.wrappers.monitoring.video_recorder.ImageEncoder", "close"]
    wandb.patched["gym"].append(patched_entry)
import wandb.integration.gym

# Replace the `monitor` function of wandb's gym integration with
# `patched_monitor`. This happens as a side effect of importing this module.
wandb.integration.gym.monitor = patched_monitor
# GYM_MONITOR = os.environ.get("GYM_MONITOR", "")
# if not GYM_MONITOR:
# wandb.gym.monitor()
# os.environ["GYM_MONITOR"] = "True"
# else:
# assert False, "importing this a second time?"
logger = get_logger(__name__)
@dataclass
class WandbConfig(Serializable):
    """Set of configurations options for calling wandb.init directly."""

    # Which user to use
    entity: str = ""

    # project name to use in wandb.
    project: str = ""

    # Name used to easily group runs together.
    # A unique string shared by all runs in a given group
    # Used to create a parent folder that will contain the `run_name` directory.
    group: Optional[str] = None

    # Wandb run name. If None, will use wandb's automatic name generation
    run_name: Optional[str] = None

    # Identifier unique to each individual wandb run. When given, will try to
    # resume the corresponding run; otherwise a new ID is generated each time.
    run_id: Optional[str] = None

    # A run number is used to differentiate different iterations of the same experiment.
    # Runs with the same name can be later grouped with wandb to produce stderr plots.
    # TODO: Could maybe use the run_id instead?
    run_number: Optional[int] = None

    # Path where the wandb files should be stored. If the 'WANDB_DIR'
    # environment variable is set, uses that value. Otherwise, defaults to
    # the "wandb" folder inside `log_dir_root` (see `wandb_init_kwargs`).
    wandb_path: Optional[Path] = (
        Path(os.environ["WANDB_DIR"]) if "WANDB_DIR" in os.environ else None
    )

    # Tags to add to this run with wandb.
    tags: List[str] = list_field()

    # Notes about this particular experiment. (will be logged to wandb if used.)
    notes: Optional[str] = None

    # Root Logging directory.
    log_dir_root: Path = Path("results")

    monitor_gym: bool = True

    # Wandb api key. Useful for preventing the login prompt from wandb from appearing
    # when running on clusters or docker-based setups where the environment variables
    # aren't always shared.
    wandb_api_key: Optional[Union[str, Path]] = field(
        default=os.environ.get("WANDB_API_KEY"),
        to_dict=False,  # Do not serialize this field.
        repr=False,  # Do not show this field in repr().
    )

    # Run offline (data can be streamed later to wandb servers).
    offline: bool = False
    # Enables or explicitly disables anonymous logging.
    anonymous: bool = False
    # Sets the version, mainly used to resume a previous run.
    version: Optional[str] = None
    # Save checkpoints in wandb dir to upload on W&B servers.
    log_model: bool = False

    # Class variables used to check whether wandb.login has already been called or not.
    logged_in: ClassVar[bool] = False
    key_configured: ClassVar[bool] = False

    @property
    def log_dir(self):
        """Directory of this run: `log_dir_root/project/group/run_name/run_<number>`.

        Missing parts are left out, except for the run name, which is replaced with
        "default".
        """
        return self.log_dir_root.joinpath(
            (self.project or ""),
            (self.group or ""),
            (self.run_name or "default"),
            (f"run_{self.run_number}" if self.run_number is not None else ""),
        )

    def wandb_login(self) -> bool:
        """Calls `wandb.login()`.

        The api key is only used when a project is also set. It can be either the
        key itself or the path to a file that contains it. `wandb.login()` is called
        at most once per class; later calls return the stored result.

        Returns
        -------
        bool
            If the key is configured.
        """
        key = None
        if self.wandb_api_key is not None and self.project:
            if Path(self.wandb_api_key).is_file():
                # Files usually end with a newline, which is not part of the key.
                key = Path(self.wandb_api_key).read_text().strip()
            else:
                key = str(self.wandb_api_key)
            assert isinstance(key, str)

        cls = type(self)
        if not cls.logged_in:
            cls.key_configured = wandb.login(key=key)
            cls.logged_in = True
        return cls.key_configured

    def wandb_init_kwargs(self) -> Dict:
        """Return the kwargs to pass to wandb.init()

        Also sets `wandb_path` when it is None, and creates that directory.
        """
        if self.run_name is None:
            # TODO: Create a run name using the coefficients of the tasks, etc?
            # At the moment, if no run name is given, the 'random' name from wandb is used.
            pass
        if self.wandb_path is None:
            self.wandb_path = self.log_dir_root / "wandb"
        self.wandb_path.mkdir(parents=True, mode=0o777, exist_ok=True)
        return dict(
            dir=str(self.wandb_path),
            project=self.project,
            entity=self.entity,
            name=self.run_name,
            id=self.run_id,
            group=self.group,
            notes=self.notes,
            reinit=True,
            tags=self.tags,
            resume="allow",
            monitor_gym=self.monitor_gym,
        )

    def wandb_init(self, config_dict: Dict = None) -> wandb.wandb_run.Run:
        """Executes the call to `wandb.init()`.

        TODO(@lebrice): Not sure if it still makes sense to call `wandb.init`
        ourselves when using Pytorch Lightning, should probably ask @jeromepl
        for advice on this.

        Args:
            config_dict (Dict): The configuration dictionary. Usually obtained
                by calling `to_dict()` on a `Serializable` dataclass, or `asdict()`
                on a regular dataclass.

        Returns:
            wandb.wandb_run.Run: Whatever gets returned by `wandb.init()`.
        """
        logger.info(f"Wandb run id: {self.run_id}")
        logger.info(
            f"Using wandb. Group name: {self.group} run name: {self.run_name}, "
            f"log_dir: {self.log_dir}"
        )
        self.wandb_login()

        init_kwargs = self.wandb_init_kwargs()
        init_kwargs["config"] = config_dict

        run = wandb.init(**init_kwargs)
        logger.info(f"Run: {run}")
        if run:
            # Keep the name that wandb generated when none was given.
            if self.run_name is None:
                self.run_name = run.name
            # run.save()
            if run.resumed:
                # TODO: add *proper* wandb resuming, probably by using @nitarshan 's md5 id cool idea.
                # wandb.restore(self.log_dir / "checkpoints")
                pass
        return run

    def make_logger(self, wandb_parent_dir: Path = None) -> WandbLogger:
        """Log in to wandb and create a `WandbLogger` from these options.

        Args:
            wandb_parent_dir: Passed as the `save_dir` of the logger when given.
        """
        logger.info(f"Creating a WandbLogger with using options {self}.")
        self.wandb_login()
        wandb_logger = WandbLogger(
            name=self.run_name,
            save_dir=str(wandb_parent_dir) if wandb_parent_dir else None,
            offline=self.offline,
            id=self.run_id,
            anonymous=self.anonymous,
            version=self.version,
            project=self.project,
            tags=self.tags,
            log_model=self.log_model,
            entity=self.entity,
            group=self.group,
            monitor_gym=self.monitor_gym,
            reinit=True,
        )
        return wandb_logger
================================================
FILE: sequoia/common/gym_wrappers/__init__.py
================================================
""" Contains some potentially useful gym wrappers. """
from .add_done import AddDoneToObservation
from .add_info import AddInfoToObservation
from .convert_tensors import ConvertToFromTensors
from .env_dataset import EnvDataset
from .multi_task_environment import MultiTaskEnvironment
from .pixel_observation import PixelObservationWrapper
from .policy_env import PolicyEnv
from .smooth_environment import SmoothTransitions
from .step_callback_wrapper import PeriodicCallback, StepCallback, StepCallbackWrapper
from .transform_wrappers import TransformAction, TransformObservation, TransformReward
from .utils import IterableWrapper, RenderEnvWrapper, has_wrapper
================================================
FILE: sequoia/common/gym_wrappers/action_limit.py
================================================
""" IDEA: same as ObservationLimit, but for the total number of actions (steps).
"""
import gym
from gym.error import ClosedEnvironmentError
from sequoia.utils import get_logger
from .utils import IterableWrapper
logger = get_logger(__name__)
class ActionCounter(IterableWrapper):
    """Wrapper that keeps a running total of the actions sent to the environment.

    With a vectorized environment, every step adds `num_envs` to the total.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env=env)
        self._action_counter: int = 0

    def step_count(self) -> int:
        """Return the total number of actions so far (same value as `action_count`)."""
        return self._action_counter

    def action_count(self) -> int:
        """Return the total number of actions so far."""
        return self._action_counter

    def step(self, action):
        """Step the wrapped env, then update the action counter."""
        obs, reward, done, info = self.env.step(action)
        if self.is_vectorized:
            increment = self.env.num_envs
        else:
            increment = 1
        self._action_counter += increment
        return obs, reward, done, info
class ActionLimit(ActionCounter):
    """Closes the env when `max_steps` actions have been performed *in total*.

    For vectorized environments, each step consumes up to `num_envs` from this
    total budget, i.e. the step counter is incremented by the batch size at
    each step.
    """

    def __init__(self, env: gym.Env, max_steps: int):
        super().__init__(env=env)
        self._max_steps = max_steps
        self._initial_reset = False
        self._is_closed: bool = False

    @property
    def max_steps(self) -> int:
        """Total number of actions allowed before the env gets closed."""
        return self._max_steps

    def __len__(self):
        return self.max_steps

    def closed_error_message(self) -> str:
        """Message of the error raised when stepping after the limit was reached."""
        return f"Env reached max number of steps ({self._max_steps})"

    def step(self, action):
        """Step the env, closing it once the action budget is used up.

        Raises:
            ClosedEnvironmentError: If the limit was already reached before this call.
        """
        if self._action_counter >= self._max_steps:
            # Same text as before, now taken from the method that defines it.
            raise ClosedEnvironmentError(self.closed_error_message())

        obs, reward, done, info = super().step(action)
        # logger.debug(f"(step {self._action_counter}/{self._max_steps})")

        # BUG: If we dont use >=, then iteration with EnvDataset doesn't work.
        if self._action_counter >= self._max_steps:
            self.close()
            # done = True
            # info["truncated"] = True
        return obs, reward, done, info
================================================
FILE: sequoia/common/gym_wrappers/action_limit_test.py
================================================
from typing import List
import gym
import pytest
from gym.wrappers import TimeLimit
from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from .action_limit import ActionLimit
def test_basics():
    """Smoke test: a gym env can be wrapped with an `ActionLimit`."""
    ActionLimit(gym.make("CartPole-v0"), max_steps=10)
def test_EnvDataset_of_ActionLimit():
    """Iterating over `EnvDataset(ActionLimit(env))` uses exactly `max_steps` actions.

    Episodes are capped at `max_episode_steps` by a `TimeLimit`. The env is expected to
    be closed once the total budget of `max_steps` actions has been sent.
    """
    max_episode_steps = 10
    max_steps = 100
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    env = ActionLimit(env, max_steps=max_steps)
    env = EnvDataset(env)

    # Index of the last step of each episode.
    episode_steps: List[int] = []
    total_steps = 0
    for episode in range(15):
        print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}")
        step = None
        for step, obs in enumerate(env):
            print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}")
            assert step <= max_episode_steps
            env.send(env.action_space.sample())
            total_steps += 1
        # Each episode should yield at least one observation.
        assert step is not None
        # NOTE: Here we have the last 'step' as 9.
        episode_steps.append(step)
        assert total_steps <= max_steps
        if total_steps == max_steps:
            break
    assert env.is_closed()
    # `step` is a 0-based index, hence the `+ 1` to get the number of steps per episode.
    assert sum(step + 1 for step in episode_steps) == max_steps
@pytest.mark.xfail(
    reason="FIXME: Shouldn't use CartPole env for this test since episodes aren't "
    "always longer than 10."
)
def test_ActionLimit_of_EnvDataset():
    """Iterating over `ActionLimit(EnvDataset(env))` uses exactly `max_steps` actions.

    Same idea as the test above, but with the two wrappers applied in the opposite
    order. Currently marked as an expected failure (see the `xfail` reason).
    """
    max_episode_steps = 10
    max_steps = 100
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    env = EnvDataset(env)
    env = ActionLimit(env, max_steps=max_steps)
    env.seed(123)

    # Index of the last step of each episode.
    episode_steps: List[int] = []
    for episode in range(10):
        print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}")
        step = 0
        for step, obs in enumerate(env):
            print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}")
            assert step <= max_episode_steps
            env.send(env.action_space.sample())
        assert step > 0
        # NOTE: Here we have the last 'step' as 9.
        episode_steps.append(step)
    assert env.is_closed()
    # `step` is a 0-based index, hence the `+ 1` to get the number of steps per episode.
    assert sum(step + 1 for step in episode_steps) == max_steps
from sequoia.settings.sl.wrappers.measure_performance_test import with_is_last
@pytest.mark.xfail(
    reason=(
        "BUG: Why is the BaseMethod working fine on a `TraditionalRLSetting, but "
        "not on an IncrementalRLSetting? Seems like the 'max_steps' isn't enforced the "
        " same way in both somehow."
    )
)
def test_delayed_EnvDataset_of_ActionLimit():
    """Same test as above, however introduce a delay (like what's happening in the pl.Trainer)
    between the items sent by the trainer and the rewards returned by the env.

    The delay comes from iterating through `with_is_last`, which also tells us whether
    the current observation is the last one of the episode.

    NOTE(review): despite the name of this test, the wrappers are applied as
    `ActionLimit(EnvDataset(env))` here -- confirm which order is intended.
    """
    max_episode_steps = 10
    max_steps = 100
    env = gym.make("CartPole-v0")
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    env = EnvDataset(env)
    env = ActionLimit(env, max_steps=max_steps)

    # Index of the last step of each episode.
    episode_steps: List[int] = []
    for episode in range(10):
        print(f"Staring episode {episode}, env.is_closed(): {env.is_closed()}")
        step = 0
        for step, (obs, is_last) in enumerate(with_is_last(env)):
            print(f"Episode {episode}, Step {step}, obs {obs} {env.is_closed()}")
            assert step <= max_episode_steps
            env.send(env.action_space.sample())
            # The observation at the episode length limit has to be flagged as the last.
            if step == max_episode_steps - 1:
                assert is_last
        assert step > 0
        # NOTE: Here we have the last 'step' as 9.
        episode_steps.append(step)
    assert env.is_closed()
    # `step` is a 0-based index, hence the `+ 1` to get the number of steps per episode.
    assert sum(step + 1 for step in episode_steps) == max_steps
================================================
FILE: sequoia/common/gym_wrappers/add_done.py
================================================
""" Wrapper that adds 'done' as part of the environment's observations.
"""
from dataclasses import is_dataclass, replace
from functools import singledispatch
from typing import Any, Dict, Sequence, Tuple, TypeVar, Union
import gym
import numpy as np
from gym import Space, spaces
from gym.vector.utils import batch_space
from torch import Tensor
from sequoia.common.spaces import TypedDictSpace
from .utils import IterableWrapper
# Type variables used in the annotations of the `add_done` handlers below.
# `T`: type of an array-like observation.
T = TypeVar("T")
# `Bool`: a single boolean, or a sequence of booleans.
Bool = TypeVar("Bool", bound=Union[bool, Sequence[bool]])
# `K` / `V`: key and value types of dict observations.
K = TypeVar("K")
V = TypeVar("V")
@singledispatch
def add_done(observation: Any, done: Any) -> Any:
    """Attach the given `done` value to an observation (generic function).

    This is the fallback, used when no handler is registered for the type of
    `observation`: a dataclass observation gets copied with its `done` field replaced,
    and anything else is rejected.

    Returns the new observation, which might not be of the same type as the input.

    Raises:
        NotImplementedError: if `observation` isn't a dataclass and no handler is
            registered for its type.
    """
    if not is_dataclass(observation):
        obs_type = type(observation)
        raise NotImplementedError(
            f"Function add_done has no handler registered for observations of type {obs_type}."
        )
    return replace(observation, done=done)
@add_done.register(int)
@add_done.register(float)
@add_done.register(Tensor)
@add_done.register(np.ndarray)
def _add_done_to_array_obs(observation: T, done: bool) -> Dict[str, Union[T, bool]]:
    """Wrap a scalar / array / tensor observation and `done` into a new dict.

    The observation is stored under the "x" key, and `done` under the "done" key.
    """
    # TODO: use 'x' or 'observation'?
    return dict(x=observation, done=done)
@add_done.register(tuple)
def _add_done_to_tuple_obs(observation: Tuple, done: bool) -> Tuple:
    """Return a new tuple made of the items of `observation`, followed by `done`."""
    return (*observation, done)
@add_done.register(dict)
def _add_done_to_dict_obs(observation: Dict[K, V], done: bool) -> Dict[K, Union[V, bool]]:
    """Add `done` to a dict observation, under the "done" key.

    NOTE: The given dict is modified in-place and then returned (no copy is made).
    """
    # The observation isn't expected to already have a 'done' entry.
    assert "done" not in observation
    observation["done"] = done
    return observation
@add_done.register
def add_done_to_space(observation: Space, done: Space) -> Space:
    """Add the space of the 'done' value to the given observation space.

    By default, `done` corresponds to what you'd get from a single
    (i.e. non-vectorized) environment.

    This is the fallback for spaces that don't have a more specific handler, and it
    always raises a NotImplementedError.
    """
    message = (
        f"No handler registered for spaces of type {type(observation)}. "
        f"(value = {observation}, done={done})"
    )
    raise NotImplementedError(message)
@add_done.register(spaces.Discrete)
@add_done.register(spaces.MultiDiscrete)
@add_done.register(spaces.MultiBinary)
@add_done.register(spaces.Box)
def _add_done_to_box_space(observation: Space, done: Space) -> spaces.Dict:
    """Combine a 'simple' observation space and the `done` space into a `TypedDictSpace`.

    The observation space is stored under the "x" key, and `done` under the "done" key.
    """
    # TODO: Use 'x' or 'observation' as the key?
    subspaces = {"x": observation, "done": done}
    return TypedDictSpace(**subspaces)
@add_done.register
def _add_done_to_tuple_space(observation: spaces.Tuple, done: Space) -> spaces.Tuple:
    """Return a new Tuple space: the subspaces of `observation`, followed by `done`."""
    subspaces = list(observation.spaces)
    subspaces.append(done)
    return spaces.Tuple(subspaces)
@add_done.register
def _add_done_to_dict_space(observation: spaces.Dict, done: Space) -> spaces.Dict:
    """Return a new space of the same type as `observation`, with an added 'done' entry.

    The given space is left untouched: the new entry is added to a copy of its subspaces.
    """
    subspaces = observation.spaces.copy()
    assert "done" not in subspaces, "space shouldn't already have a 'done' key."
    subspaces["done"] = done
    space_type = type(observation)
    return space_type(subspaces)
class AddDoneToObservation(IterableWrapper):
    """Wrapper that adds the 'done' value from `step` to the observations.

    Need to add the 'done' vector to the observation, so we can
    get access to the 'end of episode' signal in the shared_step, since
    when iterating over the env like a dataloader, the yielded items only
    have the observations, and dont have the 'done' vector. (so as to be
    consistent with supervised learning).

    NOTE: NEVER use this *BEFORE* batching, because of how the 'reset' works in
    all VectorEnvs, the observations will always be the 'new' ones, so `done`
    (in the obs) will always be False!
    """

    def __init__(self, env: gym.Env, done_space: Space = None):
        """Wrap `env` and add the `done` space to its observation space(s).

        Parameters
        ----------
        env : gym.Env
            The environment to wrap.
        done_space : Space, optional
            Space of the `done` value of a single (non-vectorized) env. Defaults to a
            scalar boolean Box.
        """
        super().__init__(env)
        # boolean value. (0 or 1)
        if done_space is None:
            # NOTE: Using `np.bool_` rather than the `np.bool` alias, which was
            # deprecated in NumPy 1.20 and removed in NumPy 1.24.
            done_space = spaces.Box(0, 1, (), dtype=np.bool_)
        if self.is_vectorized:
            self.single_observation_space = add_done(self.single_observation_space, done_space)
            # One `done` value per individual environment.
            done_space = batch_space(done_space, self.env.num_envs)
        self.done_space = done_space
        self.observation_space = add_done(self.env.observation_space, self.done_space)

    def reset(self, **kwargs):
        """Reset the env and return the initial observation, with `done` set to False.

        Keyword arguments are forwarded to the `reset` method of the wrapped env.
        """
        observation = self.env.reset(**kwargs)
        if self.is_vectorized:
            # Lower bound of the (batched) done space.
            done = self.done_space.low
        else:
            done = False
        return add_done(observation, done)

    def step(self, action):
        """Perform a step and add the resulting `done` to the returned observation."""
        observation, reward, done, info = self.env.step(action)
        observation = add_done(observation, done)
        return observation, reward, done, info
================================================
FILE: sequoia/common/gym_wrappers/add_info.py
================================================
""" Wrapper that adds the 'info' as a part of the environment's observations.
"""
from dataclasses import is_dataclass, replace
from functools import singledispatch
from typing import Dict, Sequence, Tuple, TypeVar, Union
import gym
import numpy as np
from gym import Space, spaces
from gym.vector import VectorEnv
from gym.vector.utils import batch_space
from torch import Tensor
from .utils import IterableWrapper
# Type variables used in the annotations of the `add_info` handlers below.
# `Info`: a single info dict, or a sequence of info dicts.
Info = TypeVar("Info", bound=Union[Dict, Sequence[Dict]])
# `K` / `V`: key and value types of dict observations.
K = TypeVar("K")
V = TypeVar("V")
@singledispatch
def add_info(observation, info):
    """Attach the given `info` value to an observation (generic function).

    This is the fallback, used when no handler is registered for the type of
    `observation`: a dataclass observation gets copied with its `info` field replaced,
    and anything else is rejected.

    Returns the new observation, which might not be of the same type as the input.

    NOTE: Can also be applied to spaces.

    Raises:
        NotImplementedError: if `observation` isn't a dataclass and no handler is
            registered for its type.
    """
    if not is_dataclass(observation):
        obs_type = type(observation)
        raise NotImplementedError(
            f"Function add_info has no handler registered for inputs of type {obs_type}."
        )
    # TODO: This assumes that the dataclass already has the 'info' field, if
    # that dataclass is frozen.
    return replace(observation, info=info)
@add_info.register(Tensor)
@add_info.register(np.ndarray)
def _add_info_to_array_obs(observation: np.ndarray, info: Info) -> Tuple[np.ndarray, Info]:
    """Pair an array / tensor observation with `info`, as an `(observation, info)` tuple."""
    pair = (observation, info)
    return pair
@add_info.register(tuple)
def _add_info_to_tuple_obs(observation: Tuple, info: Info) -> Tuple:
    """Return a new tuple made of the items of `observation`, followed by `info`."""
    return (*observation, info)
@add_info.register(dict)
def _add_info_to_dict_obs(observation: Dict[K, V], info: Info) -> Dict[K, Union[V, Info]]:
    """Add `info` to a dict observation, under the "info" key.

    NOTE: The given dict is modified in-place and then returned (no copy is made).
    """
    # The observation isn't expected to already have an 'info' entry.
    assert "info" not in observation
    observation["info"] = info
    return observation
@add_info.register(spaces.Space)
def add_info_to_space(observation: Space, info: Space) -> Space:
    """Add the space of the 'info' value from the env to this observation space.

    This is the fallback for spaces that don't have a more specific handler, and it
    always raises a NotImplementedError.
    """
    message = (
        f"No handler registered for spaces of type {type(observation)}. "
        f"(value = {observation})"
    )
    raise NotImplementedError(message)
@add_info.register
def _add_info_to_box_space(observation: spaces.Box, info: Space) -> spaces.Tuple:
    """Pair a Box observation space with the `info` space, in a new Tuple space."""
    members = [observation, info]
    return spaces.Tuple(members)
@add_info.register
def _add_info_to_tuple_space(observation: spaces.Tuple, info: Space) -> spaces.Tuple:
    """Return a new Tuple space: the subspaces of `observation`, followed by `info`."""
    subspaces = list(observation.spaces)
    subspaces.append(info)
    return spaces.Tuple(subspaces)
@add_info.register
def _add_info_to_dict_space(observation: spaces.Dict, info: Space) -> spaces.Dict:
    """Return a new space of the same type as `observation`, with an added 'info' entry.

    The given space is left untouched: the new entry is added to a copy of its subspaces.
    """
    subspaces = observation.spaces.copy()
    assert "info" not in subspaces, "space shouldn't already have an 'info' key."
    subspaces["info"] = info
    space_type = type(observation)
    return space_type(subspaces)
class AddInfoToObservation(IterableWrapper):
    """Wrapper that adds the 'info' from `step` to the environment's observations.

    TODO: Need to add the 'info' dict to the Observation, so we can have
    access to the final observation (which gets stored in the info dict at key
    'final_state'). Do we, though?
    TODO: Should we also add the 'final state' to the observations as well?
    """

    def __init__(self, env: gym.Env, info_space: spaces.Space = None):
        """Wrap `env` and add the `info` space to its observation space.

        Parameters
        ----------
        env : gym.Env
            The environment to wrap.
        info_space : spaces.Space, optional
            Space of the `info` value of a single (non-vectorized) env. Defaults to an
            empty Dict space.
        """
        super().__init__(env)
        self.is_vectorized = isinstance(env.unwrapped, VectorEnv)
        # TODO: Should we make 'info_space' mandatory here?
        if info_space is None:
            # TODO: There seems to be some issues if we have an empty info space
            # before the batching.
            info_space = spaces.Dict({})
        if self.is_vectorized:
            # One `info` value per individual environment.
            info_space = batch_space(info_space, self.env.num_envs)
        self.info_space = info_space
        # NOTE: This has to be assigned to `observation_space`. It used to be stored in
        # an `observation` attribute, which left the observation space of this wrapper
        # unchanged even though `reset` and `step` return modified observations.
        self.observation_space = add_info(self.env.observation_space, self.info_space)

    def reset(self, **kwargs):
        """Reset the env and return the initial observation, along with an empty info.

        Keyword arguments are forwarded to the `reset` method of the wrapped env.
        """
        observation = self.env.reset(**kwargs)
        info = {}
        if self.is_vectorized:
            # One empty info dict per individual environment.
            info = np.array([{} for _ in range(self.env.num_envs)])
        obs = add_info(observation, info)
        return obs

    def step(self, action):
        """Perform a step and add the resulting `info` to the returned observation."""
        observation, reward, done, info = self.env.step(action)
        observation = add_info(observation, info)
        return observation, reward, done, info
================================================
FILE: sequoia/common/gym_wrappers/convert_tensors.py
================================================
from dataclasses import is_dataclass, replace
import dataclasses
from functools import singledispatch, wraps
from typing import Any, Dict, Tuple, TypeVar, Union
import gym
import numpy as np
import torch
from gym import Space, spaces
from torch import Tensor
from sequoia.common.spaces.image import Image, ImageTensorSpace
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.common.spaces.typed_dict import TypedDictSpace
from sequoia.utils.generic_functions import from_tensor, move # , to_tensor
from sequoia.utils.logging_utils import get_logger
from .utils import IterableWrapper
@singledispatch
def to_tensor(v, device: torch.device = None) -> Union[Tensor, Any]:
    """Convert `v` (or the items it contains) into tensors (generic function).

    - `None` is returned as-is.
    - If `v` is a dataclass, a new instance of the same type is created, where each
      field was converted using `to_tensor` recursively.
    - Handlers registered for other types (e.g. tuples and dicts) convert the items of
      `v` recursively.

    Otherwise, just uses `torch.as_tensor(v, device=device)`.
    """
    if v is None:
        return None
    if not dataclasses.is_dataclass(v):
        return torch.as_tensor(v, device=device)
    converted_fields = {}
    for field in dataclasses.fields(v):
        converted_fields[field.name] = to_tensor(getattr(v, field.name), device=device)
    return type(v)(**converted_fields)
@to_tensor.register(tuple)
def _(
    v,
    device: torch.device = None,
):
    """Convert each item of the tuple with `to_tensor`, returning a plain tuple."""
    # NOTE: Choosing to convert tuples of things into tuples of tensor things, rather than torch
    # tensors.
    converted_items = [to_tensor(item, device=device) for item in v]
    return tuple(converted_items)
@to_tensor.register(dict)
def _(v: Dict, device: torch.device = None) -> Dict:
    """Convert the values of a dict with `to_tensor`, returning a dict of the same type.

    The keys are left as-is.
    """
    converted = {key: to_tensor(value, device=device) for key, value in v.items()}
    if all(isinstance(key, str) for key in converted):
        # Entries are passed as keyword arguments, so that this keeps working with dict
        # subclasses whose constructor expects keyword arguments.
        return type(v)(**converted)
    # Keyword arguments have to be strings, so when there are other kinds of keys
    # (e.g. ints), the mapping is passed as a positional argument instead.
    return type(v)(converted)
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Generic type variable.
T = TypeVar("T")
# Type variable for gym spaces: used to annotate the functions below that receive a
# space as an argument.
S = TypeVar("S", bound=Space)
# TODO: Add 'TensorSpace' space which wraps a given space, doing the same kinda thing
# as in Sparse.
class ConvertToFromTensors(IterableWrapper):
    """Wrapper that sits between Tensors and the numpy/sample world of the wrapped env.

    Actions coming into the env are converted into np.ndarrays or samples from the action
    space, while the observations and rewards coming out of the env are converted to
    Tensors. (`done` and `info` are returned as-is.)

    Dict/Tuple/etc observation and action spaces are also supported.

    The observation, action and reward spaces of this wrapper are given tensor support:
    their `sample` methods return Tensors, and their `contains` methods also accept
    Tensors as an input.

    If `device` is given, created Tensors are moved to the provided device.
    """

    def __init__(self, env: gym.Env, device: Union[torch.device, str] = None):
        super().__init__(env=env)
        self.device = device
        wrapped_env = self.env
        self.observation_space: Space = add_tensor_support(
            wrapped_env.observation_space, device=device
        )
        self.action_space: Space = add_tensor_support(wrapped_env.action_space, device=device)

        self.reward_space: Space
        if not hasattr(self.env, "reward_space"):
            # Create a reward space from the reward range of the env, with one entry per
            # environment when the env is vectorized.
            reward_range = getattr(self.env, "reward_range", (-np.inf, np.inf))
            reward_shape: Tuple[int, ...] = ()
            if self.is_vectorized:
                reward_shape = (self.env.num_envs,)
            self.reward_space = spaces.Box(
                reward_range[0], reward_range[1], reward_shape, np.float32
            )
        else:
            self.reward_space = self.env.reward_space
        self.reward_space = add_tensor_support(self.reward_space, device=device)

    def reset(self, *args, **kwargs):
        """Reset the wrapped env and return the initial observation as Tensor(s)."""
        initial_obs = self.env.reset(*args, **kwargs)
        return self.observation(initial_obs)

    def observation(self, observation):
        """Convert an observation from the env into Tensor(s) on `self.device`."""
        return to_tensor(observation, device=self.device)

    def action(self, action):
        """Convert an action (possibly containing Tensors) into something the env accepts."""
        is_wrapped_prediction = isinstance(
            self.action_space, spaces.MultiDiscrete
        ) and is_dataclass(action)
        if not is_wrapped_prediction:
            return from_tensor(self.action_space, action)
        # TODO: Fixme, the actions don't currently fit their space!
        y_pred = from_tensor(self.action_space, action.y_pred)
        action_np = replace(action, y_pred=y_pred)
        # FIXME: for now, unwrapping the actions
        return action_np["y_pred"]

    def reward(self, reward):
        """Convert a reward from the env into Tensor(s) on `self.device`."""
        return to_tensor(reward, device=self.device)

    def step(self, action):
        """Convert the action, step the wrapped env, and convert the results to Tensors."""
        env_action = self.action(action)
        assert env_action in self.env.action_space, (env_action, self.env.action_space)
        observation, reward, done, info = self.env.step(env_action)
        # NOTE: `done` and `info` aren't converted (not sure it would be useful).
        return self.observation(observation), self.reward(reward), done, info
def supports_tensors(space: S) -> bool:
    """Return the `_supports_tensors` marker of `space`, or False if it doesn't have one."""
    # TODO: Remove this, instead use a generic function
    marker = getattr(space, "_supports_tensors", False)
    return marker
def has_tensor_support(space: S) -> bool:
    """Same as `supports_tensors`: tells whether `space` was marked as supporting Tensors."""
    is_supported = supports_tensors(space)
    return is_supported
def _mark_supports_tensors(space: S) -> None:
    """Mark `space` as supporting Tensors by setting its `_supports_tensors` attribute."""
    # TODO: Remove this!
    space._supports_tensors = True
@singledispatch
def add_tensor_support(space: S, device: torch.device = None) -> S:
    """Modifies `space` so its `sample()` method produces Tensors, and its
    `contains` method also accepts Tensors.

    For Dict and Tuple spaces, all the subspaces are also modified recursively.

    This is the fallback for spaces that don't have a more specific handler: it replaces
    the `sample` and `contains` methods of the given space in-place.

    Parameters
    ----------
    space : Space
        The space to modify.
    device : torch.device, optional
        Device on which to create the sampled Tensors.

    Returns the modified Space.
    """
    # Save the original methods so we can use them.
    sample = space.sample
    contains = space.contains

    if supports_tensors(space):
        # The space already supports Tensors.
        return space

    @wraps(space.sample)
    def _sample(*args, **kwargs):
        samples = sample(*args, **kwargs)
        # NOTE: The `to_tensor` of this module takes the value to convert followed by the
        # device (it doesn't take the space as its first argument), and it creates the
        # tensors directly on `device`.
        return to_tensor(samples, device=device)

    @wraps(space.contains)
    def _contains(x: Union[Tensor, Any]) -> bool:
        # Convert Tensors back into something the original `contains` understands.
        x = from_tensor(space, x)
        return contains(x)

    space.sample = _sample
    space.contains = _contains
    _mark_supports_tensors(space)
    assert has_tensor_support(space)
    return space
@add_tensor_support.register
def _(space: Image, device: torch.device = None) -> Image:
    """Create an `ImageTensorSpace` with the bounds, shape and dtype of an Image space."""
    return ImageTensorSpace.from_box(
        TensorBox(space.low, space.high, shape=space.shape, dtype=space.dtype, device=device)
    )
@add_tensor_support.register
def _(space: spaces.Dict, device: torch.device = None) -> spaces.Dict:
    """Create a new space of the same type, where every subspace supports Tensors."""
    subspaces = {}
    for key, subspace in space.spaces.items():
        subspaces[key] = add_tensor_support(subspace, device=device)
    tensor_space = type(space)(**subspaces)
    # TODO: Remove this '_mark_supports_tensors' and instead use a generic function.
    _mark_supports_tensors(tensor_space)
    return tensor_space
@add_tensor_support.register
def _(space: TypedDictSpace, device: torch.device = None) -> TypedDictSpace:
    """Create a new space of the same type and dtype, where every subspace supports Tensors."""
    subspaces = {}
    for key, subspace in space.spaces.items():
        subspaces[key] = add_tensor_support(subspace, device=device)
    tensor_space = type(space)(subspaces, dtype=space.dtype)
    _mark_supports_tensors(tensor_space)
    return tensor_space
@add_tensor_support.register(NamedTupleSpace)
def _(space: Dict, device: torch.device = None) -> Dict:
    """Create a new space of the same type and dtype, where every subspace supports Tensors.

    NOTE: This handler is registered for `NamedTupleSpace` (the `Dict` annotations
    aren't what is used for the dispatching).
    """
    subspaces = {
        name: add_tensor_support(subspace, device=device) for name, subspace in space.items()
    }
    tensor_space = type(space)(**subspaces, dtype=space.dtype)
    _mark_supports_tensors(tensor_space)
    return tensor_space
@add_tensor_support.register(spaces.Tuple)
def _(space: Dict, device: torch.device = None) -> Dict:
    """Create a new space of the same type, where every subspace supports Tensors.

    NOTE: This handler is registered for `spaces.Tuple` (the `Dict` annotations aren't
    what is used for the dispatching).
    """
    subspaces = []
    for subspace in space.spaces:
        subspaces.append(add_tensor_support(subspace, device=device))
    tensor_space = type(space)(subspaces)
    _mark_supports_tensors(tensor_space)
    return tensor_space
# TODO: Should this be moved to the place where these are defined instead?
from sequoia.common.spaces.tensor_spaces import TensorBox, TensorDiscrete, TensorMultiDiscrete
@add_tensor_support.register
def _(space: spaces.Box, device: torch.device = None) -> spaces.Box:
    """Create a `TensorBox` with the same bounds, shape and dtype as the given Box."""
    tensor_space = TensorBox(
        space.low,
        space.high,
        shape=space.shape,
        dtype=space.dtype,
        device=device,
    )
    _mark_supports_tensors(tensor_space)
    return tensor_space
@add_tensor_support.register
def _(space: spaces.Discrete, device: torch.device = None) -> spaces.Box:
    """Create a `TensorDiscrete` with the same `n` as the given Discrete space.

    NOTE: The returned space is a `TensorDiscrete`, despite the return annotation.
    """
    tensor_space = TensorDiscrete(n=space.n, device=device)
    _mark_supports_tensors(tensor_space)
    return tensor_space
@add_tensor_support.register
def _(space: spaces.MultiDiscrete, device: torch.device = None) -> spaces.Box:
    """Create a `TensorMultiDiscrete` with the same `nvec` as the given MultiDiscrete space.

    NOTE: The returned space is a `TensorMultiDiscrete`, despite the return annotation.
    """
    tensor_space = TensorMultiDiscrete(nvec=space.nvec, device=device)
    _mark_supports_tensors(tensor_space)
    return tensor_space
================================================
FILE: sequoia/common/gym_wrappers/convert_tensors_test.py
================================================
from typing import Union
import gym
import pytest
import torch
from gym import spaces
from torch import Tensor
from sequoia.conftest import skipif_param
from .convert_tensors import ConvertToFromTensors, add_tensor_support
@pytest.mark.parametrize(
    "device",
    [
        None,
        "cpu",
        skipif_param(
            not torch.cuda.is_available(),
            "cuda",
            reason="Cuda is required for this test",
        ),
    ],
)
def test_convert_tensors_wrapper(device: Union[str, torch.device]):
    """The wrapper returns Tensor observations and rewards, on the requested device."""
    env = ConvertToFromTensors(gym.make("CartPole-v0"), device=device)

    initial_obs = env.reset()
    assert isinstance(initial_obs, Tensor)
    if device:
        assert initial_obs.device.type == device

    # The action is passed to `step` as a Tensor.
    action = torch.as_tensor(env.action_space.sample())
    obs, reward, done, info = env.step(action)
    assert isinstance(obs, Tensor)
    assert isinstance(reward, Tensor)
    # TODO: Not quite sure this is the best thing to do:
    # NOTE: `done` isn't checked here, since it isn't converted to a Tensor.
    if device:
        assert obs.device.type == device
        assert reward.device.type == device
from dataclasses import dataclass
from typing import Optional
from sequoia.common.batch import Batch
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
@dataclass(frozen=True)
class Foo(Batch):
    """Minimal frozen `Batch` subclass, used as the `dtype` of the spaces in the tests below."""

    # Field names match the subspaces of the spaces created in the tests below.
    x: Tensor
    task_labels: Optional[Tensor]
def test_preserves_dtype_of_namedtuple_space():
    """`add_tensor_support` keeps the `dtype` of a `NamedTupleSpace`."""
    batch_size = 32
    image_space = spaces.Box(0, 1, [batch_size, 123, 123, 3])
    task_label_space = spaces.MultiDiscrete([5] * batch_size)
    input_space = NamedTupleSpace(x=image_space, task_labels=task_label_space, dtype=Foo)

    output_space = add_tensor_support(input_space)

    assert output_space.dtype is input_space.dtype
def test_preserves_dtype_of_typeddict_space():
    """`add_tensor_support` keeps the `dtype` of a `TypedDictSpace`."""
    batch_size = 32
    image_space = spaces.Box(0, 1, [batch_size, 123, 123, 3])
    task_label_space = spaces.MultiDiscrete([5] * batch_size)
    input_space = TypedDictSpace(x=image_space, task_labels=task_label_space, dtype=Foo)

    output_space = add_tensor_support(input_space)

    assert output_space.dtype is input_space.dtype
================================================
FILE: sequoia/common/gym_wrappers/env_dataset.py
================================================
""" Creates an IterableDataset from a Gym Environment.
"""
import warnings
from typing import Dict, Generic, Iterable, Iterator, Optional, Sequence, Tuple, TypeVar, Union
import gym
from gym.vector import VectorEnv
from torch import Tensor
from torch.utils.data import IterableDataset
from sequoia.utils.logging_utils import get_logger
from .utils import ActionType
from .utils import MayCloseEarly as CloseableWrapper
from .utils import ObservationType, RewardType, StepResult
# from sequoia.settings.base.objects import Observations, Rewards, Actions
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Type variable for the items yielded when iterating over an `EnvDataset`.
Item = TypeVar("Item")
class EnvDataset(
CloseableWrapper,
IterableDataset,
Generic[ObservationType, ActionType, RewardType, Item],
Iterable[Item],
):
"""Wrapper that exposes a Gym environment as an IterableDataset.
This makes it possible to iterate over a gym env with an Active DataLoader.
One pass through __iter__ is one episode. The __iter__ method can be called
at most `max_episodes` times.
"""
def __init__(
self,
env: gym.Env,
max_steps: Optional[int] = None,
max_episodes: Optional[int] = None,
max_steps_per_episode: Optional[int] = None,
):
# TODO: Remove these options
if max_steps:
from .action_limit import ActionLimit
env = ActionLimit(env, max_steps=max_steps)
self._max_steps = max_steps
if max_episodes:
from .episode_limit import EpisodeLimit
env = EpisodeLimit(env, max_episodes=max_episodes)
self._max_episodes = max_episodes
super().__init__(env=env)
if isinstance(env.unwrapped, VectorEnv):
if not max_steps_per_episode:
warnings.warn(
UserWarning(
"Iterations through the dataset (episodes) could be "
"infinitely long, since the env is a VectorEnv and "
"max_steps_per_episode wasn't given!"
)
)
# Maximum number of episodes
# self._max_episodes = None
# Maximum number of steps per iteration.
# self._max_steps = None
self._max_steps_per_episode = max_steps_per_episode
# Number of steps performed in the current episode.
self.n_steps_in_episode_: int = 0
# Total number of steps performed so far.
self.n_steps_: int = 0
# Number of episodes performed in the environment.
# Starts at -1 so the initial was_reset doesn't count as the end of an episode.
self.n_episodes_: int = 0
# Number of times the `send` method was called.
self.n_sends_: int = 0
self.observation_: Optional[ObservationType] = None
self.action_: Optional[ActionType] = None
self.reward_: Optional[RewardType] = None
self.done_: Optional[Union[bool, Sequence[bool]]] = None
self.info_: Optional[Union[Dict, Sequence[Dict]]] = None
self.closed_: bool = False
self.reset_: bool = False
self.current_step_result_: StepResult = None
self.previous_step_result_: StepResult = None
def reset_counters(self):
self.n_steps_ = 0
self.n_episodes_ = 0
self.n_sends_ = 0
self.n_steps_in_episode_ = 0
def observation(self, observation):
return observation
def action(self, action):
return action
def reward(self, reward):
return reward
def step(self, action) -> StepResult:
if self.closed_ or self.is_closed():
if self.reached_episode_limit:
raise gym.error.ClosedEnvironmentError(
f"Env has already reached episode limit ({self._max_episodes}) and is closed."
)
elif self.reached_step_limit:
raise gym.error.ClosedEnvironmentError(
f"Env has already reached step limit ({self._max_steps}) and is closed."
)
else:
raise gym.error.ClosedEnvironmentError(
f"Can't call step on closed env. ({self.n_steps_})"
)
# Here we add calls to the (potentially overwritten) 'observation',
# 'action' and 'reward' methods.
action = self.action(action)
if isinstance(action, Tensor) and action.requires_grad:
action = action.detach()
observation, reward, done, info = super().step(action)
observation = self.observation(observation)
reward = self.reward(reward)
self.n_steps_ += 1
self.n_steps_in_episode_ += 1
result = StepResult(observation, reward, done, info)
self.previous_step_result_ = self.current_step_result_
self.current_step_result_ = result
return result
def __next__(
self,
) -> Tuple[ObservationType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]]:
"""Produces the next observations, or raises StopIteration.
Returns
-------
Tuple[ObservationType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]]
[description]
Raises
------
gym.error.ClosedEnvironmentError
If the env is already closed.
gym.error.ResetNeeded
If the env hasn't been reset before this is called.
StopIteration
When the step limit has been reached.
StopIteration
When the episode limit has been reached.
RuntimeError
When an action wasn't passed through 'send', and a default policy
isn't set.
"""
# logger.debug(f"__next__ is being called at step {self.n_steps_}.")
if self.closed_:
raise gym.error.ClosedEnvironmentError("Env is closed.")
if self.reached_episode_limit:
logger.debug("Reached episode limit, raising StopIteration.")
raise StopIteration
if self.reached_step_limit:
logger.debug("Reached step limit, raising StopIteration.")
raise StopIteration
if self.reached_episode_length_limit:
logger.debug("Reached episode length limit, raising StopIteration.")
raise StopIteration
if not self.reset_:
raise gym.error.ResetNeeded("Need to reset the env before you can call __next__")
if self.action_ is None:
raise RuntimeError("You have to send an action using send() between every observation.")
if hasattr(self.action_, "detach"):
self.action_ = self.action_.detach()
self.observation_, self.reward_, self.done_, self.info_ = self.step(self.action_)
return self.observation_
def send(self, action: ActionType) -> RewardType:
"""Sends an action to the environment, returning a reward.
This can raise the same errors as calling __next__, namely,
StopIteration, ResetNeeded, raise an error when if not called without
"""
assert action is not None, "Don't send a None action!"
self.action_ = action
self.observation_, self.reward_, self.done_, self.info_ = self.step(action)
# self.observation_ = self.__next__()
self.n_sends_ += 1
return self.reward_
def __iter__(self) -> Iterator[ObservationType]:
"""Iterator for an episode in the environment, which uses the 'active
dataset' style with __iter__ and send.
TODO: BUG: Wrappers applied on top of the EnvDataset won't have an
effect on the values yielded by this iterator. Currently trying to fix
this inside the IterableWrapper base class, but it's not that simple.
TODO: To allow wrappers to also be iterable, we need to rename all the
"private" attributes to "public" names, so that they can call something
like:
type(self.env).__iter__(self) (from within the wrapper).
Yields
-------
Observations
Observations from the environment.
Raises
------
RuntimeError
[description]
"""
if self.closed_ or self.is_closed():
if self.reached_episode_limit:
raise gym.error.ClosedEnvironmentError(
f"Env has already reached episode limit ({self._max_episodes}) and is closed."
)
elif self.reached_step_limit:
raise gym.error.ClosedEnvironmentError(
f"Env has already reached step limit ({self._max_steps}) and is closed."
)
else:
raise gym.error.ClosedEnvironmentError(f"Env is closed, can't iterate over it.")
# First step reset automatically before iterating, if needed.
if not self.reset_:
self.observation_ = self.reset()
self.done_ = False
self.action_ = None
self.reward_ = None
assert self.observation_ is not None
# Yield the first observation_.
# TODO: What do we want to yield, actually? Just observations?
yield self.observation_
if self.action_ is None:
raise RuntimeError(
f"You have to send an action using send() between every "
f"observation. (env = {self})"
)
# logger.debug(f"episode {self.n_episodes_}/{self._max_episodes}")
while not any(
[
self.done_is_true(),
self.reached_step_limit,
self.reached_episode_length_limit,
self.is_closed(),
]
):
# logger.debug(f"step {self.n_steps_}/{self._max_steps}, (episode {self.n_episodes_})")
# Set those to None to force the user to call .send()
self.action_ = None
self.reward_ = None
yield self.observation_
if self.action_ is None:
raise RuntimeError(
f"You have to send an action using send() between every "
f"observation. (env = {self})"
)
# Force the user to call reset() between episodes.
self.reset_ = False
self.n_episodes_ += 1
# logger.debug(f"self.n_steps: {self.n_steps_} self.n_episodes: {self.n_episodes_}")
# logger.debug(f"Reached step limit: {self.reached_step_limit}")
# logger.debug(f"Reached episode limit: {self.reached_episode_limit}")
# logger.debug(f"Reached episode length limit: {self.reached_episode_length_limit}")
if self.reached_episode_limit or self.reached_step_limit:
logger.debug("Done iterating, closing the env.")
self.close()
@property
def reached_step_limit(self) -> bool:
if self._max_steps is None:
return False
return self.n_steps_ >= self._max_steps
@property
def reached_episode_limit(self) -> bool:
if self._max_episodes is None:
return False
return self.n_episodes_ >= self._max_episodes
@property
def reached_episode_length_limit(self) -> bool:
if self._max_steps_per_episode is None:
return False
return self.n_steps_in_episode_ >= self._max_steps_per_episode
# @property
def done_is_true(self) -> bool:
"""Returns wether self.done_ is True.
This will always return False if the wrapped env is a VectorEnv,
regardless of if the some of the values in the self.done_ array are
true. This is because the VectorEnvs already reset the underlying envs
when they have done=True.
Returns
-------
bool
Wether the episode is considered "done" based on self.done_.
"""
if isinstance(self.done_, bool):
return self.done_
if isinstance(self.env.unwrapped, VectorEnv):
# VectorEnvs reset themselves, so we consider the "_done" as False,
# regarless
return False
if isinstance(self.done_, Tensor) and not self.done_.shape:
return bool(self.done_)
raise RuntimeError(
f"'done' should be a single boolean, but got "
f"{self.done_} of type {type(self.done_)})"
)
raise RuntimeError(f"Can't tell if we're done: self.done_={self.done_}")
def reset(self, **kwargs) -> ObservationType:
observation = self.env.reset(**kwargs)
self.observation_ = self.observation(observation)
self.reset_ = True
self.n_steps_in_episode_ = 0
# self.n_episodes_ += 1
return self.observation_
def close(self) -> None:
    """Mark the dataset as closed, drop the cached transition and close the env.

    Setting `closed_` is what stops the iterator on its next step.
    """
    self.closed_ = True
    # Forget the last action / observation / reward.
    for cached in ("action_", "observation_", "reward_"):
        setattr(self, cached, None)
    super().close()
# TODO: calling `len` on an RL environment probably shouldn't work! (it should
# behave the same exact way as an IterableDataset)
# def __len__(self) -> Optional[int]:
# if self._max_steps is None:
# raise RuntimeError(f"The dataset has no length when max_steps is None.")
# return self._max_steps
def __add__(self, other):
    """Support `self + other` by delegating to the generic `concatenate`."""
    # Imported lazily, inside the method, as in the original code.
    from sequoia.utils import generic_functions

    return generic_functions.concatenate(self, other)
================================================
FILE: sequoia/common/gym_wrappers/env_dataset_test.py
================================================
from functools import partial
from typing import ClassVar, Type
import gym
import numpy as np
import pytest
from gym import spaces
from sequoia.common.transforms import Transforms
from sequoia.conftest import DummyEnvironment, atari_py_required
from sequoia.settings.rl.continual.make_env import make_batched_env
from .env_dataset import EnvDataset
from .transform_wrappers import TransformObservation
class TestEnvDataset:
    """Tests for the `EnvDataset` wrapper: stepping, iterating with `send`, and
    the step / episode / episode-length limits.
    """

    # NOTE: We do this so that other tests for potential subclasses or wrappers around
    # an env dataset can reuse this while changing the type of wrapper used (for example
    # in the tests for `EnvProxy`).
    EnvDataset: ClassVar[Type[EnvDataset]] = EnvDataset

    @pytest.fixture()
    def dummy_env_fn(self):
        """Fixture giving the env factory used by the tests (`DummyEnvironment`)."""
        return DummyEnvironment

    def test_step_normally_works_fine(self, dummy_env_fn: Type[DummyEnvironment]):
        """The usual gym `reset` / `step` API still works through the wrapper."""
        env = dummy_env_fn()
        env = self.EnvDataset(env)
        env.seed(123)
        obs = env.reset()
        assert obs == 0
        obs, reward, done, info = env.step(0)
        assert (obs, reward, done, info) == (0, 5, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (1, 4, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (2, 3, False, {})
        obs, reward, done, info = env.step(2)
        assert (obs, reward, done, info) == (1, 4, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (2, 3, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (3, 2, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (4, 1, False, {})
        obs, reward, done, info = env.step(1)
        assert (obs, reward, done, info) == (5, 0, True, {})
        env.reset()
        obs, reward, done, info = env.step(0)
        assert (obs, reward, done, info) == (0, 5, False, {})

    def test_iterating_with_send(self, dummy_env_fn: Type[DummyEnvironment]):
        """Iterating yields observations; `send(action)` returns the reward."""
        env = dummy_env_fn(target=5)
        env = self.EnvDataset(env)
        env.seed(123)
        actions = [0, 1, 1, 2, 1, 1, 1, 1, 0, 0, 0]
        expected_obs = [0, 0, 1, 2, 1, 2, 3, 4, 5]
        expected_rewards = [5, 4, 3, 4, 3, 2, 1, 0]
        expected_dones = [False, False, False, False, False, False, False, True]
        reset_obs = 0
        # obs = env.reset()
        # assert obs == reset_obs
        n_calls = 0
        for i, observation in enumerate(env):
            print(f"Step {i}: batch: {observation}")
            assert observation == expected_obs[i]
            action = actions[i]
            reward = env.send(action)
            assert reward == expected_rewards[i]
        # TODO: The episode will end as soon as 'done' is encountered, which means
        # that we will never be given the 'final' observation. In this case, the
        # DummyEnvironment will set done=True when the state is state = target = 5
        # in this case.
        assert observation == 4

    def test_raise_error_when_missing_action(self, dummy_env_fn: Type[DummyEnvironment]):
        """Iterating without sending an action between steps raises RuntimeError."""
        env = dummy_env_fn()
        with self.EnvDataset(env) as env:
            env.reset()
            env.seed(123)
            with pytest.raises(RuntimeError):
                for i, observation in zip(range(5), env):
                    pass

    def test_doesnt_raise_error_when_action_sent(self, dummy_env_fn: Type[DummyEnvironment]):
        """Iterating works when an action is sent after each observation."""
        env = dummy_env_fn()
        with self.EnvDataset(env) as env:
            env.reset()
            env.seed(123)
            for i, obs in zip(range(5), env):
                assert obs in env.observation_space
                reward = env.send(env.action_space.sample())

    def test_max_episodes(self):
        """After `max_episodes` episodes, iterating raises ClosedEnvironmentError."""
        max_episodes = 3
        env = self.EnvDataset(
            env=gym.make("CartPole-v0"),
            max_episodes=max_episodes,
        )
        env.seed(123)
        for episode in range(max_episodes):
            # This makes use of the fact that given this seed, the episode should only
            # last a set number of frames.
            for i, observation in enumerate(env):
                print(f"step {i} {observation}")
                action = 0
                reward = env.send(action)
                if i >= 50:
                    assert False, "The episode should never be longer than about 10 steps!"
        with pytest.raises(gym.error.ClosedEnvironmentError):
            for i, observation in enumerate(env):
                print(f"step {i} {observation}")
                env.send(env.action_space.sample())

    def test_max_steps(self):
        """Iteration stops after `max_steps` steps and the env is then closed."""
        epochs = 3
        max_steps = 5
        env = self.EnvDataset(
            env=gym.make("CartPole-v0"),
            max_steps=max_steps,
        )
        all_rewards = []
        all_observations = []
        # NOTE(review): the extent of this `with` block was reconstructed; confirm
        # which of the checks below were meant to run before the context exits.
        with env:
            # TODO: Should we could what is given back by 'reset' as an observation?
            all_observations.append(env.reset())
            for i, batch in enumerate(env):
                assert i < max_steps, f"Max steps should have been respected: {i}"
                rewards = env.send(env.action_space.sample())
                all_rewards.append(rewards)
            assert len(all_rewards) == max_steps
            with pytest.raises(gym.error.ClosedEnvironmentError):
                env.reset()
            with pytest.raises(gym.error.ClosedEnvironmentError):
                for i in range(10):
                    print(i)
                    observation = next(env)
                    rewards = env.send(env.action_space.sample())
                    all_rewards.append(rewards)
            assert len(all_rewards) == max_steps

    def test_max_steps_per_episode(self):
        """Each episode is cut after `max_steps_per_episode` steps."""
        n_episodes = 4
        max_steps_per_episode = 5
        env = self.EnvDataset(
            env=gym.make("CartPole-v0"),
            max_steps_per_episode=max_steps_per_episode,
        )
        all_observations = []
        with env:
            for episode in range(n_episodes):
                env.reset()
                for i, batch in enumerate(env):
                    assert (
                        i < max_steps_per_episode
                    ), f"Max steps per episode should have been respected: {i}"
                    rewards = env.send(env.action_space.sample())
                assert i == max_steps_per_episode - 1

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_not_setting_max_steps_per_episode_with_vector_env_raises_warning(
        self, env_name: str, batch_size: int
    ):
        """Wrapping a VectorEnv without `max_steps_per_episode` emits a UserWarning."""
        from functools import partial
        from gym.vector import SyncVectorEnv
        env = SyncVectorEnv([partial(gym.make, env_name) for i in range(batch_size)])
        with pytest.warns(UserWarning):
            dataset = self.EnvDataset(env)
        env.close()

    @atari_py_required
    def test_observation_wrapper_applies_to_yielded_objects(self):
        """Test that when an TransformObservation wrapper (or any wrapper that
        changes the Observations) is applied on the env, the observations that are
        yielded by the GymDataLoader are also transformed, in the same way as those
        returned by step() or reset().
        """
        env_name = "ALE/Breakout-v5"
        batch_size = 10
        num_workers = 4
        max_steps_per_episode = 100
        wrapper = partial(TransformObservation, f=Transforms.channels_first)
        vector_env = make_batched_env(env_name, batch_size=batch_size, num_workers=num_workers)
        env = self.EnvDataset(vector_env, max_steps_per_episode=max_steps_per_episode)
        assert env.observation_space == spaces.Box(0, 255, (10, 210, 160, 3), np.uint8)
        env = TransformObservation(env, f=Transforms.channels_first)
        # env = wrapper(env)
        assert env.observation_space == spaces.Box(0, 255, (10, 3, 210, 160), np.uint8)
        # env = DummyWrapper(env)
        # assert env.observation_space == spaces.Box(0, 255 // 2, (10, 210, 160, 3), np.uint8)
        print("Before reset")
        reset_obs = env.reset()
        assert reset_obs in env.observation_space
        print("Before step")
        step_obs, _, _, _ = env.step(env.action_space.sample())
        assert step_obs in env.observation_space
        # We need to send an action before we can do this.
        action = env.action_space.sample()
        print(f"Before send")
        reward = env.send(action)
        # TODO: Perhaps going to drop this API, because if really complicates the
        # wrappers.
        print("Before __next__")
        next_obs = next(env)
        assert next_obs.shape == env.observation_space.shape
        assert next_obs in env.observation_space
        print(f"Before iterating")
        # TODO: This still doesn't call the right .observation() method!
        for i, iter_obs in zip(range(3), env):
            assert iter_obs.shape == env.observation_space.shape
            assert iter_obs in env.observation_space
            action = env.action_space.sample()
            reward = env.send(action)
        env.close()

    @atari_py_required
    def test_iteration_with_more_than_one_wrapper(self):
        """Same as above, but with more than one wrapper applied on top of the
        EnvDataset.
        """
        env_name = "ALE/Breakout-v5"
        batch_size = 10
        num_workers = 4
        max_steps_per_episode = 100
        vector_env = make_batched_env(env_name, batch_size=batch_size, num_workers=num_workers)
        env = self.EnvDataset(vector_env, max_steps_per_episode=max_steps_per_episode)
        assert env.observation_space == spaces.Box(0, 255, (10, 210, 160, 3), np.uint8)
        env = TransformObservation(env, f=Transforms.channels_first)
        assert env.observation_space == spaces.Box(0, 255, (10, 3, 210, 160), np.uint8)
        env = TransformObservation(env, f=[Transforms.to_tensor, Transforms.resize_64x64])
        assert env.observation_space == spaces.Box(0, 1.0, (10, 3, 64, 64), np.float32)
        # env = DummyWrapper(env)
        # assert env.observation_space == spaces.Box(0, 255 // 2, (10, 210, 160, 3), np.uint8)
        print("Before reset")
        reset_obs = env.reset().numpy()
        assert reset_obs in env.observation_space
        print("Before step")
        step_obs, _, _, _ = env.step(env.action_space.sample())
        assert step_obs.numpy() in env.observation_space
        # We need to send an action before we can do this.
        action = env.action_space.sample()
        print(f"Before send")
        reward = env.send(action)
        print("Before __next__")
        next_obs = next(env).numpy()
        assert next_obs in env.observation_space
        print(f"Before iterating")
        # TODO: This still doesn't call the right .observation() method!
        for i, iter_obs in zip(range(3), env):
            assert iter_obs.shape == env.observation_space.shape
            assert iter_obs.numpy() in env.observation_space
            action = env.action_space.sample()
            reward = env.send(action)
        env.close()
================================================
FILE: sequoia/common/gym_wrappers/episode_limit.py
================================================
# IDEA: Limit the total number of episodes, even in vectorized
# environments!
import warnings
from typing import Sequence, Union
import gym
import numpy as np
from gym.error import ClosedEnvironmentError
from gym.utils import colorize
from sequoia.utils import get_logger
from .utils import IterableWrapper
logger = get_logger(__name__)
class EpisodeCounter(IterableWrapper):
    """Counts the number of episodes performed in the wrapped environment.

    NOTE: This also applies to vectorized environments, i.e. the episode counter
    is incremented for when every individual environment reaches the end of an
    episode.

    The counter itself imposes no limit: `_max_episodes` is None here and is
    only set to an actual budget by subclasses such as `EpisodeLimit`.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env=env)
        self._episode_counter: int = 0
        # No episode budget by default. Defined here because `reset()` reads it;
        # without it a plain EpisodeCounter would fail on its first reset.
        self._max_episodes: Union[int, None] = None
        self._done: Union[bool, Sequence[bool]] = False
        if self.is_vectorized:
            self._done = np.zeros(self.env.num_envs, dtype=bool)
        self._initial_reset: bool = False

    def episode_count(self) -> int:
        """Return the number of episodes counted so far."""
        return self._episode_counter

    def reset(self):
        """Reset the wrapped env and update the episode counter.

        Non-vectorized envs: every call counts as one new episode.
        Vectorized envs: the first call sets the counter to 0; each later call
        adds the number of envs whose `_done` flag is False.

        Raises:
            ClosedEnvironmentError: if a limit is set and was already reached.
        """
        obs = super().reset()
        limit = self._max_episodes
        if limit is not None and self._episode_counter >= limit:
            raise ClosedEnvironmentError(f"Env reached max number of episodes ({limit})")

        if self.is_vectorized:
            if not self._initial_reset:
                self._initial_reset = True
                self._episode_counter = 0
            else:
                # Resetting all envs.
                # NOTE(review): `_done` is never written by `step()` in this
                # class, so this looks like it always counts every env - confirm.
                n_unfinished_envs = int((~self._done).sum())
                self._episode_counter += n_unfinished_envs
                self._done[:] = False
        else:
            # Increment every time for non-vectorized env, or just once for
            # VectorEnvs.
            self._episode_counter += 1
        return obs

    def step(self, action):
        """Step the wrapped env; for vectorized envs, count the finished episodes."""
        obs, reward, done, info = self.env.step(action)
        if self.is_vectorized:
            self._episode_counter += int((done == True).sum())  # noqa: E712
        # NOTE: We don't increment the episode counter based on `done` here
        # with non-vectorized environments. Instead, we count the number of
        # calls to the `reset()` method.
        return obs, reward, done, info
class EpisodeLimit(EpisodeCounter):
    """Closes the environment when a given number of episodes is performed.

    NOTE: This also applies to vectorized environments, i.e. the episode counter
    is incremented for when every individual environment reaches the end of an
    episode.
    """

    def __init__(self, env: gym.Env, max_episodes: int):
        super().__init__(env=env)
        self._max_episodes = max_episodes

    @property
    def max_episodes(self) -> int:
        """The total episode budget of this env."""
        return self._max_episodes

    def closed_error_message(self) -> str:
        """Return the error message to use when attempting to use the closed env.

        This can be useful for wrappers that close when a given condition is reached,
        e.g. a number of episodes has been performed, which could return a more relevant
        message here.
        """
        return f"Env reached max number of episodes ({self.max_episodes})"

    def reset(self):
        """Reset the env, warning when a VectorEnv reset cuts episodes short."""
        # NOTE: MayCloseEarly.reset() will raise a ClosedEnvironmentError if
        # self.is_closed() is True, which will always be the case if we exceed the
        # limit.
        obs = super().reset()
        assert not self.is_closed()

        if self.is_vectorized:
            n_unfinished_envs: int = (~self._done).sum()
            if self._episode_counter != 0 and n_unfinished_envs:
                # Wasting some steps in unfinished environments!
                w = UserWarning(
                    f"Calling .reset() on a VectorEnv resets all the envs, "
                    f"ending episodes prematurely. This env has a limit of "
                    f"{self._max_episodes} episodes in total, so by calling "
                    f"reset() here, you could be wasting {n_unfinished_envs} "
                    f"episodes from your budget!"
                )
                warnings.warn(colorize(f"WARN: {w}", "yellow"))

        # The original message ended with a stray, unbalanced ")".
        logger.debug(f"Starting episode {self._episode_counter}/{self._max_episodes}")
        if self._episode_counter == self._max_episodes:
            logger.warning("Beware, entering last episode")
        return obs

    def __iter__(self):
        # Explicit pass-through; same behaviour as the inherited `__iter__`.
        return super().__iter__()

    def step(self, action):
        """Step the env and close it once the episode budget is used up.

        Raises:
            ClosedEnvironmentError: if the env is already closed.
        """
        if self.is_closed():
            if self._episode_counter >= self._max_episodes:
                raise ClosedEnvironmentError(self.closed_error_message())
            raise ClosedEnvironmentError("Can't step through closed env.")

        obs, reward, done, info = super().step(action)
        budget_spent = self._episode_counter >= self._max_episodes

        if self.is_vectorized:
            # BUG: This can be reached while in the last 'send' (which uses self.send)
            # of the previous epoch while iterating
            if any(done) and budget_spent:
                logger.info("Closing the envs since we reached the max number of episodes.")
                self.close()
                # Report every env as done, since none of them can be used anymore.
                done[:] = True
        elif done and budget_spent:
            logger.info("Closing the env since we reached the max number of episodes.")
            self.close()
        return obs, reward, done, info
================================================
FILE: sequoia/common/gym_wrappers/episode_limit_test.py
================================================
from functools import partial
import gym
import numpy as np
import pytest
from gym.vector import SyncVectorEnv
from gym.wrappers import TimeLimit
from sequoia.conftest import DummyEnvironment
from .env_dataset import EnvDataset
from .episode_limit import EpisodeLimit
def test_basics():
    """EpisodeLimit on top of an EnvDataset closes the env after 3 full episodes,
    after which reset / step / iteration all raise ClosedEnvironmentError.
    """
    env = TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10)
    env = EnvDataset(env)
    env = EpisodeLimit(env, max_episodes=3)
    env.seed(123)
    for episode in range(3):
        obs = env.reset()
        done = False
        step = 0
        while not done:
            print(f"step {step}")
            obs, reward, done, info = env.step(env.action_space.sample())
            step += 1
    assert env.is_closed()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())
    with pytest.raises(gym.error.ClosedEnvironmentError):
        for _ in env:
            break
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_episode_limit_with_single_env(env_name: str):
    """EpisodeLimit should close the env when a given number of episodes is
    reached.
    """
    env = gym.make(env_name)
    env = EpisodeLimit(env, max_episodes=3)
    env.seed(123)
    done = False
    assert env.episode_count() == 0
    # First episode.
    obs = env.reset()
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    assert env.episode_count() == 1
    # Second episode.
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    assert env.episode_count() == 2
    # Third episode.
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    assert env.episode_count() == 3
    # The budget is spent: the env closes itself and refuses further use.
    assert env.is_closed()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        obs = env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        _ = env.step(env.action_space.sample())
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_episode_limit_with_single_env_dataset(env_name: str):
    """EpisodeLimit should close the env when a given number of episodes is
    reached when iterating through the env.
    """
    env = gym.make(env_name)
    env = EpisodeLimit(env, max_episodes=2)
    env = EnvDataset(env)
    # TODO: The reverse ordering doesn't work: (EnvDataset(EpisodeLimit))
    # TODO: There's a warning that doing this steps even though done = True?
    env.seed(123)
    done = False
    # First episode.
    for obs in env:
        print("in loop:", env.episode_count())
        reward = env.send(env.action_space.sample())
    print("between loops", env.episode_count())
    # Second episode.
    for i, obs in enumerate(env):
        print("Second loop", env.episode_count())
        reward = env.send(env.action_space.sample())
    # Trying to start a third episode should fail:
    # NOTE(review): nesting reconstructed; the `for` below is assumed to sit
    # inside the `pytest.raises` block, after `env.reset()` - confirm.
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
        for obs in env:
            assert False
@pytest.mark.parametrize("batch_size", [3, 5])
def test_episode_limit_with_vectorized_env(batch_size):
    """Test that when adding the EpisodeLimit wrapper on top of a vectorized
    environment, the episode limit is with respect to each individual env rather
    than the batched env.
    """
    starting_values = [0 for i in range(batch_size)]
    targets = [10 for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    env = EpisodeLimit(env, max_episodes=2 * batch_size)

    obs = env.reset()
    assert obs.tolist() == starting_values
    print("reset obs: ", obs)
    for i in range(10):
        print(i, obs)
        actions = np.ones(batch_size)
        obs, reward, done, info = env.step(actions)

    # all episodes end at step 10
    assert all(done)
    # Because of how VectorEnvs work, the obs are the new 'reset' obs, rather
    # than the final obs in the episode.
    assert obs.tolist() == starting_values
    print("reset obs: ", obs)
    for i in range(10):
        print(i, obs)
        actions = np.ones(batch_size)
        obs, reward, done, info = env.step(actions)

    # all episodes end at step 10
    assert all(done)
    # `is_closed` is a method: asserting the bound method itself is always
    # truthy, so it has to be called for this check to mean anything.
    assert env.is_closed()
    assert obs.tolist() == starting_values

    with pytest.raises(gym.error.ClosedEnvironmentError):
        actions = np.ones(batch_size)
        obs, reward, done, info = env.step(actions)
# @pytest.mark.xfail(reason="TODO: Fix the bugs in the interaction between "
# "EnvDataset and EpisodeLimit.")
@pytest.mark.parametrize("batch_size", [3, 5])
def test_episode_limit_with_vectorized_env_dataset(batch_size):
    """Test that when adding the EpisodeLimit wrapper on top of a vectorized
    environment, the episode limit is with respect to each individual env rather
    than the batched env.
    """
    start = 0
    target = 10
    starting_values = [start for i in range(batch_size)]
    targets = [target for i in range(batch_size)]
    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    max_episodes = 2
    # TODO: For some reason the reverse order doesn't work!
    env = EpisodeLimit(env, max_episodes=max_episodes * batch_size)
    env = EnvDataset(env)
    for i, obs in enumerate(env):
        print(i, obs)
        actions = np.ones(batch_size)
        reward = env.send(actions)
    # The loop is expected to run for `max_episodes * target` steps in total.
    assert i == max_episodes * target - 1
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        for i, obs in enumerate(env):
            print(i, obs)
            actions = np.ones(batch_size)
            reward = env.send(actions)
    # all episodes end at step 10
# @pytest.mark.xfail(reason=f"BUG in EnvDataset, it doesn't finish ")
@pytest.mark.parametrize("batch_size", [3, 5])
def test_reset_vectorenv_with_unfinished_episodes_raises_warning(batch_size):
    """Test that calling `reset()` on a vectorized env wrapped with EpisodeLimit,
    after a couple of steps into the episodes, emits a UserWarning.
    """
    start = 0
    target = 10
    starting_values = [start for i in range(batch_size)]
    targets = [target for i in range(batch_size)]
    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    env = EpisodeLimit(env, max_episodes=3 * batch_size)
    obs = env.reset()
    _ = env.step(env.action_space.sample())
    _ = env.step(env.action_space.sample())
    with pytest.warns(UserWarning) as record:
        env.reset()
================================================
FILE: sequoia/common/gym_wrappers/measure_performance.py
================================================
""" Abstract base class for a Wrapper that gets applied onto the environment in order to
measure the online training performance.
The concrete versions of this wrapper are located.
"""
from abc import ABC
from typing import Dict, Generic, List, Optional
from sequoia.common.gym_wrappers.utils import EnvType, IterableWrapper
from sequoia.common.metrics import MetricsType
from sequoia.settings.base import Environment
class MeasurePerformanceWrapper(IterableWrapper[EnvType], Generic[EnvType, MetricsType], ABC):
    """Abstract wrapper that records metrics while the env is interacted with.

    This base class only stores the metrics (in `_metrics`, keyed by step
    number) and exposes them; it does not itself record anything.
    """

    def __init__(self, env: Environment):
        super().__init__(env)
        # Maps a step number to the Metrics object captured at that step.
        self._metrics: Dict[int, MetricsType] = {}

    def get_online_performance(self) -> Dict[int, MetricsType]:
        """Returns the online performance over the evaluation period.

        Returns
        -------
        Dict[int, MetricsType]
            A dict mapping from step number to the Metrics object captured at that step.
            This is a shallow copy: mutating it does not affect the wrapper.
        """
        return dict(self._metrics)

    def get_average_online_performance(self) -> Optional[MetricsType]:
        """Returns the average online performance over the evaluation period, or None
        if the env was not iterated over / interacted with.

        Returns
        -------
        Optional[MetricsType]
            The result of `sum()` over the stored Metrics objects.
            NOTE(review): presumably adding Metrics objects yields their running
            average (and `0 + metrics` is supported) - confirm in the Metrics class.
        """
        if not self._metrics:
            return None
        return sum(self._metrics.values())
================================================
FILE: sequoia/common/gym_wrappers/multi_task_environment.py
================================================
import bisect
import dataclasses
from functools import singledispatch
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Type, TypeVar, Union
import gym
import numpy as np
from gym import spaces
from gym.envs.classic_control import CartPoleEnv
from torch import Tensor
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.utils.logging_utils import get_logger
from .utils import MayCloseEarly
task_param_names: Dict[Union[Type[gym.Env], str], List[str]] = {
CartPoleEnv: ["gravity", "masscart", "masspole", "length", "force_mag", "tau"]
# TODO: Add more of the classic control envs here.
}
logger = get_logger(__name__)
X = TypeVar("X")
T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")
def make_env_attributes_task(
    env: gym.Env,
    task_params: Union[List[str], Dict[str, Any]],
    seed: int = None,
    rng: np.random.Generator = None,
    noise_std: float = 0.2,
) -> Dict[str, Any]:
    """Sample a random 'task': new values for some attributes of an environment.

    Args:
        env: The environment. Only used (through `env.unwrapped`) to read the
            default values when `task_params` is a list of attribute names.
        task_params: Either the names of the attributes to vary, or a dict
            mapping attribute names to their default values.
        seed: Seed for the random generator created when `rng` is not given.
        rng: Random generator to use. Takes precedence over `seed`.
        noise_std: Standard deviation of the multiplicative noise (mean 1.0)
            applied to numerical values.

    Returns:
        A dict mapping each attribute name to its sampled value:
        - bool: a uniformly random boolean;
        - int / float / ndarray: the default value times a single noise factor,
          clipped to lie between 0.1x and 10x the default (ints are rounded).

    Raises:
        NotImplementedError: for default values of any other type.
    """
    rng = rng or np.random.default_rng(seed)
    if isinstance(task_params, list):
        task_params = {param: getattr(env.unwrapped, param) for param in task_params}

    task: Dict[str, Any] = {}
    for attribute, default_value in task_params.items():
        # Booleans must be checked first: `bool` is a subclass of `int`, so the
        # numerical branch would otherwise turn them into noisy integers.
        if isinstance(default_value, (bool, np.bool_)):
            new_value = bool(rng.choice([True, False]))
        elif isinstance(default_value, (int, float, np.ndarray)):
            # Out-of-place multiplication: never mutate the caller's (or the
            # env's) array.
            new_value = default_value * rng.normal(1.0, noise_std)
            # Clip the value to lie between 0.1*default and 10*default. The two
            # bounds swap order when the default is negative.
            bound_a = 0.1 * default_value
            bound_b = 10 * default_value
            if isinstance(default_value, np.ndarray):
                new_value = np.clip(
                    new_value, np.minimum(bound_a, bound_b), np.maximum(bound_a, bound_b)
                )
            else:
                low, high = min(bound_a, bound_b), max(bound_a, bound_b)
                new_value = min(high, max(low, new_value))
                if isinstance(default_value, int):
                    new_value = round(new_value)
        else:
            raise NotImplementedError(
                f"TODO: Don't yet know how to sample a random value for "
                f"attribute {attribute} with default value {default_value} of type "
                f" {type(default_value)}."
            )
        task[attribute] = new_value
    return task
# class ObservationsAndTaskLabels(NamedTuple):
# x: Any
# task_labels: Any
@singledispatch
def add_task_labels(observation: Any, task_labels: Any) -> Any:
    """Add `task_labels` to `observation` (generic function).

    Dispatches on the type of `observation`; this fallback is reached for
    types that have no registered handler.

    Raises:
        NotImplementedError: always, carrying both arguments.
    """
    raise NotImplementedError(observation, task_labels)
@add_task_labels.register(int)
@add_task_labels.register(float)
@add_task_labels.register(Tensor)
@add_task_labels.register(np.ndarray)
def _add_task_labels_to_single_obs(observation: X, task_labels: T) -> Dict[str, Union[X, T]]:
    """Pair a single observation (number, tensor or array) with its task labels.

    Returns a dict with the observation under "x" and the labels under
    "task_labels" (a dict, not a tuple as the previous annotation claimed).
    """
    return {
        "x": observation,
        "task_labels": task_labels,
    }
# return ObservationsAndTaskLabels(observation, task_labels)
from sequoia.common.batch import Batch
@add_task_labels.register(Batch)
def _add_task_labels_to_batch(observation: Batch, task_labels: T) -> Batch:
    """Return a copy of the `Batch` whose `task_labels` field is replaced."""
    with_labels = dataclasses.replace(observation, task_labels=task_labels)
    return with_labels
from sequoia.common.spaces import TypedDictSpace
@add_task_labels.register(spaces.Space)
def _add_task_labels_to_space(observation: spaces.Space, task_labels: T) -> spaces.Dict:
    """Wrap a space and a task-label space into a `TypedDictSpace` with the
    keys "x" and "task_labels".
    """
    # TODO: Return a dict or NamedTuple at some point:
    subspaces = {"x": observation, "task_labels": task_labels}
    return TypedDictSpace(**subspaces)
# return NamedTupleSpace(
# x=observation, task_labels=task_labels, dtype=ObservationsAndTaskLabels,
# )
@add_task_labels.register(NamedTupleSpace)
def _add_task_labels_to_namedtuple(
    observation: NamedTupleSpace, task_labels: gym.Space
) -> NamedTupleSpace:
    """Return a new space of the same type with an extra `task_labels` entry,
    keeping the `dtype` of the original space.
    """
    existing = observation._spaces
    assert "task_labels" not in existing, "space already has task labels!"
    space_type = type(observation)
    return space_type(**existing, task_labels=task_labels, dtype=observation.dtype)
@add_task_labels.register(spaces.Tuple)
@add_task_labels.register(tuple)
def _add_task_labels_to_tuple(observation: Tuple, task_labels: T) -> Tuple:
    """Append the task labels as the last item of a tuple (or Tuple space).

    The result is built by calling `type(observation)` on the extended list.
    """
    items = list(observation)
    items.append(task_labels)
    return type(observation)(items)
@add_task_labels.register(spaces.Dict)
def _add_task_labels_to_dict_space(observation: spaces.Dict, task_labels: T) -> spaces.Dict:
    """Return a new Dict space of the same type with a "task_labels" subspace added.

    The input space must not already have a "task_labels" entry.
    """
    assert "task_labels" not in observation.spaces
    extended = {**observation.spaces, "task_labels": task_labels}
    return type(observation)(**extended)
@add_task_labels.register(TypedDictSpace)
def _add_task_labels_to_typed_dict_space(
    observation: TypedDictSpace, task_labels: T
) -> TypedDictSpace:
    """Return a new typed dict space with "task_labels" set (or overwritten),
    keeping the `dtype` of the original space.
    """
    # TODO: Raise a warning instead?
    # assert "task_labels" not in observation.spaces, observation
    extended = {**observation.spaces, "task_labels": task_labels}
    # NOTE: We assume here that the `dtype` of the typed dict space (e.g. the
    # `Observations` class, usually) can handle having a `task_labels` field.
    space_type = type(observation)
    return space_type(**extended, dtype=observation.dtype)
@add_task_labels.register(dict)
def _add_task_labels_to_dict(observation: Dict[str, V], task_labels: T) -> Dict[str, Union[V, T]]:
    """Return a new dict of the same type with "task_labels" set (or overwritten).

    The input dict is left unchanged.
    """
    merged: Dict[str, Union[V, T]] = dict(observation.items())
    # TODO: Raise a warning instead?
    # assert "task_labels" not in merged
    merged["task_labels"] = task_labels
    return type(observation)(**merged)  # type: ignore
class MultiTaskEnvironment(MayCloseEarly):
"""Creates 'tasks' by modifying attributes or applying functions to the wrapped env.
This wrapper accepts a `task_schedule` dictionary, which maps from a given
step to either:
- dicts of attributes that are to be set on the (unwrapped) env at that step, or
- callables to apply to the wrapped environment at the given steps.
For example, when wrapping the "CartPole-v0" environment, we could vary any
of the "gravity", "masscart", "masspole", "length", "force_mag" or "tau"
attributes like so:
```
env = gym.make("CartPole-v0")
env = MultiTaskEnvironment(env, task_schedule={
# step -> attributes to set on the environment when step is reached.
10: dict(length=2.0),
20: dict(length=1.0, gravity=20.0),
30: dict(length=0.5, gravity=5.0),
})
env.seed(123)
env.reset()
```
During steps 0-9, the environment is unchanged (length = 0.5).
At step 10, the length of the pole will be set to 2.0
At step 20, the length of the pole will be set to 1.0, and the gravity will
be changed from its default value (9.8) to 20.
etc.
TODO: Might be more accurate to call this a `TaskIncrementalEnvironment`, rather
than `MultiTaskEnvironment`, which is more related to the `new_random_task_on_reset`
behaviour anyway.
TODOs:
- Copy this to a `incremental_environment.py` or something similar
- Remove all references to this `new_random_task_on_reset` stuff.
- Rename "smooth_environment" to "nonstationary_environment"?
"""
def __init__(
    self,
    env: gym.Env,
    task_schedule: Dict[int, Union[Dict[str, float], Callable[[gym.Env], Any]]] = None,
    task_params: List[str] = None,
    noise_std: float = 0.2,
    add_task_dict_to_info: bool = False,
    add_task_id_to_obs: bool = False,
    new_random_task_on_reset: bool = False,
    starting_step: int = 0,
    nb_tasks: int = None,
    max_steps: int = None,
    seed: int = None,
):
    """Wraps an environment, allowing it to be 'multi-task'.

    NOTE: Assumes that all the attributes in 'task_param_names' are floats
    for now.

    TODO: Check the case where a task boundary is reached and the episode is not
    done yet.

    Args:
        env (gym.Env): The environment to wrap.
        task_schedule (Dict[int, Dict[str, float]], optional): Schedule
            mapping from a given step number to the state that will be set
            at that time. An entry for step 0 (the default task) is added
            when missing.
        task_params (List[str], optional): The attributes of the
            environment that will be allowed to change. Defaults to None, in
            which case the known names for the type of the unwrapped env
            (`task_param_names`) are used when available.
        noise_std (float, optional): The standard deviation of the noise
            used to create the different tasks.
        add_task_dict_to_info (bool, optional): Whether to add the task dict
            (the values of the attributes) to the 'info' dict.
        add_task_id_to_obs (bool, optional): Whether to add a task id to the
            observations (and to the observation space).
        new_random_task_on_reset (bool, optional): Stored as an attribute of
            the same name; it is read by `step`, `reset` and `current_task_id`.
        starting_step (int, optional): Initial value of the step counter.
        nb_tasks (int, optional): Size of the discrete task-label space.
            Defaults to the number of entries in the task schedule.
        max_steps (int, optional): Stored in `_max_steps`.
        seed (int, optional): Passed to `self.seed(...)`.
    """
    super().__init__(env=env)
    self.env: gym.Env
    self.noise_std = noise_std
    if not task_params:
        unwrapped_type = type(env.unwrapped)
        if unwrapped_type in task_param_names:
            task_params = task_param_names[unwrapped_type]
        elif task_schedule:
            # NOTE(review): nesting reconstructed. As written, the loop below
            # only runs when NO schedule value is a dict, in which case it can
            # never find one, so `task_params` stays None. The guard may be
            # inverted, or the loop may belong outside of it - confirm.
            if not any(isinstance(v, dict) for v in task_schedule.values()):
                task_params: List[str] = None
                for value in task_schedule.values():
                    if not isinstance(value, dict):
                        continue
                    if task_params is None:
                        task_params = list(value.keys())
                    elif not task_params == list(value.keys()):
                        raise NotImplementedError(
                            "All tasks need to have the same keys for now."
                        )
        else:
            logger.warning(
                UserWarning(
                    f"You didn't pass any 'task params', and the task "
                    f"parameters aren't known for this type of environment "
                    f"({unwrapped_type}), so we can't make it multi-task with "
                    f"this wrapper."
                )
            )
    self._max_steps: Optional[int] = max_steps
    self._starting_step: int = starting_step
    self._steps: int = self._starting_step
    self._episodes: int = 0
    self._current_task: Dict = {}
    self._task_schedule: Dict[int, Dict[str, Any]] = task_schedule or {}
    self.task_params: List[str] = task_params or []
    # Snapshot of the task at construction time; `current_task` is defined
    # outside of this block.
    self.default_task: np.ndarray = self.current_task.copy()
    self.task_schedule = task_schedule or {}
    self.new_random_task_on_reset: bool = new_random_task_on_reset
    # Wether we will add a task id to the observation.
    self.add_task_id_to_obs = add_task_id_to_obs
    # Wether we will add the task dict (the values of the attributes) to the
    # 'info' dict.
    self.add_task_dict_to_info = add_task_dict_to_info
    # Make sure that there is always a task at step 0.
    if 0 not in self.task_schedule:
        self.task_schedule[0] = self.default_task
    # TODO: Need to do a major refactor of this wrapper.
    # Need to clean this up: passing the task schedule to the env and having it "mean" different
    # things depending on the value other arguments (discrete vs continuous, etc) is very ugly.
    nb_tasks = nb_tasks if nb_tasks is not None else len(self.task_schedule)
    if self.add_task_id_to_obs:
        self.observation_space = add_task_labels(
            self.env.observation_space,
            spaces.Discrete(n=nb_tasks),
        )
        # self.observation_space = spaces.Tuple([
        #     self.env.observation_space,
        #     spaces.Discrete(n=n_tasks)
        # ])
    # self._closed = False
    self._on_task_switch_callback: Optional[Callable[[int], None]] = None
    self.np_random: np.random.Generator
    self.seed(seed)
@property
def current_task_id(self) -> int:
    """Index of the current task within the (sorted) task schedule.

    When `new_random_task_on_reset` is set, this is the value stored through
    the setter. Otherwise it is the index of the last schedule key that is
    smaller than or equal to the current step count.
    """
    if self.new_random_task_on_reset:
        # The task id is the index of the key that corresponds to the current task.
        return self._current_task_id

    step = self._steps
    assert step >= 0
    boundaries: List[int] = sorted(self.task_schedule)
    assert 0 in boundaries
    # Number of boundaries at or before `step`, minus one, is the task index.
    return bisect.bisect_right(boundaries, step) - 1


@current_task_id.setter
def current_task_id(self, value: int) -> None:
    """Store the task id returned when `new_random_task_on_reset` is set."""
    self._current_task_id = value
def set_on_task_switch_callback(self, callback: Callable[[int], None]) -> None:
    """Register `callback`, the function that `on_task_switch` invokes with a task id.

    Any previously registered callback is replaced.
    """
    self._on_task_switch_callback = callback
def on_task_switch(self, task_id: int):
# Hook for a (potential) change of task: logs a debug message when `task_id`
# differs from `self.current_task_id`, and passes `task_id` on to the callback
# registered through `set_on_task_switch_callback`, when one is set.
# NOTE(review): `step` derives the `task_id` it passes here from the same step
# counter and schedule that `current_task_id` uses -- confirm that the
# comparison below can actually be True on that path.
if task_id != self.current_task_id:
logger.debug(f"Switching from {self.current_task_id} -> {task_id}.")
# TODO: We could maybe use this to call the method's 'on_task_switch'
# callback?
if self._on_task_switch_callback:
self._on_task_switch_callback(task_id)
def step(self, *args, **kwargs):
    """Step the wrapped env, first switching task if the schedule has an entry for this step.

    Schedule-driven switches are skipped when `new_random_task_on_reset` is set.
    Depending on the wrapper options, the task id is added to the observation and
    the current task dict is merged into `info`. The step counter is incremented
    after the wrapped env has been stepped.

    Returns:
        The usual `(observation, rewards, done, info)` tuple.
    """
    at_task_boundary = self.steps in self.task_schedule
    if at_task_boundary and not self.new_random_task_on_reset:
        self.current_task = self.task_schedule[self.steps]
        logger.debug(f"New task at step {self.steps}: {self.current_task}")
        # The task id is the rank of this step among the (sorted) schedule keys.
        ordered_steps = sorted(self.task_schedule.keys())
        self.on_task_switch(ordered_steps.index(self.steps))

    observation, rewards, done, info = super().step(*args, **kwargs)

    if self.add_task_id_to_obs:
        observation = add_task_labels(observation, self.current_task_id)
    if self.add_task_dict_to_info:
        info.update(self.current_task)

    self.steps += 1
    return observation, rewards, done, info
# def close(self, **kwargs) -> None:
# return super().close(**kwargs)
def reset(self, new_random_task: Optional[bool] = None, **kwargs):
    """Resets the wrapped environment.

    If `new_random_task` is True, this also sets a new random task as the
    current task. When left to `None`, the wrapper's `new_random_task_on_reset`
    setting is used instead.

    NOTE: This resets the wrapped env, but doesn't reset the number of steps
    taken, hence the 'task' progression according to the task_schedule
    doesn't change.

    Args:
        new_random_task: Whether to sample a new task before resetting.
        **kwargs: Forwarded to the wrapped env's `reset`.

    Returns:
        The first observation, with the task id added if `add_task_id_to_obs`.
    """
    if new_random_task is None:
        new_random_task = self.new_random_task_on_reset

    if new_random_task:
        prev_task_id = self.current_task_id
        # Take a snapshot: `current_task` returns the wrapper's own dict, which
        # the setter mutates in place. Without the copy, the comparison below
        # would compare that dict with itself and never detect a switch.
        previous_task = self.current_task.copy()
        self.current_task = self.random_task()
        episode = self._episodes
        step = self._steps
        if previous_task != self.current_task:
            logger.debug(
                f"Switching tasks at step {step} (end of episode {episode}): "
                f"{prev_task_id} -> {self.current_task_id} {self.current_task}"
            )

    observation = self.env.reset(**kwargs)
    if self.add_task_id_to_obs:
        observation = add_task_labels(observation, self.current_task_id)
    self._episodes += 1
    return observation
@property
def steps(self) -> int:
    """Number of steps taken so far (position within the task schedule)."""
    return self._steps


@steps.setter
def steps(self, value: int) -> None:
    """Set the step counter, clamped to `[_starting_step, _max_steps]`.

    The upper bound only applies when `_max_steps` is not None; once it is
    reached the counter stagnates there.
    TODO: What exactly should we do in that case? Should we close the env, or
    just stay at the same 'step' in the task schedule forever? Is this the
    "correct" way to limit the number of steps in an environment?
    """
    clamped = max(value, self._starting_step)
    if self._max_steps is not None:
        clamped = min(clamped, self._max_steps)
    self._steps = clamped
@property
def current_task(self) -> Dict[str, Any]:
    """Dict mapping each task attribute name to its current value.

    The dict is cached in `_current_task`; it is (re)built from the unwrapped
    env's attributes named in `task_params` whenever the cache is empty.

    NOTE: This caching mechanism assumes that we are the only source of
    potential change for these attributes. We get the attributes from the
    unwrapped environment, which effectively bypasses any wrappers.

    Raises:
        RuntimeError: If a cached value no longer matches the attribute on the
            unwrapped env (i.e. it was changed from somewhere else).
    """
    if not self._current_task:
        self._current_task = {
            name: getattr(self.env.unwrapped, name) for name in self.task_params
        }
    # Double-checking that the attributes didn't change somehow without us
    # knowing.
    # TODO: Maybe remove this when done debugging/testing this since it's a
    # little bit of a waste of compute.
    for attribute, value_in_dict in self._current_task.items():
        current_env_value = getattr(self.env.unwrapped, attribute)
        if value_in_dict != current_env_value:
            raise RuntimeError(
                f"The value of the attribute '{attribute}' was changed from "
                f"somewhere else! (value in _current_task: {value_in_dict}, "
                f"value on env: {current_env_value})"
            )
    return self._current_task


@current_task.setter
def current_task(self, task: Union[Dict[str, float], Sequence[float], Callable]):
    """Apply `task` to the environment.

    Args:
        task: Either a callable (called with the wrapped env), a dict mapping
            attribute names to values (missing attributes fall back to
            `default_task`), or a list/array with one value per entry of
            `task_params`.

    Raises:
        RuntimeError: If `task` is none of the supported kinds.
    """
    if isinstance(task, (list, np.ndarray)):
        assert len(task) == len(self.task_params), "lengths should match!"
        task = dict(zip(self.task_params, task))

    # Remember the position of the first matching entry of the schedule, if any.
    for index, scheduled_task in enumerate(self.task_schedule.values()):
        if scheduled_task is task or scheduled_task == task:
            self._current_task_id = index
            break

    if callable(task):
        task(self.env)
    elif isinstance(task, dict):
        # Snapshot the overrides first: `task` may be `self._current_task`
        # itself (e.g. `env.current_task = env.current_task`), in which case
        # clearing the cache would also wipe the values we are about to apply.
        overrides = dict(task)
        self._current_task.clear()
        self._current_task.update(self.default_task)
        for k, value in overrides.items():
            assert isinstance(k, str), "The task dict should have str keys."
            self._current_task[k] = value
        # Actually change the value of the task attributes in the environment.
        for name, param_value in self._current_task.items():
            assert hasattr(
                self.env.unwrapped, name
            ), f"the unwrapped environment doesn't have a {name} attribute!"
            setattr(self.env.unwrapped, name, param_value)
    else:
        raise RuntimeError(
            f"don't know how to set task {task}! (tasks must be "
            "either callables or dicts mapping attributes to "
            "values)."
        )
def random_task(self) -> Dict:
    """Samples a random 'task'.

    When `new_random_task_on_reset` is set, one of the tasks of the schedule
    (the values of the task schedule dict) is picked at random with the
    wrapper's `np_random` generator.

    Otherwise the task is created by `make_env_attributes_task` from the
    default task, the wrapper's random generator and `self.noise_std`.
    According to the original notes, that sampling depends on the type of the
    default value of each attribute in the environment:
    - `int`, `float`, or `np.ndarray` attributes are sampled by multiplying
      the default value by a N(mean=1., std=`self.noise_std`). `int`
      attributes are then rounded to the nearest value.
    - `bool` attributes are sampled randomly from `True` and `False`.

    TODO: It might be cool to give an option for passing a prior that could
    be used for a given attribute, but it would add a bit too much
    complexity and isn't really needed atm.

    Raises:
        NotImplementedError: If the default value has an unsupported type.

    Returns:
        Dict: A dict of the attribute name, and the value that would be set
        for that attribute.
    """
    if not self.new_random_task_on_reset:
        return make_env_attributes_task(
            self,
            task_params=self.default_task,
            rng=self.np_random,
            noise_std=self.noise_std,
        )
    candidates = list(self.task_schedule.values())
    return self.np_random.choice(candidates)
def update_task(self, values: Dict = None, **kwargs):
    """Updates the current task with the params from `values` and/or `kwargs`.

    Important: use this method to change properties of the current task rather
    than modifying the `current_task` dictionary. For example,
    `env.current_task["length"] = 2.0` will NOT update the length of the pole
    in CartPole, whereas `env.update_task(length=2.0)` will!

    NOTE: Any param that isn't given keeps its current value (it is not reset
    to the default value). Keyword arguments take precedence over `values`.

    Raises:
        RuntimeError: If `values` is neither a dict nor None.
    """
    merged = self.current_task.copy()
    if values is not None:
        if not isinstance(values, dict):
            raise RuntimeError(f"values can only be a dict or None (received {values}).")
        merged.update(values)
    merged.update(kwargs)
    self.current_task = merged
def seed(self, seed: Optional[int] = None) -> List[int]:
    """Seed the wrapper's random generator, both spaces, and the wrapped env.

    Returns:
        Whatever the wrapped env's `seed` method returns.
    """
    self.np_random = np.random.default_rng(seed)
    for space in (self.action_space, self.observation_space):
        space.seed(seed)
    return self.env.seed(seed)
def task_dict(self, task_array: np.ndarray) -> Dict[str, float]:
    """Pair each name in `task_params` with the value at the same position of `task_array`.

    The two must have the same length.
    """
    names = self.task_params
    assert len(task_array) == len(
        names
    ), "Lengths should match the number of task parameters."
    return {name: value for name, value in zip(names, task_array)}
@property
def task_schedule(self) -> Dict:
    """Dict mapping a step to the task that becomes active at that step."""
    return self._task_schedule


@task_schedule.setter
def task_schedule(self, value: Dict[int, Any]):
    """Replace the task schedule with a normalized copy of `value`.

    - A copy of the default task is inserted at step 0 when `value` has no
      entry for it.
    - Lists and numpy arrays are converted to dicts mapping attribute name to
      value (see `task_dict`).
    - If the current step is a key of the new schedule, the corresponding task
      is applied right away.

    The stored schedule is only replaced once every entry has been validated.

    Raises:
        RuntimeError: If an entry is not a dict, list, numpy array or callable.
    """
    schedule: Dict[int, Any] = {}
    if 0 not in value:
        schedule[0] = self.default_task.copy()
    for step, task in sorted(value.items()):
        # Convert any numpy arrays or lists in the task schedule to dicts
        # mapping from attribute name to value to be set.
        if isinstance(task, (list, np.ndarray)):
            task = self.task_dict(task)
        if not (isinstance(task, dict) or callable(task)):
            raise RuntimeError(
                "Task schedule can only contain dicts, lists, numpy arrays or "
                f"callables, but got {task}!"
            )
        schedule[step] = task
    self._task_schedule = schedule
    if self._steps in self._task_schedule:
        self.current_task = self._task_schedule[self._steps]
================================================
FILE: sequoia/common/gym_wrappers/multi_task_environment_test.py
================================================
from typing import Dict, List, Tuple
import gym
import matplotlib.pyplot as plt
import pytest
from gym import spaces
from gym.envs.classic_control import CartPoleEnv
from gym.vector import SyncVectorEnv
from gym.wrappers import TimeLimit
from sequoia.common.gym_wrappers import MultiTaskEnvironment
from sequoia.conftest import atari_py_required, monsterkong_required, param_requires_monsterkong
from sequoia.utils.utils import dict_union
from .multi_task_environment import MultiTaskEnvironment
# Gym environment ids used to parametrize the tests of this module.
supported_environments: List[str] = ["CartPole-v0"]
def test_task_schedule():
    """The env attributes follow the task schedule as the steps go by.

    Attributes that a scheduled task doesn't mention are expected to be at
    their default value.
    """
    original: CartPoleEnv = gym.make("CartPole-v0")
    default_length, default_gravity = original.length, original.gravity
    schedule = {
        10: dict(length=0.1),
        20: dict(length=0.2, gravity=-12.0),
        30: dict(gravity=0.9),
    }
    env = MultiTaskEnvironment(original, task_schedule=schedule)
    env.seed(123)
    env.reset()

    for step in range(100):
        _, _, done, _ = env.step(env.action_space.sample())
        # env.render()
        if done:
            env.reset()

        if step < 10:
            assert env.length == default_length and env.gravity == default_gravity
        elif step < 20:
            assert env.length == 0.1
        elif step < 30:
            assert env.length == 0.2 and env.gravity == -12.0
        else:
            assert env.length == default_length and env.gravity == 0.9

    env.close()
@pytest.mark.parametrize("environment_name", supported_environments)
def test_multi_task(environment_name: str):
# Smoke test: wraps the env without a task schedule, takes random steps and
# asks for a new random task through `reset(new_random_task=True)`, printing
# the resulting task. There are no assertions; it only checks nothing crashes.
original = gym.make(environment_name)
env = MultiTaskEnvironment(original)
env.reset()
env.seed(123)
plt.ion()
# NOTE(review): `default_task` is not used below.
default_task = env.default_task
for task_id in range(5):
for i in range(20):
observation, reward, done, info = env.step(env.action_space.sample())
# env.render()
env.reset(new_random_task=True)
print(f"New task: {env.current_task}")
env.close()
plt.ioff()
plt.close()
@pytest.mark.skip(reason="This generates some output, uncomment this to run it.")
@pytest.mark.parametrize("environment_name", supported_environments)
def test_monitor_env(environment_name):
# Manual check (skipped by default): records the multi-task env with gym's
# `Monitor` wrapper into `recordings/multi_task_<environment_name>`.
original = gym.make(environment_name)
# original = CartPoleEnv()
env = MultiTaskEnvironment(original)
env = gym.wrappers.Monitor(
env,
f"recordings/multi_task_{environment_name}",
force=True,
write_upon_reset=False,
)
env.seed(123)
env.reset()
plt.ion()
# Copies of the task dict collected while stepping.
task_param_values: List[Dict] = []
# NOTE(review): `default_length` is only referenced by the commented-out
# `update_task` call below.
default_length: float = env.length
for task_id in range(20):
for i in range(100):
observation, reward, done, info = env.step(env.action_space.sample())
# env.render()
if done:
# NOTE(review): `MultiTaskEnvironment.reset` names its parameter
# `new_random_task`; `new_task` would be forwarded to the wrapped env's
# `reset` through **kwargs -- confirm this is what was intended.
env.reset(new_task=False)
task_param_values.append(env.current_task.copy())
# env.update_task(length=(i + 1) / 100 * 2 * default_length)
# Called without arguments, so no value of the task is changed here.
env.update_task()
print(f"New task: {env.current_task.copy()}")
env.close()
plt.ioff()
plt.close()
def test_update_task():
    """`update_task` changes the given values both on the env and in `current_task`.

    A value that isn't passed to `update_task` must keep its previous value
    rather than being reset to its default.
    """
    base_env = gym.make("CartPole-v0")
    env = MultiTaskEnvironment(base_env)
    env.reset()
    env.seed(123)
    assert env.length == base_env.length

    # Changing the length is reflected in both places.
    env.update_task(length=1.0)
    assert env.current_task["length"] == env.length == 1.0

    # Changing gravity afterwards leaves the updated length untouched.
    env.update_task(gravity=20.0)
    assert env.length == 1.0
    assert env.current_task["gravity"] == env.gravity == 20.0

    env.close()
def test_add_task_dict_to_info():
    """With `add_task_dict_to_info=True`, `info` carries the current task dict."""
    original: CartPoleEnv = gym.make("CartPole-v0")
    default_length, default_gravity = original.length, original.gravity
    task_schedule = {
        10: dict(length=0.1),
        20: dict(length=0.2, gravity=-12.0),
        30: dict(gravity=0.9),
    }
    env = MultiTaskEnvironment(
        original,
        task_schedule=task_schedule,
        add_task_dict_to_info=True,
    )
    env.seed(123)
    env.reset()

    for step in range(100):
        _, _, done, info = env.step(env.action_space.sample())
        # env.render()
        if done:
            env.reset()

        # The expected task is the default task overridden by the scheduled values.
        if step < 10:
            assert env.length == default_length and env.gravity == default_gravity
            assert info == env.default_task
        elif step < 20:
            assert env.length == 0.1
            assert info == dict_union(env.default_task, task_schedule[10])
        elif step < 30:
            assert env.length == 0.2 and env.gravity == -12.0
            assert info == dict_union(env.default_task, task_schedule[20])
        else:
            assert env.length == default_length and env.gravity == 0.9
            assert info == dict_union(env.default_task, task_schedule[30])

    env.close()
def test_add_task_id_to_obs():
"""Test that the task id is added to the observations when `add_task_id_to_obs` is set."""
original: CartPoleEnv = gym.make("CartPole-v0")
starting_length = original.length
starting_gravity = original.gravity
task_schedule = {
10: dict(length=0.1),
20: dict(length=0.2, gravity=-12.0),
30: dict(gravity=0.9),
}
env = MultiTaskEnvironment(
original,
task_schedule=task_schedule,
add_task_id_to_obs=True,
)
env.seed(123)
env.reset()
# Four tasks are expected: the three scheduled ones plus the task at step 0.
assert env.observation_space == spaces.Dict(
x=original.observation_space,
task_labels=spaces.Discrete(4),
)
for step in range(100):
obs, _, done, info = env.step(env.action_space.sample())
# env.render()
x, task_id = obs["x"], obs["task_labels"]
# The task id is expected to be the index of the schedule interval that
# contains `step`.
if 0 <= step < 10:
assert env.length == starting_length and env.gravity == starting_gravity
assert task_id == 0, step
elif 10 <= step < 20:
assert env.length == 0.1
assert task_id == 1, step
elif 20 <= step < 30:
assert env.length == 0.2 and env.gravity == -12.0
assert task_id == 2, step
elif step >= 30:
assert env.length == starting_length and env.gravity == 0.9
assert task_id == 3, step
if done:
obs = env.reset()
assert isinstance(obs, dict)
env.close()
def test_starting_step_and_max_step():
"""Test that when the `starting_step` and `max_steps` args are given, the env
stays within the [starting_step, max_steps] portion of the task schedule.
"""
original: CartPoleEnv = gym.make("CartPole-v0")
starting_length = original.length
starting_gravity = original.gravity
task_schedule = {
10: dict(length=0.1),
20: dict(length=0.2, gravity=-12.0),
30: dict(gravity=0.9),
}
env = MultiTaskEnvironment(
original,
task_schedule=task_schedule,
add_task_id_to_obs=True,
starting_step=10,
max_steps=19,
)
env.seed(123)
env.reset()
# The size of the task label space still reflects the whole schedule.
assert env.observation_space == spaces.Dict(
x=original.observation_space,
task_labels=spaces.Discrete(4),
)
# Trying to set the 'steps' to something smaller than the starting step
# doesn't work.
env.steps = -123
assert env.steps == 10
# Trying to set the 'steps' to something greater than the max_steps
# doesn't work.
env.steps = 50
assert env.steps == 19
# Here we reset the steps to 10, and also check that this works.
env.steps = 10
assert env.steps == 10
for step in range(0, 100):
# The environment started at an offset of 10.
assert env.steps == max(min(step + 10, 19), 10)
obs, _, done, info = env.step(env.action_space.sample())
# env.render()
x, task_id = obs["x"], obs["task_labels"]
# Check that we're always stuck between 10 and 20
assert 10 <= env.steps < 20
# Only the task scheduled at step 10 (index 1 in the schedule) should ever be
# active.
assert env.length == 0.1
assert task_id == 1, step
if done:
print(f"Resetting on step {step}")
obs = env.reset()
assert isinstance(obs, dict)
env.close()
@atari_py_required
def test_task_id_is_added_even_when_no_known_task_schedule():
    """Test that even when the env is unknown or there are no task params, the
    task_id is still added correctly and is zero at all times.
    """
    # Breakout doesn't have default task params.
    original: gym.Env = gym.make("ALE/Breakout-v5")
    env = MultiTaskEnvironment(
        original,
        add_task_id_to_obs=True,
    )
    env.seed(123)
    env.reset()
    assert env.observation_space == spaces.Dict(
        x=original.observation_space,
        task_labels=spaces.Discrete(1),
    )
    for step in range(0, 100):
        obs, _, done, info = env.step(env.action_space.sample())
        # env.render()
        assert obs["task_labels"] == 0
        if done:
            # `reset` returns the same kind of dict observation as `step`:
            # tuple-unpacking it would yield its keys rather than its values,
            # so the task label has to be read by key.
            obs = env.reset()
            assert obs["task_labels"] == 0
    env.close()
@monsterkong_required
def test_task_schedule_monsterkong():
    """The MonsterKong level follows a task schedule made of `{"level": n}` dicts.

    Episodes are limited to 10 steps, so there are many resets along the way;
    the level must only depend on the total number of steps.
    """
    env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"level": 0},
            100: {"level": 1},
            200: {"level": 2},
            300: {"level": 3},
            400: {"level": 4},
        },
        add_task_id_to_obs=True,
    )
    obs = env.reset()
    img, task_labels = obs["x"], obs["task_labels"]
    assert task_labels == 0
    assert env.get_level() == 0

    # One new level every 100 steps.
    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == i // 100
        assert env.level == i // 100
        env.render()
        assert isinstance(done, bool)
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()

    assert obs["task_labels"] == 4
    assert env.level == 4

    # level stays the same even after reaching that objective.
    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == 4
        assert env.level == 4
        env.render()
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()

    env.close()
@monsterkong_required
def test_task_schedule_with_callables():
    """Apply functions to the env at a given step.

    Same scenario as the dict-based MonsterKong schedule, but each task is a
    `methodcaller("set_level", n)` that is applied to the env.
    """
    env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)
    from operator import methodcaller

    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: methodcaller("set_level", 0),
            100: methodcaller("set_level", 1),
            200: methodcaller("set_level", 2),
            300: methodcaller("set_level", 3),
            400: methodcaller("set_level", 4),
        },
        add_task_id_to_obs=True,
    )
    obs = env.reset()
    # img, task_labels = obs
    assert obs["task_labels"] == 0
    assert env.get_level() == 0

    # One new level every 100 steps.
    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == i // 100
        assert env.level == i // 100
        env.render()
        assert isinstance(done, bool)
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()

    assert obs["task_labels"] == 4
    assert env.level == 4

    # level stays the same even after reaching that objective.
    for i in range(500):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == 4
        assert env.level == 4
        env.render()
        if done:
            print(f"End of episode at step {i}")
            obs = env.reset()
@monsterkong_required
def test_random_task_on_each_episode():
    """With `new_random_task_on_reset=True`, the task is drawn on reset and then stays put.

    The keys of the schedule (steps) play no role in that mode.
    """
    env: MetaMonsterKongEnv = gym.make("MetaMonsterKong-v1")
    from gym.wrappers import TimeLimit

    env = TimeLimit(env, max_episode_steps=10)
    env = MultiTaskEnvironment(
        env,
        task_schedule={
            0: {"level": 0},
            5: {"level": 1},
            200: {"level": 2},
            300: {"level": 3},
            400: {"level": 4},
        },
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )

    # Ten resets should give more than a single distinct task.
    task_labels = [env.reset()["task_labels"] for _ in range(10)]
    assert len(set(task_labels)) > 1

    # Episodes only last 10 steps. Tasks don't have anything to do with the task
    # schedule.
    obs = env.reset()
    start_task_label = obs["task_labels"]
    for i in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs["task_labels"] == start_task_label
        # The time limit ends the episode on the last of the 10 steps, not before.
        assert done if i == 9 else not done

    env.close()
from sequoia.conftest import monsterkong_required
def test_random_task_on_each_episode_and_only_one_task_in_schedule():
"""BUG: When the goal is to have only one task, it instead keeps sampling a new
task from the 'distribution', in the case of cartpole!
"""
# NOTE(review): the annotation below names the MonsterKong env, but a CartPole
# env is created.
env: MetaMonsterKongEnv = gym.make("CartPole-v1")
from gym.wrappers import TimeLimit
env = TimeLimit(env, max_episode_steps=10)
env = MultiTaskEnvironment(
env,
task_schedule={
0: {"length": 0.1},
},
add_task_id_to_obs=True,
new_random_task_on_reset=True,
)
# Task labels and pole lengths observed on the resets and steps.
task_labels = []
lengths = []
for i in range(10):
obs = env.reset()
task_labels.append(obs["task_labels"])
lengths.append(env.length)
done = False
while not done:
obs, reward, done, info = env.step(env.action_space.sample())
task_labels.append(obs["task_labels"])
lengths.append(env.length)
# The single task of the schedule is the only one that may ever be seen.
assert set(task_labels) == {0}
assert set(lengths) == {0.1}
def env_fn_monsterkong() -> gym.Env:
    """Create a MonsterKong env with 10-step episodes and a random task on each reset.

    The schedule holds five tasks (levels 1 to 5) and the task id is added to
    the observations.
    """
    # Levels 1..5, registered at steps 0, 100, ..., 400.
    level_schedule = {index * 100: {"level": index + 1} for index in range(5)}
    base_env = TimeLimit(gym.make("MetaMonsterKong-v0"), max_episode_steps=10)
    return MultiTaskEnvironment(
        base_env,
        task_schedule=level_schedule,
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )
def env_fn_cartpole() -> gym.Env:
    """Create a CartPole env with 10-step episodes and a random task on each reset.

    The schedule holds five tasks (pole lengths 0.1 to 0.5) and the task id is
    added to the observations.
    """
    pole_lengths = (0.1, 0.2, 0.3, 0.4, 0.5)
    length_schedule = {
        step: {"length": pole_length}
        for step, pole_length in zip(range(0, 500, 100), pole_lengths)
    }
    base_env = TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10)
    return MultiTaskEnvironment(
        base_env,
        task_schedule=length_schedule,
        add_task_id_to_obs=True,
        new_random_task_on_reset=True,
    )
@pytest.mark.parametrize("env_id", ["cartpole", param_requires_monsterkong("monsterkong")])
def test_task_sequence_is_reproducible(env_id: str):
"""Test that the multi-task setup is seeded correctly, i.e. that the task sequence
is reproducible given the same seed.
"""
# Pick the factory for the requested environment.
if env_id == "cartpole":
env_fn = env_fn_cartpole
elif env_id == "monsterkong":
env_fn = env_fn_monsterkong
else:
assert False, f"just testing on cartpole and monsterkong for now, but got env {env_id}"
# Results of the first run, which the later runs are compared against.
first_results: List[Tuple[int, int]] = []
n_runs = 5
n_episodes_per_run = 10
for run_number in range(n_runs):
print(f"starting run {run_number} / {n_runs}")
# For each 'run', we record the task sequence and how long each task lasted for.
# Then, we want to check that each run was identical, for a given seed.
env = env_fn()
env.seed(123)
task_ids: List[int] = []
task_lengths: List[int] = []
for episode in range(n_episodes_per_run):
print(f"Episode {episode} / {n_episodes_per_run}")
obs = env.reset()
# The task of an episode is read from the observation returned by `reset`.
task_id: int = obs["task_labels"]
task_length = 0
done = False
while not done:
obs, _, done, _ = env.step(env.action_space.sample())
task_length += 1
task_ids.append(task_id)
task_lengths.append(task_length)
task_ids_and_lengths = list(zip(task_ids, task_lengths))
print(f"Task ids and length of each one: {task_ids_and_lengths}")
assert len(set(task_ids)) > 1, "should have been more than just one task!"
if not first_results:
first_results = task_ids_and_lengths
else:
# Make sure that the results from this run are equivalent to the others with
# the same seed:
assert task_ids_and_lengths == first_results
from sequoia.common.gym_wrappers import EnvDataset
from sequoia.utils.utils import unique_consecutive_with_index
def test_iteration():
# Iterates over the multi-task env through `EnvDataset` and checks that the
# sequence of distinct pole lengths seen matches the lengths of the schedule.
# NOTE(review): `nb_tasks` is not used below; the schedule hard-codes
# `range(5)`.
nb_tasks = 5
steps_per_task = 10
# NOTE(review): the doubled `task_schedule = task_schedule =` is redundant.
task_schedule = task_schedule = {
i * steps_per_task: dict(length=0.1 + i * 0.2) for i in range(5)
}
env = gym.make("CartPole-v0")
env = MultiTaskEnvironment(env, task_schedule=task_schedule)
env = TimeLimit(env, max_episode_steps=14)
env = EnvDataset(env)
# Pole length observed at each step of the iteration.
lengths = []
total_steps = 0
for episode in range(10):
for step, obs in enumerate(env):
# print(total_steps, episode, step, obs, env.length)
lengths.append(env.length)
rewards = env.send(env.action_space.sample())
total_steps += 1
if total_steps > 100:
break
actual_task_schedule = dict(unique_consecutive_with_index(lengths))
# NOTE: The keys won't necessarily be the same, since episodes might be shorter
# than `n_steps_per_task`.
length_schedule = {k: v["length"] for k, v in task_schedule.items()}
assert list(actual_task_schedule.values()) == list(length_schedule.values())
# assert False, actual_task_schedule
================================================
FILE: sequoia/common/gym_wrappers/observation_limit.py
================================================
""" IDEA: same as EpisodeLimit, for for the number of total observations.
"""
import gym
from gym.error import ClosedEnvironmentError
from sequoia.utils import get_logger
from .utils import IterableWrapper
# Module-level logger, used for the per-observation debug messages.
logger = get_logger(__name__)
class ObservationLimit(IterableWrapper):
    """Closes the env when `max_steps` observations have been produced *in total*.

    Both `reset` and `step` produce an observation, so both count towards the
    budget. For vectorized environments, each of those calls consumes
    `num_envs` from the total budget, i.e. the counter is incremented by the
    batch size each time.

    Once the env is closed, `reset` and `step` raise a `ClosedEnvironmentError`.
    """

    def __init__(self, env: gym.Env, max_steps: int):
        super().__init__(env=env)
        self._max_obs = max_steps
        self._obs_counter: int = 0
        self._initial_reset = False
        self._is_closed: bool = False

    @property
    def is_closed(self) -> bool:
        """Whether this wrapper has been closed."""
        return self._is_closed

    def reset(self):
        """Reset the wrapped env, counting the observation this produces.

        Raises:
            ClosedEnvironmentError: If the env is already closed.
        """
        self._raise_if_closed("reset")
        # Resetting actually gives you an observation, so we count it here.
        self._count_observations()
        obs = self.env.reset()
        self._close_if_budget_spent()
        return obs

    def step(self, action):
        """Step the wrapped env, counting the observation this produces.

        Raises:
            ClosedEnvironmentError: If the env is already closed.
        """
        self._raise_if_closed("step through")
        obs, reward, done, info = self.env.step(action)
        self._count_observations()
        self._close_if_budget_spent()
        return obs, reward, done, info

    def close(self):
        """Close the wrapped env and mark this wrapper as closed."""
        self.env.close()
        self._is_closed = True

    def _raise_if_closed(self, operation: str) -> None:
        """Raise a `ClosedEnvironmentError` naming `operation` if the env is closed."""
        if not self._is_closed:
            return
        if self._obs_counter >= self._max_obs:
            raise ClosedEnvironmentError(
                f"Env reached max number of observations ({self._max_obs})"
            )
        raise ClosedEnvironmentError(f"Can't {operation} closed env.")

    def _count_observations(self) -> None:
        """Add the observations produced by one `reset`/`step` call to the counter."""
        self._obs_counter += self.env.num_envs if self.is_vectorized else 1
        logger.debug(f"(observation {self._obs_counter}/{self._max_obs})")

    def _close_if_budget_spent(self) -> None:
        """Close the env once the observation budget has been reached."""
        # BUG: If we don't use >=, then iteration with EnvDataset doesn't work.
        if self._obs_counter >= self._max_obs:
            self.close()
================================================
FILE: sequoia/common/gym_wrappers/observation_limit_test.py
================================================
from functools import partial
import gym
import pytest
from gym.vector import SyncVectorEnv
from sequoia.conftest import DummyEnvironment
from .env_dataset import EnvDataset
from .observation_limit import ObservationLimit
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_step_limit_with_single_env(env_name: str):
    """Env should close when a given number of observations have been produced"""
    env = ObservationLimit(gym.make(env_name), max_steps=5)
    env.seed(123)

    def take_random_step():
        return env.step(env.action_space.sample())

    # First episode: one reset + two steps = 3 observations.
    env.reset()
    take_random_step()
    take_random_step()
    # Second episode: one reset + one step brings the total to 5.
    env.reset()
    take_random_step()

    assert env.is_closed
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        take_random_step()
@pytest.mark.xfail(
    reason="TODO: Fix the bugs in the interaction between " "EnvDataset and ObservationLimit."
)
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
def test_step_limit_with_single_env_dataset(env_name: str):
    """Iterating over a limited `EnvDataset` should stop after `max_steps` observations.

    Currently expected to fail (see the xfail reason).
    """
    # This gym env is created but immediately replaced by the dummy env below.
    env = gym.make(env_name)
    start, target, max_steps = 0, 10, 5
    env = DummyEnvironment(start=start, target=target, max_value=10 * 2)
    env = ObservationLimit(EnvDataset(env), max_steps=max_steps)
    env.seed(123)

    values = []
    for _, obs in zip(range(100), env):
        values.append(obs)
        env.send(1)
    assert values == list(range(start, max_steps))

    assert env.is_closed
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())
    # Iterating over the closed env should raise before yielding anything.
    with pytest.raises(gym.error.ClosedEnvironmentError):
        for _ in zip(range(5), env):
            assert False
@pytest.mark.parametrize("batch_size", [3, 5])
def test_step_limit_with_vectorized_env(batch_size):
    """With a vectorized env, each reset/step uses up `batch_size` observations."""
    start, target = 0, 10
    env_fns = [
        partial(DummyEnvironment, start=start, target=target, max_value=target * 2)
        for _ in range(batch_size)
    ]
    env = ObservationLimit(SyncVectorEnv(env_fns), max_steps=3 * batch_size)

    # reset + step + reset = 3 batches of observations, which is the whole budget.
    env.reset()
    env.step(env.action_space.sample())
    # obs, reward, done, info = env.step(env.action_space.sample())
    env.reset()
    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())
@pytest.mark.parametrize("batch_size", [3, 5])
def test_step_limit_with_vectorized_env_partial_final_batch(batch_size):
    """In the case where the batch size isn't a multiple of the max
    observations, the env returns ceil(max_obs / batch_size) * batch_size
    observations in total.

    TODO: If we ever get to few-shot learning or something like that, we might
    have to care about this.
    """
    start, target = 0, 10
    env_fns = [
        partial(DummyEnvironment, start=start, target=target, max_value=target * 2)
        for _ in range(batch_size)
    ]
    env = ObservationLimit(SyncVectorEnv(env_fns), max_steps=3 * batch_size + 1)

    env.reset()
    assert not env.is_closed
    env.step(env.action_space.sample())
    env.step(env.action_space.sample())
    # Three full batches so far: still one observation short of the limit.
    assert not env.is_closed
    # obs, reward, done, info = env.step(env.action_space.sample())
    env.reset()
    assert env.is_closed

    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())
================================================
FILE: sequoia/common/gym_wrappers/pixel_observation.py
================================================
""" Fixes some of the annoying things about the PixelObservationWrapper. """
from typing import Union
import gym
import numpy as np
from gym.wrappers.pixel_observation import PixelObservationWrapper as PixelObservationWrapper_
from sequoia.common.spaces.image import Image
from .utils import IterableWrapper
class PixelObservationWrapper(PixelObservationWrapper_):
"""Less annoying version of gym's `PixelObservationWrapper`:
- Resets the environment before calling the constructor (fixes crash).
- Makes the popup window non-visible when rendering with mode="rgb_array".
- State is always pixels instead of dict with pixels at key 'pixels'
- TODO: What if we wanted to also have access to the state? We might
have to revert this change at some point.
- `reset()` returns the pixels.
"""
def __init__(self, env: Union[str, gym.Env]):
# `env` may be an environment id, in which case the env is created here.
if isinstance(env, str):
env = gym.make(env)
env.reset()
super().__init__(env)
# Expose only the 'pixels' entry of the parent's observation space, as an
# `Image` space.
pixel_space = self.observation_space["pixels"]
self.observation_space = Image.from_box(pixel_space)
from gym.envs.classic_control.rendering import Viewer
self.viewer: Viewer
# Render once in "rgb_array" mode when the env has no viewer yet; the
# viewer's window, if there is a viewer, is then made invisible.
if self.env.viewer is None:
self.env.render(mode="rgb_array")
if self.env.viewer is not None:
self.viewer: Viewer = env.viewer
self.viewer.window.set_visible(False)
def step(self, *args, **kwargs):
# Same as the parent's `step`, but the observation is the 'pixels' entry
# passed through `to_array`.
state, reward, done, info = super().step(*args, **kwargs)
state = state["pixels"]
state = self.to_array(state)
return state, reward, done, info
def reset(self, *args, **kwargs):
# Stores the pixels of the first observation in `self.state` and returns them.
# NOTE(review): `*args` and `**kwargs` are accepted but not passed on to
# `super().reset()`.
self.state = super().reset()["pixels"]
self.state = self.to_array(self.state)
return self.state
def render(self, mode: str = "human", **kwargs):
# Make the window visible again before rendering in "human" mode.
if mode == "human" and self.viewer and not self.viewer.window.visible:
self.viewer.window.set_visible(True)
return super().render(mode=mode, **kwargs)
def to_array(self, image) -> np.ndarray:
# Converts `image` when it isn't already a numpy array.
# NOTE(review): this body has three `return` statements, two of them
# back-to-back, so at least one of them can never be reached -- confirm which
# conversion is the intended one and drop the dead statement.
if not isinstance(image, np.ndarray):
# TODO: There is something weird happening here, something to do
# with the image having a negative stride dimension or something
# like that. Also, ideally, we would return a numpy array (without
# depending on pytorch here)
from sequoia.common.transforms.to_tensor import to_tensor
return to_tensor(image)
return np.array(image.copy())
return image
class ImageObservations(IterableWrapper):
    """Wrapper whose observation space is the env's space passed through `Image.wrap`."""

    def __init__(self, env: gym.Env):
        super().__init__(env=env)
        wrapped_space = self.env.observation_space
        self.observation_space = Image.wrap(wrapped_space)
================================================
FILE: sequoia/common/gym_wrappers/pixel_observation_test.py
================================================
import gym
import numpy as np
import pytest
from .pixel_observation import PixelObservationWrapper
# Skip every test in this module when `pyglet` cannot be imported.
pyglet = pytest.importorskip("pyglet")
def test_passing_string_to_constructor():
    """The wrapper can be built directly from an env id string."""
    wrapped = PixelObservationWrapper("CartPole-v0")
    expected_shape = (400, 600, 3)
    assert wrapped.observation_space.shape == expected_shape
def test_observation_space():
    """Wrapping an env instance gives a (400, 600, 3) observation space."""
    base_env = gym.make("CartPole-v0")
    wrapped = PixelObservationWrapper(base_env)
    assert wrapped.observation_space.shape == (400, 600, 3)
def test_reset_gives_pixels():
    """`reset()` returns a uint8 image rather than a dict of observations."""
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        first_frame = env.reset()
        shape, dtype = first_frame.shape, first_frame.dtype
        assert shape == (400, 600, 3)
        assert dtype == np.uint8
def test_step_obs_is_pixels():
    """The observation returned by `step()` is a uint8 image."""
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        env.reset()
        random_action = env.action_space.sample()
        step_result = env.step(random_action)
        frame = step_result[0]
        assert frame.shape == (400, 600, 3)
        assert frame.dtype == np.uint8
def test_state_attribute_is_pixels():
    """After a reset, the `state` attribute of the wrapper holds a uint8 image."""
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        env.reset()
        stored_state = env.state
        assert stored_state.shape == (400, 600, 3)
        assert stored_state.dtype == np.uint8
def test_render_rgb_array():
    """`render(mode="rgb_array")` returns a (400, 600, 3) uint8 frame after each step."""
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        # NOTE: `window` is never used afterwards in this test.
        window = env.viewer.window
        for i in range(50):
            obs, _, done, _ = env.step(env.action_space.sample())
            state = env.render(mode="rgb_array")
            assert state.shape == (400, 600, 3)
            assert state.dtype == np.uint8
            # Start a new episode so that stepping can continue.
            if done:
                env.reset()
def test_render_with_human_mode():
    """Rendering with mode="human" keeps pixel observations and reuses the same window."""
    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        # Remember the window object created by the constructor.
        window = env.viewer.window
        for i in range(50):
            obs, _, done, _ = env.step(env.action_space.sample())
            env.render(mode="human")
            assert obs.shape == (400, 600, 3)
            if done:
                env.reset()
            # The viewer must still be using the window seen at the start.
            assert env.viewer.window is window
def test_render_with_human_mode_with_env_dataset():
    """Same check as the human-mode render test, but iterating through an `EnvDataset`."""
    from .env_dataset import EnvDataset

    with PixelObservationWrapper(gym.make("CartPole-v0")) as env:
        env = EnvDataset(env)
        # Remember the window object that is reachable through the wrappers.
        window = env.viewer.window
        obs = env.reset()
        # `zip` with a range caps the iteration at 500 items.
        for i, batch in zip(range(500), env):
            obs = batch
            env.render(mode="human")
            assert obs.shape == (400, 600, 3)
            action = env.action_space.sample()
            # Actions are passed to the env with `send` when iterating.
            rewards = env.send(action)
            assert env.viewer.window is window
================================================
FILE: sequoia/common/gym_wrappers/policy_env.py
================================================
"""TODO: Idea: create a wrapper that accepts a 'policy' which will decide an
action to take whenever the `action` argument to the `step` method is None.
This policy should then accept the 'state' or something like that.
"""
from dataclasses import dataclass
from typing import Any, Callable, Dict, Generic, Iterable, Iterator, Optional, Tuple, TypeVar
import gym
from torch.utils.data import IterableDataset
from sequoia.common.batch import Batch
from sequoia.utils.logging_utils import get_logger
from .utils import StepResult
logger = get_logger(__name__)
# from sequoia.settings.base.environment import Environment
# from sequoia.settings.base.objects import (ActionType, ObservationType, RewardType)
ObservationType = TypeVar("ObservationType")
ActionType = TypeVar("ActionType")
RewardType = TypeVar("RewardType")
# Just for type hinting purposes.
class Environment(gym.Env, Generic[ObservationType, ActionType, RewardType]):
    """Generic `gym.Env` stub, parameterized by observation, action and reward types.

    Only used for type hints in this module: both methods raise
    `NotImplementedError`.
    """

    def step(self, action: ActionType) -> Tuple[ObservationType, RewardType, bool, Dict]:
        """Typed signature of `gym.Env.step`; not implemented here."""
        raise NotImplementedError

    def reset(self) -> ObservationType:
        """Typed signature of `gym.Env.reset`; not implemented here."""
        raise NotImplementedError
# Type of the items produced by a `DatasetItemCreator`.
DatasetItem = TypeVar("DatasetItem")
# Type annotation for functions that will create the items of the
# IterableDataset below, given the current 'Context'.
DatasetItemCreator = Callable[
    [
        ObservationType,  # 'current' state
        ActionType,  # actions applied on the 'current' state
        ObservationType,  # resulting 'next' state
        RewardType,  # rewards associated with the transition above
        bool,  # Whether the 'next' state is final (i.e. the last in an episode)
        Dict,  # the 'info' dict associated with the 'next' state (from Env.step)
    ],
    DatasetItem,
]
@dataclass(frozen=True)
class StateTransition(Batch, Generic[ObservationType, ActionType]):
    """Frozen record of one transition: observation, action and next observation."""

    # Observation before the action.
    observation: ObservationType
    # Action associated with this transition.
    action: ActionType
    # Observation after the action.
    next_observation: ObservationType
    # IDEA: Instead of creating extra properties like this, we could have fields
    # like 'field(aliases="bob")', and getattr and setattr would get/set the
    # corresponding attribute when an alias is used instead of the actual name.

    @property
    def state(self) -> ObservationType:
        """Alias for `observation`."""
        return self.observation

    @property
    def next_state(self) -> ObservationType:
        """Alias for `next_observation`."""
        return self.next_observation
# By default, the PolicyEnv will yield this kind of item:
DefaultDatasetItem = Tuple[StateTransition, RewardType]
def default_dataset_item_creator(
    observations: ObservationType,
    actions: ActionType,
    next_observations: ObservationType,
    rewards: RewardType,
    done: bool,
    info: Dict = None,
) -> DefaultDatasetItem:
    """Build the default dataset item for one step of the env.

    Parameters
    ----------
    observations : Observations
        Observations before the action was applied.
    actions : Actions
        Actions that were applied on `observations`.
    next_observations : Observations
        Observations that resulted from applying `actions`.
    rewards : Rewards
        Rewards for this transition.
    done : bool
        Whether `next_observations` ends the episode. Unused here.
    info : Dict, optional
        Info dict that came with `next_observations`. Unused here.

    Returns
    -------
    Tuple[StateTransition, Rewards]
        The transition `(observations, actions, next_observations)` wrapped in
        a `StateTransition`, followed by the rewards.

    NOTE: `done` and `info` are ignored by this default implementation. A
    custom creator passed to `PolicyEnv` is free to make use of them.
    """
    return StateTransition(observations, actions, next_observations), rewards
class PolicyEnv(gym.Wrapper, IterableDataset, Iterable[DatasetItem]):
    """Wrapper for an environment that adds the following capabilities:

    1. Makes it possible to call step(None), in which case the policy will be
       used to determine the action to take given the current observation and
       the action space.
    2. Creates an 'IterableDataset' from the env, where one iteration over the
       dataset is equivalent to one episode/trajectory in the environment.
       The types of items yielded by this iterator can be customized by passing
       a different callable to `make_dataset_item`.
       The default items are of type `Tuple[StateTransition, Rewards]`, where
       `StateTransition` is a tuple-like object of the form
       `(observation, action, next_observation)`.
    """

    def __init__(
        self,
        env: Environment[ObservationType, ActionType, RewardType],
        policy: Optional[Callable[[ObservationType, gym.Space], ActionType]] = None,
        make_dataset_item: DatasetItemCreator = default_dataset_item_creator,
    ):
        """Wrap `env`.

        Parameters
        ----------
        env : Environment
            The environment to wrap.
        policy : Callable, optional
            Called as `policy(observation, action_space)` to produce an action.
            Can also be set later with `set_policy`.
        make_dataset_item : DatasetItemCreator, optional
            Creates the items yielded when iterating over this env.
        """
        super().__init__(env)
        self.make_dataset_item = make_dataset_item
        self.policy = policy

        self._step_result: Optional[StepResult] = None
        self._closed = False
        # Whether `reset` was called since the last episode ended.
        self._reset = False
        self._n_episodes: int = 0
        self._n_steps: int = 0
        self._n_steps_in_episode: int = 0
        # Most recent observation, used as input to the policy in `step(None)`.
        self._observation: Optional[ObservationType] = None
        self._action: Optional[ActionType] = None

    def set_policy(self, policy: Callable[[ObservationType, gym.Space], ActionType]) -> None:
        """Sets a new policy to be used to generate missing actions."""
        self.policy = policy

    def step(self, action: Optional[Any] = None) -> StepResult:
        """Step the env, asking the policy for an action when `action` is None.

        Raises
        ------
        RuntimeError
            If `action` is None and no policy is set, if `reset` was not called
            first, or if the policy's action is not in the action space.
        """
        if action is None:
            if self.policy is None:
                raise RuntimeError("Need to have a policy set, since action is None.")
            if self._observation is None:
                raise RuntimeError("Reset should have been called before calling step")
            # Get the 'filler' action using the current policy.
            action = self.policy(self._observation, self.action_space)
            if action not in self.action_space:
                raise RuntimeError(
                    f"The policy returned an action which isn't in the action space: {action}"
                )
        step_result = StepResult(*self.env.step(action))
        self._observation = step_result[0]
        self._n_steps += 1
        self._n_steps_in_episode += 1
        return step_result

    def close(self) -> None:
        """Close the wrapped env and forget the current observation."""
        self.env.close()
        self._reset = False
        self._closed = True
        self._observation = None

    def reset(self, *args, **kwargs) -> ObservationType:
        """Reset the wrapped env and return (and remember) the first observation."""
        self._observation = self.env.reset(*args, **kwargs)
        self._reset = True
        self._n_steps_in_episode = 0
        return self._observation

    def __iter__(self) -> Iterator[DatasetItem]:
        """Iterator for an episode/trajectory in the env.

        This uses the policy to iteratively perform an episode in the env, and
        yields items at each step, which are the result of the
        `make_dataset_item` function. By default, these items are of the form
        `Tuple[StateTransition, rewards]`.

        Returns
        -------
        Iterable[DatasetItem]
            Iterable for a 'trajectory' in the env.

        Yields
        -------
        DatasetItem
            The result of `make_dataset_item(...)` for the current step, by
            default a tuple of (state transition, rewards).

        Raises
        ------
        RuntimeError
            If no policy is set, or if `done` is not a bool and any of its
            entries is true.
        """
        if not self.policy:
            raise RuntimeError("Need to have a policy set in order to iterate on this env.")
        if not self._reset:
            # Reset the env, if needed.
            previous_observations = self.reset()
        else:
            # The env was just reset, so the observation was set to
            # self._observation.
            assert self._observation is not None
            previous_observations = self._observation
        logger.debug(f"Start of episode {self._n_episodes}")

        done = False
        while not done:
            logger.debug(f"steps (episode): {self._n_steps_in_episode}, total: {self._n_steps}")
            # Get the batch of actions using the policy.
            actions = self.policy(previous_observations, self.action_space)
            observations, rewards, done, info = self.step(actions)
            # TODO: Need to figure out what to yield here..
            yield self.make_dataset_item(
                observations=previous_observations,
                actions=actions,
                next_observations=observations,
                rewards=rewards,
                done=done,
                info=info,
            )
            # Update the 'previous' observation.
            previous_observations = observations

            if not isinstance(done, bool):
                # A non-bool `done` is treated as a collection of flags, which
                # must all be false: iteration then continues.
                if any(done):
                    raise RuntimeError(
                        "done should either be a bool or always false, since "
                        "we can't do partial resets."
                    )
                done = False

        self._n_episodes += 1
        logger.debug("Episode has ended.")
        # A new reset is needed before the next episode.
        self._reset = False
================================================
FILE: sequoia/common/gym_wrappers/policy_env_test.py
================================================
from typing import List
from sequoia.conftest import DummyEnvironment
from .policy_env import PolicyEnv, StateTransition
def test_iterating_with_policy():
    """Iterating over a `PolicyEnv` yields the expected (transition, reward) items.

    A deterministic policy replays a fixed list of actions, and each yielded
    item is compared against hard-coded expected observations and rewards.
    """
    env = DummyEnvironment()
    env = PolicyEnv(env)
    env.seed(123)
    actions = [0, 1, 1, 2, 1, 1, 1, 1]
    expected_obs = [0, 0, 1, 2, 1, 2, 3, 4, 5]
    expected_rewards = [5, 4, 3, 4, 3, 2, 1, 0]
    # NOTE: `expected_dones` is not checked anywhere below.
    expected_dones = [False, False, False, False, False, False, False, True]
    # Expect the transitions to have this form.
    expected_transitions = list(zip(expected_obs[0:], actions[0:], expected_obs[1:]))
    # NOTE: `reset_obs` is only referenced by the commented-out lines below.
    reset_obs = 0
    # obs = env.reset()
    # assert obs == reset_obs
    n_calls = 0

    def custom_policy(observations, action_space):
        # Deterministic policy used for testing purposes.
        nonlocal n_calls
        action = actions[n_calls]
        n_calls += 1
        return action

    n_expected_transitions = len(actions)
    env.set_policy(custom_policy)
    actual_transitions: List[StateTransition] = []
    i = 0
    for i, batch in enumerate(env):
        print(f"Step {i}: batch: {batch}")
        state_transition, reward = batch
        actual_transitions.append(state_transition)
        observation, action, next_observation = state_transition.as_tuple()
        assert observation == expected_obs[i]
        assert next_observation == expected_obs[i + 1]
        assert action == actions[i]
        assert reward == expected_rewards[i]
    # The episode should have lasted exactly `len(actions)` steps.
    assert i == n_expected_transitions - 1
    assert len(actual_transitions) == n_expected_transitions
    assert [v.as_tuple() for v in actual_transitions] == expected_transitions
================================================
FILE: sequoia/common/gym_wrappers/smooth_environment.py
================================================
"""TODO: A Wrapper that creates smooth transitions between tasks.
Could be based on the MultiTaskEnvironment, but with a moving average update of
the task, rather than setting a brand new random task.
There could also be some kind of 'task_duration' parameter, and the model does
linear or smoothed-out transitions between them depending on the step number?
"""
from typing import Any, Callable, Dict, List, Optional, Union
import gym
import numpy as np
from gym import spaces
from sequoia.common.spaces.sparse import Sparse
from sequoia.utils.logging_utils import get_logger
from .multi_task_environment import MultiTaskEnvironment, add_task_labels
logger = get_logger(__name__)
## TODO (@lebrice): Really cool idea!: Create a TaskSchedule class that inherits
# from Dict and when you __getitem__ a missing key, returns an interpolation!
class SmoothTransitions(MultiTaskEnvironment):
    """Extends MultiTaskEnvironment to support smooth task boundaries.

    Same as `MultiTaskEnvironment`, but when in between two tasks, the
    environment will have its values set to a linear interpolation of the
    attributes from the two neighbouring tasks.

    ```
    env = gym.make("CartPole-v0")
    env = SmoothTransitions(env, task_schedule={
        10: dict(length=1.0),
        20: dict(length=2.0),
    })
    env.seed(123)
    env.reset()
    ```

    At step 0, the length is the default value (0.5)
    at step 1, the length is 0.5 + (1 / 10) * (1.0-0.5) = 0.55
    at step 2, the length is 0.5 + (2 / 10) * (1.0-0.5) = 0.60,
    etc.

    NOTE: This only works with float attributes at the moment.
    """

    def __init__(
        self,
        env: gym.Env,
        task_schedule: Dict[int, Dict[str, float]] = None,
        task_params: List[str] = None,
        noise_std: float = 0.2,
        add_task_dict_to_info: bool = False,
        add_task_id_to_obs: bool = False,
        new_random_task_on_reset: bool = False,
        starting_step: int = 0,
        nb_tasks: int = None,
        max_steps: int = None,
        seed: int = None,
        only_update_on_episode_end: bool = False,
    ):
        """Wraps the environment, allowing for smooth task transitions.

        Same as `MultiTaskEnvironment`, but when in between two tasks, the
        environment will have its values set to a linear interpolation of the
        attributes from the two neighbouring tasks.

        TODO: Should we update the task parameters only on resets? or at each
        step? Might save a little bit of compute to only do it on resets, but
        then it's not exactly as 'smooth' as we would like it to be, especially
        if a single episode can be very long!

        NOTE: Assumes that the attributes are floats for now.

        Args:
            env (gym.Env): The gym environment to wrap.
            task_schedule (Dict[int, Dict[str, float]], optional) (Same as
                `MultiTaskEnvironment`): Dict mapping from a given step
                to the attributes to be set at that time. Interpolations
                between the two neighbouring tasks will be used between task
                transitions.
            only_update_on_episode_end (bool, optional): When `False` (default),
                update the attributes of the environment smoothly after each
                step. When `True`, only update at the end of episodes (when
                `reset()` is called).

        Raises:
            RuntimeError: If a value of `task_schedule` is not a dict, or if
                neither `task_schedule` nor `task_params` is given.
            NotImplementedError: If the unwrapped env is a `VectorEnv`.
        """
        if task_schedule:
            if not all(isinstance(value, dict) for value in task_schedule.values()):
                raise RuntimeError("Task schedule values should be dicts of attributes to change.")
            # The task params are all the attributes mentioned in the schedule.
            task_params = list(
                set().union(*[task_dict.keys() for task_dict in task_schedule.values()])
            )
        elif not task_params:
            raise RuntimeError(
                "This wrapper needs either a `task_schedule` or `task_params` (the environment "
                "attributes to modify)"
            )

        super().__init__(
            env,
            task_schedule=task_schedule,
            task_params=task_params,
            noise_std=noise_std,
            add_task_dict_to_info=add_task_dict_to_info,
            add_task_id_to_obs=add_task_id_to_obs,
            new_random_task_on_reset=new_random_task_on_reset,
            starting_step=starting_step,
            nb_tasks=nb_tasks,
            max_steps=max_steps,
            seed=seed,
        )
        self.only_update_on_episode_end = only_update_on_episode_end
        if self._max_steps is None and len(self.task_schedule) > 1:
            # TODO: DO we want to prevent going past the 'task step' in the task schedule?
            pass

        if isinstance(self.env.unwrapped, gym.vector.VectorEnv):
            raise NotImplementedError(
                "This isn't really supposed to be applied on top of a "
                "vectorized environment, rather, it should be used within each"
                " individual env."
            )

        if self.add_task_id_to_obs:
            nb_tasks = nb_tasks if nb_tasks is not None else len(self.task_schedule)
            # The task label space is fully sparse (sparsity=1.0).
            self.observation_space = add_task_labels(
                self.env.observation_space,
                Sparse(spaces.Discrete(n=nb_tasks), sparsity=1.0),
            )

    def step(self, *args, **kwargs):
        """Step the env, updating the task first unless updates only happen on resets."""
        if not self.only_update_on_episode_end:
            self.smooth_update()
        results = super().step(*args, **kwargs)
        return results

    def reset(self, **kwargs):
        """Reset the env, updating the task first when updates only happen on resets."""
        # TODO: test this out.
        if self.only_update_on_episode_end:
            self.smooth_update()
        return super().reset(**kwargs)

    @property
    def current_task_id(self) -> Optional[int]:
        """Returns the 'index' of the current task within the task schedule.

        In this case, we return None, since there aren't clear task boundaries.
        """
        return None

    def task_array(self, task: Dict[str, float]) -> np.ndarray:
        """Return the values of `task` as an array, ordered like `self.task_params`.

        Attributes missing from `task` take their value from `self.default_task`.
        """
        return np.array([task.get(k, self.default_task[k]) for k in self.task_params])

    def smooth_update(self) -> None:
        """Set `current_task` to an interpolation of the tasks in the schedule.

        For every attribute in `self.task_params`, the value is linearly
        interpolated (with `np.interp`) at the current step `self.steps`,
        between the values given by the task schedule. Tasks that don't specify
        an attribute use its value from `self.default_task`.
        """
        # The sorted schedule is the same for every attribute: compute it once.
        schedule = sorted(self.task_schedule.items())
        steps: List[int] = [step for step, _ in schedule]

        current_task: Dict[str, float] = {}
        for attr in self.task_params:
            # Value of this attribute at each of the steps in the schedule.
            fixed_points: List[float] = [
                task.get(attr, self.default_task[attr]) for _, task in schedule
            ]
            current_task[attr] = np.interp(
                x=self.steps,
                xp=steps,
                fp=fixed_points,
            )
        self.current_task = current_task
================================================
FILE: sequoia/common/gym_wrappers/smooth_environment_test.py
================================================
from typing import Dict
import gym
import matplotlib.pyplot as plt
import numpy as np
from .smooth_environment import SmoothTransitions
def test_task_schedule():
    """Length and gravity are interpolated linearly between the start and end values."""
    environment_name = "CartPole-v0"
    original = gym.make(environment_name)
    starting_length = original.length
    starting_gravity = original.gravity

    # Both attributes increase linearly, up to 5 times their starting value.
    end_length = 5 * starting_length
    end_gravity = 5 * starting_gravity
    total_steps = 100
    task_schedule: Dict[int, Dict[str, float]] = {
        total_steps: dict(length=end_length, gravity=end_gravity),
    }

    env = SmoothTransitions(original, task_schedule=task_schedule)
    env.seed(123)
    env.reset()
    assert env.gravity == starting_gravity
    assert env.length == starting_length

    params: Dict[int, Dict[str, float]] = {}
    for step in range(total_steps):
        # Fraction of the way towards the end of the schedule.
        progress = step / total_steps
        expected_length = starting_length + progress * (end_length - starting_length)
        expected_gravity = starting_gravity + progress * (end_gravity - starting_gravity)

        _, reward, done, _ = env.step(env.action_space.sample())
        assert np.isclose(env.length, expected_length)
        assert np.isclose(env.gravity, expected_gravity)
        params[step] = env.current_task.copy()

    env.close()
    plt.close()
def test_update_only_on_reset():
    """Test that when using the 'only_update_on_episode_end' argument with a
    value of True, the smooth updates don't occur during the episodes, but only
    once after an episode has ended (when `reset()` is called).
    """
    total_steps = 100
    original = gym.make("CartPole-v0")
    start_length = original.length
    end_length = 10.0
    task_schedule = {total_steps: dict(length=end_length)}
    env = SmoothTransitions(
        original,
        task_schedule=task_schedule,
        only_update_on_episode_end=True,
    )
    env.reset()
    env.seed(123)
    # The length should stay at its starting value until the first reset.
    expected_length = start_length
    for i in range(total_steps):
        assert env.steps == i
        _, _, done, _ = env.step(env.action_space.sample())
        assert env.steps == i + 1
        if done:
            _ = env.reset()
            # The expected value only changes when the env is reset.
            expected_length = start_length + ((i + 1) / total_steps) * (end_length - start_length)
        assert np.isclose(env.length, expected_length)
def test_task_id_is_always_None():
    """The task label is None in sampled, stepped and reset observations.

    Also re-checks that the length only changes when the env is reset, as
    `only_update_on_episode_end=True` is used here.
    """
    total_steps = 100
    original = gym.make("CartPole-v0")
    start_length = original.length
    end_length = 10.0
    task_schedule = {total_steps: dict(length=end_length)}
    env = SmoothTransitions(
        original,
        task_schedule=task_schedule,
        only_update_on_episode_end=True,
        add_task_id_to_obs=True,
        add_task_dict_to_info=True,
    )
    # Samples from the observation space should never have a task label.
    for observation in (env.observation_space.sample() for i in range(100)):
        x, task_id = observation["x"], observation["task_labels"]
        assert task_id is None
    env.reset()
    env.seed(123)
    expected_length = start_length
    for i in range(total_steps):
        assert env.steps == i
        obs, _, done, _ = env.step(env.action_space.sample())
        x, task_id = obs["x"], obs["task_labels"]
        assert task_id is None
        assert env.steps == i + 1
        if done:
            obs = env.reset()
            x, task_id = obs["x"], obs["task_labels"]
            assert task_id is None
            # The expected value only changes when the env is reset.
            expected_length = start_length + ((i + 1) / total_steps) * (end_length - start_length)
        assert np.isclose(env.length, expected_length)
================================================
FILE: sequoia/common/gym_wrappers/step_callback_wrapper.py
================================================
"""TODO: Make a wrapper that calls a given function/callback when a given step is reached.
"""
from abc import ABC, abstractmethod
from typing import Callable, List, Tuple, Union
import gym
from .utils import IterableWrapper
class Callback(Callable[[int, gym.Env, Tuple], None], ABC):
    """Base class for callbacks called with `(step, env, step_results)`."""

    @abstractmethod
    def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None:
        """Run the callback.

        Args:
            step: The step number passed by the caller.
            env: The environment passed by the caller.
            step_results: The results of the env step.
        """
        raise NotImplementedError()
class StepCallback(Callback, ABC):
    """Callback tied to a single step number, stored in the `step` attribute."""

    def __init__(self, step: int, func: Callable[[int, gym.Env, Tuple], None] = None):
        self.func = func
        self.step = step

    def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None:
        """Delegate to `func`, or raise if none was given."""
        if not self.func:
            raise NotImplementedError("Create your own callback or pass a func to use.")
        return self.func(step, env, step_results)
class PeriodicCallback(Callback):
    """Callback with a `period` and an `offset`, optionally delegating to `func`."""

    def __init__(
        self,
        period: int,
        offset: int = 0,
        func: Callable[[int, gym.Env, Tuple], None] = None,
    ):
        """Create the callback.

        Args:
            period: Number of steps between two calls. Must not be zero.
            offset: First step at which the callback should be called.
            func: Function called with `(step, env, step_results)`.

        Raises:
            ValueError: If `period` is zero.
        """
        if period == 0:
            # A zero period makes `steps % period` raise a ZeroDivisionError.
            raise ValueError("period must be non-zero.")
        self.period = period
        self.offset = offset
        self.func = func

    def __call__(self, step: int, env: gym.Env, step_results: Tuple) -> None:
        """Call `func`, or raise `NotImplementedError` if none was given."""
        if self.func:
            return self.func(step, env, step_results)
        raise NotImplementedError("Create your own callback or pass a func to use.")
class StepCallbackWrapper(IterableWrapper):
    """Wrapper that will execute some callbacks when certain steps are reached."""

    def __init__(
        self,
        env: gym.Env,
        callbacks: List[Callback] = None,
    ):
        """Wrap `env`, starting with the given callbacks (if any)."""
        super().__init__(env)
        # Number of calls to `step` completed so far.
        self._steps = 0
        self.callbacks = callbacks or []

    def add_callback(self, callback: Callback) -> None:
        """Register a callback, which is considered on every later step."""
        self.callbacks.append(callback)

    def add_step_callback(
        self, step: int, callback: Union[StepCallback, Callable[[int, gym.Env, Tuple], None]]
    ):
        """Register a callback to run at the given step.

        `callback` can be a `StepCallback` (whose `step` must equal `step`), or
        any callable, which then gets wrapped in a `StepCallback`.
        """
        if isinstance(callback, StepCallback):
            assert step == callback.step
        else:
            callback = StepCallback(step=step, func=callback)
        self.add_callback(callback)

    def add_periodic_callback(
        self,
        period: int,
        callback: Union[PeriodicCallback, Callable[[int, gym.Env, Tuple], None]],
        offset: int = 0,
    ):
        """Register a callback to run every `period` steps, starting at `offset`.

        `callback` can be a `PeriodicCallback` (whose `period` and `offset` must
        match the arguments), or any callable, which then gets wrapped in a
        `PeriodicCallback`.
        """
        if isinstance(callback, PeriodicCallback):
            assert period == callback.period
            assert offset == callback.offset
        else:
            callback = PeriodicCallback(period=period, offset=offset, func=callback)
        self.add_callback(callback)

    def step(self, action):
        """Step the env, then run the callbacks that are due at this step.

        Callbacks receive the step count from *before* it is incremented, so
        the first call to `step` runs the callbacks for step 0.
        """
        step_results = super().step(action)

        for callback in self.callbacks:
            if isinstance(callback, StepCallback):
                if callback.step == self._steps:
                    callback(self._steps, self, step_results)
            elif isinstance(callback, PeriodicCallback):
                if (
                    self._steps >= callback.offset
                    and (self._steps - callback.offset) % callback.period == 0
                ):
                    callback(self._steps, self, step_results)
            else:
                # Any other callable is called at every step, assuming that it
                # checks in its own __call__ whether it should do something.
                callback(self._steps, self, step_results)

        self._steps += 1
        return step_results
================================================
FILE: sequoia/common/gym_wrappers/step_callback_wrapper_test.py
================================================
from typing import Tuple
import gym
from .step_callback_wrapper import PeriodicCallback, StepCallback, StepCallbackWrapper
i: int = 0
def increment_i(step: int, env: gym.Env, step_results: Tuple):
    """Callback that adds one to the module-level counter `i`."""
    global i
    before = i
    print(f"Incrementing i at step {step}: ({before} -> {before+1})")
    i = before + 1
def decrement_i(step: int, env: gym.Env, step_results: Tuple):
    """Callback that subtracts one from the module-level counter `i`."""
    global i
    before = i
    print(f"Decrementing i at step {step}: ({before} -> {before-1})")
    i = before - 1
def test_step_callback():
    """A `StepCallback` for step 7 runs exactly once, during the 8th call to `step`."""
    global i
    trigger_step = 7
    callback = StepCallback(step=trigger_step, func=increment_i)
    env = StepCallbackWrapper(gym.make("CartPole-v0"), callbacks=[callback])
    env.reset()
    i = 0
    for step in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
        # The counter is 0 before the trigger step, and 1 from then on.
        expected_count = 0 if step < trigger_step else 1
        assert i == expected_count
        if done:
            env.reset()
    env.close()
def test_periodic_callback():
    """Two periodic callbacks (period 5, offsets 0 and 2) alternately raise and lower `i`."""
    global i
    i = 0
    callbacks = [
        PeriodicCallback(period=5, func=increment_i),
        PeriodicCallback(period=5, func=decrement_i, offset=2),
    ]
    env = StepCallbackWrapper(gym.make("CartPole-v0"), callbacks=callbacks)
    env.reset()

    # Value of the counter after each of the first ten steps.
    expected_counts = [1, 1, 0, 0, 0, 1, 1, 0, 0, 0]
    for expected_count in expected_counts:
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            env.reset()
        assert i == expected_count
    env.close()
================================================
FILE: sequoia/common/gym_wrappers/transform_wrappers.py
================================================
from typing import Callable, Union
import typing
import gym
from gym import Space, spaces
from gym.wrappers import TransformObservation as TransformObservation_
from gym.wrappers import TransformReward as TransformReward_
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support, has_tensor_support
from sequoia.common.transforms.compose import Compose
from sequoia.common.transforms.transform import Transform
# if typing.TYPE_CHECKING:
# from sequoia.common.transforms.transform import Transform
from sequoia.utils.logging_utils import get_logger
from .utils import IterableWrapper
logger = get_logger(__name__)
class TransformObservation(TransformObservation_, IterableWrapper):
    """Applies a function `f` to the observations (and observation space) of an env."""

    def __init__(self, env: gym.Env, f: Union[Callable, Compose]):
        """Wrap `env`, transforming its observations with `f`.

        A plain list of transforms is first combined with `Compose`. The
        observation space is obtained by applying `f` to the env's space.
        """
        if isinstance(f, list) and not callable(f):
            f = Compose(f)
        super().__init__(env, f=f)
        self.f: "Transform"
        self.observation_space = self(self.env.observation_space)
        # Keep the tensor support of the wrapped env's observation space.
        if has_tensor_support(self.env.observation_space):
            self.observation_space = add_tensor_support(self.observation_space)

    def __call__(self, *args, **kwargs):
        """Apply the transform `f` to the given arguments."""
        return self.f(*args, **kwargs)

    def __iter__(self):
        """Iterate over the env.

        When wrapping a passive env, yields `(f(obs), rewards)` for each item of
        the env. Otherwise, yields the items of the parent class' iterator.
        """
        if self.wrapping_passive_env:
            # TODO: For now, we assume that the passive environment has already
            # split stuff correctly for us to use.
            for obs, rewards in self.env:
                yield self(obs), rewards
        else:
            # This method is a generator, so the parent's iterator has to be
            # delegated to: returning it would make the iteration empty.
            yield from super().__iter__()
class TransformReward(TransformReward_, IterableWrapper):
    """Applies a function `f` to the rewards (and to the reward space) of an env."""

    def __init__(self, env: gym.Env, f: Union[Callable, Compose]):
        is_plain_list = isinstance(f, list) and not callable(f)
        if is_plain_list:
            f = Compose(f)
        super().__init__(env, f=f)
        self.f: Compose

        # Start from the env's reward space, or build one from its reward range.
        if not hasattr(self.env, "reward_space"):
            reward_range = self.env.reward_range
            self.reward_space = spaces.Box(
                low=reward_range[0],
                high=reward_range[1],
                shape=(),
            )
        else:
            self.reward_space = self.env.reward_space

        # Best effort: the reward space stays as-is if `f` can't transform it.
        try:
            self.reward_space = self.f(self.reward_space)
            logger.debug(f"New reward space after transform: {self.reward_space}")
        except Exception as e:
            message = (
                f"Don't know how the transform {self.f} will impact the "
                f"reward space! (Exception: {e})"
            )
            logger.warning(UserWarning(message))
class TransformAction(IterableWrapper):
    """Applies a function `f` to the actions before they reach the wrapped env."""

    def __init__(self, env: gym.Env, f: Callable[[Union[gym.Env, Space]], Union[gym.Env, Space]]):
        if isinstance(f, list) and not callable(f):
            f = Compose(f)
        super().__init__(env)
        self.f: Compose = f

        # The action space only changes when `f` is a `Transform`.
        self.action_space = self.env.action_space
        if not isinstance(self.f, Transform):
            return
        self.action_space = self.f(self.env.action_space)

    def step(self, action):
        """Transform the action and pass it on to the wrapped env."""
        transformed_action = self.action(action)
        return self.env.step(transformed_action)

    def action(self, action):
        """Return the result of applying `f` to `action`."""
        return self.f(action)
================================================
FILE: sequoia/common/gym_wrappers/transform_wrappers_test.py
================================================
import gym
import numpy as np
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from sequoia.conftest import monsterkong_required
from .transform_wrappers import TransformObservation
@monsterkong_required
def test_compose_on_image_space():
    """A composed transform maps image spaces as expected, alone and through the wrapper."""
    pipeline = Compose([Transforms.to_tensor, Transforms.three_channels])
    raw_space = Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
    transformed_space = Image(0, 1.0, shape=(3, 64, 64), dtype=np.float32)

    # Applying the transform directly on the space.
    assert pipeline(raw_space) == transformed_space

    env = gym.make("MetaMonsterKong-v0")
    native_space = env.observation_space
    assert native_space == gym.spaces.Box(0, 255, (64, 64, 3), np.uint8)
    assert native_space == raw_space

    # Applying the transform through the wrapper.
    wrapped_env = TransformObservation(env, pipeline)
    assert wrapped_env.observation_space == transformed_space
import pytest
import torch
from torchvision.datasets import MNIST
from sequoia.common.transforms import Compose
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Need cuda for this test.")
def test_move_wrapper_and_iteration():
    """Observations and rewards end up on the GPU when 'move' transforms wrap a passive env.

    Checks the items obtained through iteration, and the rewards returned by `send`.
    """
    batch_size = 1
    transforms = Compose([Transforms.to_tensor])
    dataset = MNIST("data", transform=transforms)
    obs_space = Image(0, 255, (1, 28, 28), np.uint8)
    # The transform is also applied to the space.
    obs_space = transforms(obs_space)
    from sequoia.settings.sl.environment import PassiveEnvironment

    env = PassiveEnvironment(
        dataset,
        batch_size=batch_size,
        n_classes=10,
        observation_space=obs_space,
    )
    from functools import partial

    from sequoia.utils.generic_functions import move

    from .transform_wrappers import TransformReward

    # Both wrappers use `move` with device="cuda" as their transform.
    env = TransformObservation(env, partial(move, device="cuda"))
    env = TransformReward(env, partial(move, device="cuda"))
    obs, rewards_next = next(iter(env))
    rewards_send = env.send(env.action_space.sample())
    assert obs.device.type == "cuda"
    assert rewards_next.device.type == "cuda"
    assert rewards_send.device.type == "cuda"
================================================
FILE: sequoia/common/gym_wrappers/utils.py
================================================
import inspect
from abc import ABC
from functools import partial
from typing import (
Any,
Callable,
Dict,
Generic,
Iterator,
NamedTuple,
Optional,
Sequence,
Tuple,
Type,
TypeVar,
Union,
)
import warnings
import gym
import numpy as np
from gym.envs import registry
from gym.envs.classic_control import (
AcrobotEnv,
CartPoleEnv,
Continuous_MountainCarEnv,
MountainCarEnv,
PendulumEnv,
)
from gym.envs.registration import load
from gym.vector import VectorEnv
from torch.utils.data import IterableDataset
from sequoia.utils.logging_utils import get_logger
# Env classes that `is_classic_control_env` treats as classic-control envs.
classic_control_envs = (
    AcrobotEnv,
    CartPoleEnv,
    PendulumEnv,
    MountainCarEnv,
    Continuous_MountainCarEnv,
)
# Name prefixes for the classic-control envs.
# NOTE(review): presumably matched against env ids - confirm against the callers.
classic_control_env_prefixes: Tuple[str, ...] = (
    "CartPole",
    "Pendulum",
    "Acrobot",
    "MountainCar",
    "MountainCarContinuous",
)
logger = get_logger(__name__)
def is_classic_control_env(env: Union[str, gym.Env, Type[gym.Env]]) -> bool:
    """Tell whether `env` designates one of Gym's classic-control environments.

    Parameters
    ----------
    env : Union[str, gym.Env, Type[gym.Env]]
        Env id, env class, or env instance. A `partial(gym.make, "<env id>")` is
        also tolerated (with a warning) and treated like the env id.

    Returns
    -------
    bool
        Whether the given env is a classic-control env from Gym. Env ids that the
        registry cannot resolve give `False`.

    Examples:

    >>> import gym
    >>> is_classic_control_env("CartPole-v0")
    True
    >>> is_classic_control_env("Breakout-v1")
    False
    >>> is_classic_control_env("bob")
    False
    >>> from gym.envs.classic_control import CartPoleEnv
    >>> is_classic_control_env(CartPoleEnv)
    True
    """
    # Unpack `partial(gym.make, "<env id>")` into the bare env id.
    if (
        isinstance(env, partial)
        and env.func is gym.make
        and isinstance(env.args[0], str)
    ):
        logger.warning(
            RuntimeWarning(
                "Don't pass partial(gym.make, 'some_env'), just use the env string instead."
            )
        )
        env = env.args[0]

    if isinstance(env, str):
        try:
            entry_point = registry.spec(env).entry_point
            if isinstance(entry_point, str):
                return "gym.envs.classic_control" in entry_point
            if inspect.isclass(entry_point):
                # Fall through to the class check below.
                env = entry_point
        except gym.error.Error as e:
            # malformed env id, for instance.
            logger.debug(f"can't tell if env id {env} is a classic-control env! ({e})")
            return False

    if inspect.isclass(env):
        return issubclass(env, classic_control_envs)
    return isinstance(env, gym.Env) and isinstance(env.unwrapped, classic_control_envs)
def is_proxy_to(env, env_type_or_types: Union[Type[gym.Env], Tuple[Type[gym.Env], ...]]) -> bool:
    """Returns whether `env` is a proxy to an env of the given type or types.

    The check looks at `env.unwrapped`: it has to be an `EnvironmentProxy` whose
    `_environment_type` is a subclass of `env_type_or_types`.
    """
    from sequoia.client.env_proxy import EnvironmentProxy

    if not isinstance(env.unwrapped, EnvironmentProxy):
        return False
    return issubclass(env.unwrapped._environment_type, env_type_or_types)
def is_atari_env(env: Union[str, gym.Env]) -> bool:
    """Returns `True` if the given env id or env instance is an Atari environment.

    Parameters
    ----------
    env : Union[str, gym.Env]
        Env id, or env instance.

    Returns
    -------
    bool
        Whether the given env is an Atari env from Gym. For env ids, this is decided
        from the namespace of the registered spec (`"ALE"`); ids whose name or
        namespace is not registered give `False`.

    Raises
    ------
    RuntimeError
        If `env` is neither a `str` nor a `gym.Env`.
    NotImplementedError
        If `env` is an env instance and the atari dependencies are installed
        (the instance check isn't implemented yet).

    Examples:

    >>> import gym
    >>> is_atari_env("CartPole-v0")
    False
    >>> is_atari_env("bob")
    False
    >>> # is_atari_env("ALE/Breakout-v5")
    # True
    >>> # is_atari_env("Breakout-v0")
    # True

    NOTE: Removing this doctest, since recent changes to gym have changed this a bit.
    >>> #from gym.envs import atari
    >>> #is_atari_env(atari.AtariEnv) # requires atari_py to be installed
    # True
    """
    from sequoia.settings.rl.envs import ATARI_PY_INSTALLED

    if not isinstance(env, (str, gym.Env)):
        raise RuntimeError(f"`env` needs to be either a str or gym env, not {env}")

    if isinstance(env, str):
        try:
            spec = registry.spec(env)
        except (gym.error.NameNotFound, gym.error.NamespaceNotFound):
            return False
        # Compare by value: `is "ALE"` tested object identity, which only works by
        # accident of string interning. A `None` namespace simply compares unequal.
        return spec.namespace == "ALE"

    if not ATARI_PY_INSTALLED:
        return False
    # NOTE: The previous partial / entry-point / `AtariEnv` checks that used to
    # follow this statement could never run (this raise is unconditional), so that
    # dead code was removed.
    raise NotImplementedError("TODO: Check if isinstance(env.unwrapped, AtariEnv)")
def get_env_class(env: Union[str, gym.Env, Type[gym.Env], Callable[[], gym.Env]]) -> Type[gym.Env]:
    """Return the environment class designated by `env`.

    Handles, in order: `functools.partial` objects (recursing on the env id given
    to `gym.make`, or on the wrapped callable otherwise), strings (passed to
    `gym.envs.registration.load`), wrappers (class of the unwrapped env), env
    instances, and env classes.

    Raises
    ------
    NotImplementedError
        When `env` is none of the above.
    """
    if isinstance(env, partial):
        made_from_id = env.func is gym.make and isinstance(env.args[0], str)
        target = env.args[0] if made_from_id else env.func
        return get_env_class(target)

    if isinstance(env, str):
        return load(env)

    if isinstance(env, gym.Wrapper):
        # Report the class of the env at the bottom of the wrapper stack.
        return type(env.unwrapped)
    if isinstance(env, gym.Env):
        return type(env)

    if inspect.isclass(env) and issubclass(env, gym.Env):
        return env

    raise NotImplementedError(f"Don't know how to get the class of env being used by {env}!")
def is_monsterkong_env(env: Union[str, gym.Env, Callable[[], gym.Env]]) -> bool:
    """Returns whether `env` is a (Meta)MonsterKong environment.

    Strings are matched (case-insensitively) on their "metamonsterkong" /
    "monsterkong" prefix. Classes and env instances are checked against
    `MetaMonsterKongEnv`; when `meta_monsterkong` cannot be imported, the result
    is `False`.
    """
    if isinstance(env, str):
        env_id = env.lower()
        return env_id.startswith(("metamonsterkong", "monsterkong"))

    try:
        from meta_monsterkong.make_env import MetaMonsterKongEnv

        if inspect.isclass(env):
            return issubclass(env, MetaMonsterKongEnv)
        return isinstance(env, gym.Env) and isinstance(env, MetaMonsterKongEnv)
    except ImportError:
        return False
# NOTE(review): `logger` is already created earlier in this module; this second
# assignment looks redundant.
logger = get_logger(__name__)
# Type variable for the kind of environment being wrapped (bound to `gym.Env`).
EnvType = TypeVar("EnvType", bound=gym.Env)
# Unconstrained type variables for observations, actions and rewards.
ObservationType = TypeVar("ObservationType")
ActionType = TypeVar("ActionType")
RewardType = TypeVar("RewardType")
class StepResult(NamedTuple):
    """Named 4-tuple with the `(observation, reward, done, info)` fields.

    `done` and `info` are either single values or sequences of values.
    """

    observation: ObservationType
    reward: RewardType
    done: Union[bool, Sequence[bool]]
    info: Union[Dict, Sequence[Dict]]
def has_wrapper(
    env: gym.Wrapper,
    wrapper_type_or_types: Union[Type[gym.Wrapper], Tuple[Type[gym.Wrapper], ...]],
) -> bool:
    """Returns whether `env`, or anything it wraps, is of type `wrapper_type_or_types`.

    Args:
        env (gym.Wrapper): a gym.Wrapper or a gym environment.
        wrapper_type_or_types: A type (or tuple of types) of Wrapper to check for.

    Returns:
        bool: Whether a layer of that type is found while following the `.env`
        attributes, starting with `env` itself.
    """
    layer = env
    while True:
        if isinstance(layer, wrapper_type_or_types):
            return True
        # Stop at the innermost env. The identity check avoids cycles, although
        # that would be very weird to encounter.
        if not hasattr(layer, "env") or layer.env is layer:
            return False
        layer = layer.env
class MayCloseEarly(gym.Wrapper, ABC):
    """ABC for Wrappers that may close an environment early depending on some
    conditions.

    WIP: Also prevents calling `step` and `reset` on a closed env.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env)
        self._is_closed: bool = False

    def is_closed(self) -> bool:
        """Returns whether this env is closed.

        When the wrapped env has an `is_closed` method of its own, its answer is
        used (and cached), so that we're not 'overriding' it.
        """
        wrapped = self.env
        if hasattr(wrapped, "is_closed"):
            assert callable(wrapped.is_closed)
            self._is_closed = wrapped.is_closed()
        return self._is_closed

    def closed_error_message(self) -> str:
        """Return the error message to use when attempting to use the closed env.

        Wrappers that close once some condition is met (e.g. a number of episodes
        has been performed) can override this to give a more relevant message.
        """
        return "Env is closed"

    def _raise_if_closed(self, method_name: str) -> None:
        """Raise a `ClosedEnvironmentError` if the env is closed."""
        if self.is_closed():
            raise gym.error.ClosedEnvironmentError(
                f"Can't call `{method_name}()`: {self.closed_error_message()}"
            )

    def reset(self, **kwargs):
        self._raise_if_closed("reset")
        return super().reset(**kwargs)

    def step(self, action):
        self._raise_if_closed("step")
        return super().step(action)

    def close(self) -> None:
        # TODO: Prevent closing an environment twice?
        # Closing an already-closed env is a no-op rather than an error.
        if not self.is_closed():
            self.env.close()
            self._is_closed = True
from .env_dataset import EnvDataset
class IterableWrapper(MayCloseEarly, IterableDataset, Generic[EnvType], ABC):
    """ABC for a gym Wrapper that supports iterating over the environment.

    This allows us to wrap dataloader-based Environments and still use the gym
    wrapper conventions, as well as iterate over a gym environment as in the
    Active-dataloader case.

    NOTE: We have IterableDataset as a base class here so that we can pass a wrapped env
    to the DataLoader function. This wrapper however doesn't perform the actual
    iteration, and instead depends on the wrapped environment already supporting
    iteration.
    """

    def __init__(self, env: gym.Env):
        super().__init__(env)
        from sequoia.settings.sl import PassiveEnvironment

        # Whether the env at the bottom of the wrapper stack is a 'passive' env, in
        # which case iteration yields `(observations, rewards)` pairs.
        self.wrapping_passive_env = isinstance(self.unwrapped, PassiveEnvironment)

    @property
    def is_vectorized(self) -> bool:
        """Returns whether this wrapper is wrapping a vectorized environment."""
        return isinstance(self.unwrapped, VectorEnv)

    def __next__(self):
        """Returns the next observation.

        When wrapping an `EnvDataset` (or a proxy to one / to an
        `ActiveDataLoader`), this steps using *this* wrapper's `step`, with the
        action stored on the unwrapped env, so that the wrapper's transformations
        are applied. Otherwise this delegates to the wrapped env's `__next__`.
        """
        # TODO: This is tricky. We want the wrapped env to use *our* step,
        # reset(), action(), observation(), reward() methods, instead of its own!
        # Otherwise if we are transforming observations for example, those won't
        # be affected.
        from sequoia.settings.rl.environment import ActiveDataLoader

        if has_wrapper(self.env, EnvDataset) or is_proxy_to(
            self.env, (EnvDataset, ActiveDataLoader)
        ):
            obs, reward, done, info = self.step(self.unwrapped.action_)
            return obs
        return self.env.__next__()

    def observation(self, observation):
        """Hook applied to observations. Identity by default."""
        return observation

    def action(self, action):
        """Hook applied to actions. Identity by default."""
        return action

    def reward(self, reward):
        """Hook applied to rewards. Identity by default."""
        return reward

    def get_length(self) -> Optional[int]:
        """Attempts to return the "length" (in number of steps/batches) of this env.

        When not possible, returns None.

        NOTE: This is a bit ugly, but the idea seems alright.
        """
        try:
            # Try to call self.__len__() without recursing into the wrapped env:
            return len(self)
        except TypeError:
            pass
        try:
            # Try to call self.env.__len__() without recursing into the wrapped^2 env:
            return len(self.env)
        except TypeError:
            pass
        try:
            # Try to call self.env.__len__(), allowing recursing down the chain.
            # NOTE: When no `__len__` can be found, the attribute lookup itself
            # fails with an AttributeError (not a TypeError), which also has to be
            # caught here for the fallbacks below to be reached.
            return self.env.__len__()
        except (TypeError, AttributeError):
            pass
        try:
            # If all else fails, delegate to the wrapped env's get_length() method, if any:
            return self.env.get_length()
        except AttributeError:
            pass
        # In the worst case, return None, meaning that we don't have a length.
        return None

    def send(self, action):
        """Sends an action to the environment and returns the resulting reward.

        For passive envs, the action and reward go through `self.action` and
        `self.reward` around the wrapped env's `send`. Otherwise, a step is
        performed with this wrapper's `step`, and its results are stored on the
        unwrapped env (`observation_`, `reward_`, `done_`, `info_`).
        """
        # TODO: Make `send` use `self.step`, that way wrappers can apply the same way to
        # RL and SL environments.
        if self.wrapping_passive_env:
            action = self.action(action)
            reward = self.env.send(action)
            reward = self.reward(reward)
            return reward

        self.unwrapped.action_ = action
        (
            self.unwrapped.observation_,
            self.unwrapped.reward_,
            self.unwrapped.done_,
            self.unwrapped.info_,
        ) = self.step(action)
        return self.unwrapped.reward_

    def __iter__(self) -> Iterator:
        """Iterates over the environment.

        Passive envs: yields `(observations, rewards)` pairs from the wrapped env,
        after applying `self.observation` and `self.reward` (the latter only when
        the rewards aren't None).

        Other envs: resets the env, then yields one observation at a time until
        the episode is done or the env is closed. An action has to be sent with
        `send()` after every observation, otherwise a RuntimeError is raised.
        """
        # TODO: Pretty sure this could be greatly simplified by just always using the loop from EnvDataset.
        if self.wrapping_passive_env:
            # NOTE: Also applies the `self.observation` `self.reward` methods while
            # iterating.
            for obs, rewards in self.env:
                obs = self.observation(obs)
                if rewards is not None:
                    rewards = self.reward(rewards)
                yield obs, rewards
        else:
            self.unwrapped.observation_ = self.reset()
            self.unwrapped.done_ = False
            self.unwrapped.action_ = None
            self.unwrapped.reward_ = None

            # Yield the first observation_.
            yield self.unwrapped.observation_

            if self.unwrapped.action_ is None:
                raise RuntimeError(
                    f"You have to send an action using send() between every "
                    f"observation. (env = {self})"
                )

            def done_is_true(done: Union[bool, np.ndarray, Sequence[bool]]) -> bool:
                # Plain bools (and objects with an empty `shape`) are used as-is;
                # otherwise all the entries have to be done.
                return done if isinstance(done, bool) or not done.shape else all(done)

            while not any([done_is_true(self.unwrapped.done_), self.is_closed()]):
                # Set those to None to force the user to call .send()
                self.unwrapped.action_ = None
                self.unwrapped.reward_ = None
                yield self.unwrapped.observation_
                if self.unwrapped.action_ is None:
                    raise RuntimeError(
                        f"You have to send an action using send() between every "
                        f"observation. (env = {self})"
                    )
# assert False, "WIP"
# Option 1: Return the iterator from the wrapped env. This ignores
# everything in the wrapper.
# return self.env.__iter__()
# Option 2: apply the transformations on the items yielded by the
# iterator of the wrapped env (this doesn't use the self.observaion(), self.action())
# from .transform_wrappers import TransformObservation, TransformAction, TransformReward
# return map(self.observation, self.env.__iter__())
# Option 3: Calling the method on the wrapped env, but with `self` being
# the wrapper, rather than the wrapped env:
# return type(self.env).__iter__(self)
# Option 4: Slight variation on option 3: We cut straight to the
# EnvDataset iterator.
# from sequoia.settings.rl.environment import ActiveDataLoader
# from sequoia.settings.sl.environment import PassiveEnvironment
# if has_wrapper(self.env, EnvDataset) or is_proxy_to(
# self.env, (EnvDataset, ActiveDataLoader)
# ):
# # logger.debug(f"Wrapped env is an EnvDataset, using EnvDataset.__iter__ with the wrapper as `self`.")
# return EnvDataset.__iter__(self)
# # TODO: Should probably remove this since we don't actually use this 'PolicyEnv'.
# if has_wrapper(self.env, PolicyEnv) or is_proxy_to(self.env, PolicyEnv):
# # logger.debug(f"Wrapped env is a PolicyEnv, will use PolicyEnv.__iter__ with the wrapper as `self`.")
# return PolicyEnv.__iter__(self)
# # NOTE: This works even though IterableDataset isn't a gym.Wrapper.
# if not has_wrapper(self.env, IterableDataset) and not isinstance(
# self.env, DataLoader
# ):
# logger.warning(
# UserWarning(
# f"Will try to iterate on a wrapper for env {self.env} which "
# f"doesn't have the EnvDataset or PolicyEnv wrappers and isn't "
# f"an IterableDataset."
# )
# )
# # if isinstance(self.env, DataLoader):
# # return self.env.__iter__()
# # raise NotImplementedError(f"Wrapper {self} doesn't know how to iterate on {self.env}.")
# return self.env.__iter__()
# @property
# def wrapping_passive_env(self) -> bool:
# """ Returns wether this wrapper is applied over a 'passive' env, in which case
# iterating over the env will yield (up to) 2 items, rather than just 1.
# """
# from sequoia.settings.sl.environment import PassiveEnvironment
# return isinstance(self.unwrapped, PassiveEnvironment) or is_proxy_to(
# self, PassiveEnvironment
# )
# def __setattr__(self, attr, value):
# """
# TODO: Remove/replace this:
# Redirect the __setattr__ of attributes 'owned' by the EnvDataset to
# the EnvDataset.
# We need to do this because we change the value of `self` and call
# EnvDataset.__iter__(self), which might get and set attributes to/from
# `self`, which is what you'd expect normally. However when `self` is a
# wrapper over the env, rather than the env itself, then when attributes
# are set on `self` inside __iter__ or __next__ or send, etc, they are
# actually set on the wrapper, rather than on the env.
# We solve this by detecting when an attribute with a name ending with "_"
# and part of a given list of attributes is set.
# """
# if attr.endswith("_") and has_wrapper(self.env, EnvDataset):
# if attr in {
# "observation_",
# "action_",
# "reward_",
# "done_",
# "info_",
# "n_sends_",
# }:
# # logger.debug(f"Attribute {attr} will be set on the wrapped env rather than on the wrapper itself.")
# env = self.env
# while not isinstance(env, EnvDataset) and env.env is not env:
# env = env.env
# assert isinstance(env, EnvDataset)
# setattr(env, attr, value)
# else:
# object.__setattr__(self, attr, value)
class RenderEnvWrapper(IterableWrapper):
    """Wrapper that renders the wrapped env (in "human" mode) before every step."""

    def __init__(self, env: gym.Env, display: Any = None):
        # TODO: Maybe use the given display? (`display` is currently unused.)
        super().__init__(env)

    def step(self, action):
        wrapped_env = self.env
        wrapped_env.render("human")
        return wrapped_env.step(action)
def tile_images(img_nhwc):
    """Tile N images into one big image, laid out on a `rows x cols` grid.

    TAKEN FROM https://github.com/openai/gym/pull/1624/files

    The grid has `rows = ceil(sqrt(N))` rows and `cols = ceil(N / rows)` columns.
    Unused grid cells are filled with `img_nhwc[0] * 0`.

    input: img_nhwc, list or array of images, ndim=4 once turned into array
        n = batch index, h = height, w = width, c = channel
        When the last dimension isn't 1 or 3, the images are assumed to be
        channels-first and are transposed to channels-last.
    returns:
        ndarray with ndim=3 and shape (rows * h, cols * w, c)
    """
    images = np.asarray(img_nhwc)
    n_images, height, width, channels = images.shape
    if channels not in {1, 3}:
        # Move the channels dimension last: (n, c, h, w) -> (n, h, w, c)
        images = images.transpose([0, 2, 3, 1])
        n_images, height, width, channels = images.shape
    assert channels in {1, 3}

    rows = int(np.ceil(np.sqrt(n_images)))
    cols = int(np.ceil(float(n_images) / rows))

    # Pad with blank images so that the grid is full.
    tiles = list(images)
    tiles.extend(images[0] * 0 for _ in range(n_images, rows * cols))

    grid = np.array(tiles).reshape(rows, cols, height, width, channels)
    # (rows, cols, h, w, c) -> (rows, h, cols, w, c), then merge the grid axes
    # with the image axes.
    grid = grid.transpose(0, 2, 1, 3, 4)
    return grid.reshape(rows * height, cols * width, channels)
if __name__ == "__main__":
    # Run the doctests found in this module's docstrings when executed as a script.
    import doctest

    doctest.testmod()
================================================
FILE: sequoia/common/gym_wrappers/utils_test.py
================================================
import gym
import pytest
from gym.wrappers import ClipAction
from gym.wrappers.pixel_observation import PixelObservationWrapper
from sequoia.conftest import param_requires_pyglet
from .pixel_observation import PixelObservationWrapper
from .utils import has_wrapper
@pytest.mark.parametrize(
    "env,wrapper_type,result",
    [
        param_requires_pyglet(
            lambda: PixelObservationWrapper(gym.make("CartPole-v0")), wrapper, expected
        )
        for wrapper, expected in (
            (ClipAction, False),
            (PixelObservationWrapper, True),
            (PixelObservationWrapper, True),
        )
        # param_requires_atari_py(AtariPreprocessing(gym.make("ALE/Breakout-v5")), ClipAction, True),
    ],
)
def test_has_wrapper(env, wrapper_type, result):
    """`has_wrapper` should find exactly the wrappers present on the env."""
    # `env` is a factory, so that the env only gets created when the test runs.
    wrapped_env = env()
    assert has_wrapper(wrapped_env, wrapper_type) == result
================================================
FILE: sequoia/common/hparams/__init__.py
================================================
""" Utilities for creating hyper-parameter dataclasses and their fields. """
from simple_parsing.helpers.hparams import categorical, log_uniform, loguniform, uniform
from simple_parsing.helpers.hparams.hyperparameters import HyperParameters, Point
================================================
FILE: sequoia/common/layers.py
================================================
import math
from typing import Callable, List, Optional, Tuple, Union
import numpy as np
import torch
from gym import spaces
from torch import Tensor, nn
from sequoia.common.spaces.image import Image
from sequoia.utils.generic_functions import singledispatchmethod
from sequoia.utils.logging_utils import get_logger
# Module-level logger.
logger = get_logger(__name__)
class Lambda(nn.Module):
    """Module that applies an arbitrary callable to its input."""

    def __init__(self, func: Callable):
        super().__init__()
        # The callable to apply in `forward`.
        self.func = func

    def forward(self, x):
        """Return `self.func(x)`."""
        result = self.func(x)
        return result
class Reshape(nn.Module):
    """Module that reshapes every item of a batch to `target_shape`.

    The first (batch) dimension of the inputs is preserved.
    """

    def __init__(self, target_shape: Union[List[int], Tuple[int, ...]]):
        super().__init__()
        self.target_shape = target_shape

    def forward(self, inputs):
        batch_size = inputs.shape[0]
        output_shape = [batch_size, *self.target_shape]
        return inputs.reshape(output_shape)
class ConvBlock(nn.Module):
    """Block that performs:
    Conv
    BatchNorm2D
    Relu
    MaxPool (2x)

    Extra keyword arguments are passed on to the `nn.Conv2d` layer.
    """

    def __init__(
        self, in_channels: int, out_channels: int, kernel_size: int = 3, padding: int = 1, **kwargs
    ):
        super().__init__()
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size = kernel_size

        conv_kwargs = dict(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=padding,
        )
        self.conv = nn.Conv2d(**conv_kwargs, **kwargs)
        self.norm = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        features = self.relu(self.norm(self.conv(x)))
        return self.pool(features)
class DeConvBlock(nn.Module):
    """Block that performs:
    Upsample (2x)
    Conv
    BatchNorm2D
    Relu
    Conv
    BatchNorm2D
    Relu (optional)

    Args:
        in_channels: Number of channels of the inputs.
        out_channels: Number of channels of the outputs.
        hidden_channels: Number of channels between the two convolutions.
            Defaults to `out_channels`.
        kernel_size: Kernel size of both convolutions.
        padding: Padding of both convolutions.
        last_relu: Whether to apply the final Relu.
        **kwargs: Extra keyword arguments, passed to both `nn.Conv2d` layers.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: Optional[int] = None,
        kernel_size: int = 3,
        padding: int = 1,
        last_relu: bool = True,
        **kwargs,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels or out_channels
        self.kernel_size = kernel_size
        self.last_relu = last_relu

        self.upsample = nn.Upsample(scale_factor=2)
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=self.hidden_channels,
            kernel_size=kernel_size,
            padding=padding,
            **kwargs,
        )
        self.norm1 = nn.BatchNorm2d(self.hidden_channels)
        self.conv2 = nn.Conv2d(
            in_channels=self.hidden_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            padding=padding,
            **kwargs,
        )
        # `norm2` normalizes the output of `conv2`, which has `out_channels`
        # channels. It was previously created with `hidden_channels` features,
        # which made `forward` fail whenever `hidden_channels != out_channels`.
        self.norm2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.upsample(x)
        x = self.conv1(x)
        x = self.norm1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.norm2(x)
        if self.last_relu:
            x = self.relu(x)
        return x
def n_output_features(
    in_features: int, padding: int = 1, kernel_size: int = 3, stride: int = 1
) -> int:
    """Calculates the output size (along one dimension) of a conv2d layer, given
    the input size along that dimension and the layer's parameters.
    """
    padded_size = in_features + 2 * padding
    n_strides = math.floor((padded_size - kernel_size) / stride)
    return n_strides + 1
class Conv2d(nn.Conv2d):
    """`nn.Conv2d` whose `forward` also accepts `Image` spaces.

    Given an `Image` space, `forward` returns the corresponding output space
    instead of a tensor.
    """

    @singledispatchmethod
    def forward(self, input: Union[Image, Tensor]) -> Union[Tensor, Image]:
        return super().forward(input)

    @forward.register(Image)
    def _(self, input: Image) -> Image:
        assert input.channels_first, f"Need channels first inputs for conv2d: {input}"
        # NOTE: Not strictly necessary for computing the output space, but it would be
        # better for the input space to already have a batch size, since conv2d only
        # accepts 4-dimensional inputs.
        assert input.channels == self.in_channels, (
            f"Input space doesn't have the right number of channels: "
            f"input.channels: {input.channels} != self.in_channels: {self.in_channels}"
        )

        # Output size along each spatial dimension (0: height, 1: width).
        new_height, new_width = (
            n_output_features(
                size,
                padding=self.padding[dim],
                kernel_size=self.kernel_size[dim],
                stride=self.stride[dim],
            )
            for dim, size in enumerate((input.height, input.width))
        )

        new_shape = [self.out_channels, new_height, new_width]
        if input.batch_size:
            # Keep the batch dimension of the input space.
            new_shape = [input.batch_size, *new_shape]

        output_space: Image = type(input)(low=-np.inf, high=np.inf, shape=new_shape)
        output_space.channels_first = True
        return output_space
class MaxPool2d(nn.MaxPool2d):
    """`nn.MaxPool2d` whose `forward` also accepts `Image` spaces.

    Given an `Image` space, `forward` returns the corresponding output space
    instead of a tensor.
    """

    @singledispatchmethod
    def forward(self, input: Union[Image, Tensor]) -> Union[Tensor, Image]:
        return super().forward(input)

    @forward.register(Image)
    def _(self, input: Image) -> Image:
        assert input.channels_first, f"Need channels first inputs: {input}"

        def as_pair(value):
            # Integer settings apply to both the height and the width.
            return [value] * 2 if isinstance(value, int) else value

        padding = as_pair(self.padding)
        kernel_size = as_pair(self.kernel_size)
        stride = as_pair(self.stride)

        # Output size along each spatial dimension (0: height, 1: width).
        new_height, new_width = (
            n_output_features(
                size,
                padding=padding[dim],
                kernel_size=kernel_size[dim],
                stride=stride[dim],
            )
            for dim, size in enumerate((input.height, input.width))
        )

        # Pooling doesn't change the number of channels.
        new_shape = [input.channels, new_height, new_width]
        if input.batch_size:
            new_shape = [input.batch_size, *new_shape]

        output_space: Image = type(input)(low=-np.inf, high=np.inf, shape=new_shape)
        output_space.channels_first = True
        return output_space
class Sequential(nn.Sequential):
    """`nn.Sequential` whose `forward` also accepts gym spaces.

    Given a space, each module is applied to the space in turn. When a module
    can't be applied to a space directly:

    - for `Box` / `Image` spaces, the module is applied to a sample from the
      space, and an output space with the shape of the result is created;
    - for other spaces, the module is assumed not to change the space.
    """

    # NB: We can't really type check this function as the type of input
    # may change dynamically (as is tested in
    # TestScript.test_sequential_intermediary_types). Cannot annotate
    # with Any as TorchScript expects a more precise type
    def forward(self, input):
        if not isinstance(input, spaces.Space):
            return super().forward(input)

        space = input
        for module in self:
            try:
                space = module(space)
            # NOTE: This was a bare `except:`, which also swallowed
            # KeyboardInterrupt and SystemExit.
            except Exception:
                if isinstance(space, (spaces.Box, Image)):
                    # Apply the module to a sample from the space, and create an
                    # output space of the same shape.
                    space = Image.from_box(space)
                    in_sample: Tensor = torch.as_tensor(space.sample())
                    if not space.batch_size:
                        in_sample = in_sample.unsqueeze(0)
                    out_sample = module(in_sample)
                    space = type(space)(low=-np.inf, high=np.inf, shape=out_sample.shape)
                else:
                    logger.debug(
                        f"Unable to apply module {module} on space {space}: assuming that it doesn't change the space."
                    )
        return space
================================================
FILE: sequoia/common/loss.py
================================================
""" Module that defines a `Loss` class that holds losses and associated metrics.
This Loss object is used to bundle together the Loss and the Metrics.
Loss objects are used to simplify training with multiple "loss signals"
(e.g. in Self-Supervised Learning) by keeping track of the contribution of each
individual 'task' to the total loss, as well as their corresponding metrics.
For example:
>>> from pprint import pprint
>>> loss = Loss("total")
>>> loss += Loss("task_a", loss=1.23, metrics={"accuracy": 0.95})
>>> loss += Loss("task_b", loss=torch.Tensor([2.10]))
>>> loss += Loss("task_c", loss=3.00)
>>> log_dict = loss.to_log_dict()
>>> pprint(log_dict)
{'total/loss': tensor([6.3300]),
'total/task_a/accuracy': 0.95,
'total/task_a/loss': 1.23,
'total/task_b/loss': tensor([2.1000]),
'total/task_c/loss': 3.0}
Another feature of Loss objects is that they can automatically generate
relevant metrics when the associated tensors are passed.
For example, consider a classification problem:
>>> # some fake classification logits.
>>> y_pred = torch.Tensor([
... [.8, .1, .1],
... [.0, .9, .1],
... [.0, .1, .9],
... ])
>>> y = [0, 1, 1]
>>> loss = Loss("test", y_pred=y_pred, y=y)
>>> loss.metric
ClassificationMetrics(n_samples=3, accuracy=0.666667)
Or, consider a regression problem:
>>> y_true = [0.0, 1.0, 2.0, 3.0]
>>> y_pred = [0.0, 1.0, 2.0, 5.0] # mse = 1/4 * (5-3)**2 == 1.0
>>> reg_loss = Loss("test", y_pred=y_pred, y=y_true)
>>> reg_loss.metric
RegressionMetrics(n_samples=4, mse=tensor(1.), l1_error=tensor(0.5000))
See the `Loss` constructor for more info on which tensors are accepted.
"""
from collections.abc import Mapping as MappingABC
from dataclasses import InitVar, dataclass, fields
from typing import Any, ClassVar, Dict, Iterable, List, Optional, Tuple, Union
import torch
from simple_parsing import field
from simple_parsing.helpers import dict_field
from torch import Tensor
from sequoia.utils.logging_utils import cleanup, get_logger
from sequoia.utils.serialization import Serializable
from sequoia.utils.utils import add_dicts, add_prefix
from .metrics import ClassificationMetrics, Metrics, RegressionMetrics, get_metrics
# Module-level logger.
logger = get_logger(__name__)
@dataclass
class Loss(Serializable, MappingABC):
"""Object used to store the losses and metrics.
Used to simplify the return type of the different `get_loss` functions and
also to help in debugging models that use a combination of different loss
signals.
TODO: Add some kind of histogram plot to show the relative contribution of
each loss signal?
TODO: Maybe create a `make_plots()` method to create wandb plots?
"""
name: str
loss: Tensor = 0.0 # type: ignore
losses: Dict[str, "Loss"] = dict_field()
# NOTE: By setting to_dict=False below, we don't include the tensors when
# serializing the attributes.
# TODO: Does that also mean that the tensors can't be pickled (moved) by
# pytorch-lightning during training? Is there a case where that would be
# useful?
tensors: Dict[str, Tensor] = dict_field(repr=False, to_dict=False)
# Dictionary of metrics related to this loss. For example, could be the Accuracy.
# TODO: Test out using this with metrics from `torchmetrics`.
metrics: Dict[str, Union[Metrics, Tensor]] = dict_field()
# When multiplying the Loss by a value, this keep track of the coefficients
# used, so that if we wanted to we could recover the 'unscaled' loss.
_coefficient: Union[float, Tensor] = field(1.0, repr=False)
x: InitVar[Optional[Tensor]] = None
h_x: InitVar[Optional[Tensor]] = None
y_pred: InitVar[Optional[Tensor]] = None
y: InitVar[Optional[Tensor]] = None
_field_names: ClassVar[Tuple[str, ...]]
def __post_init__(
    self, x: Tensor = None, h_x: Tensor = None, y_pred: Tensor = None, y: Tensor = None
):
    """Finishes the initialization of the Loss.

    - Supports being constructed from a single dict of field values.
    - Creates the main metrics from the given tensors (`x`, `h_x`, `y_pred`, `y`),
      unless there is already an entry for `self.name` in `self.metrics`.
    - Converts the non-Tensor entries of `self.tensors` to tensors.
    - Caches the dataclass field names on the class (`_field_names`).
    """
    if isinstance(self.name, dict):
        # TODO: ugly-ish 'hack', we need to do this because of the infamous
        # 'apply_to_collection' function, which does a Loss({k: v for k, v in loss.items()})
        # Check that all other fields are empty, so we're not overwriting anything.
        assert (isinstance(self.loss, float) or not self.loss.shape) and self.loss == 0.0
        assert not self.metrics
        assert not self.losses
        assert not self.tensors
        assert self._coefficient == 1.0
        # Work on a shallow copy, so that the `pop` below doesn't remove the
        # "name" entry from the dict that the caller passed in.
        field_values = dict(self.name)
        self.name = field_values.pop("name")
        for k, v in field_values.items():
            setattr(self, k, v)

    assert self.name, "Loss objects should be given a name!"
    if self.name not in self.metrics:
        # Create a Metrics object if given the necessary tensors.
        metrics = get_metrics(x=x, h_x=h_x, y_pred=y_pred, y=y)
        if metrics:
            self.metrics[self.name] = metrics

    # Device of the first entry in `self.tensors` that is already a Tensor, if any.
    self._device: torch.device = None
    for name in list(self.tensors.keys()):
        tensor = self.tensors[name]
        if not isinstance(tensor, Tensor):
            self.tensors[name] = torch.as_tensor(tensor)
        elif self._device is None:
            self._device = tensor.device

    # Computed once per class (looks at the class' own `__dict__`, so subclasses
    # get their own tuple).
    if "_field_names" not in type(self).__dict__:
        type(self)._field_names = tuple(f.name for f in fields(self))
def __contains__(self, key: str) -> bool:
    """Returns whether `key` is the name of one of the fields of this Loss.

    Keys that aren't strings are never contained. (Returning `NotImplemented`
    here, as was done before, made `key in loss` truthy for such keys, since the
    `in` operator converts the result to a bool.)
    """
    return isinstance(key, str) and key in type(self)._field_names
def __getitem__(self, key: str) -> Any:
    """Returns the value of the field named `key`.

    Raises a KeyError when `key` isn't in this Loss (see `__contains__`).
    """
    if key in self:
        return getattr(self, key)
    raise KeyError(key)
def __iter__(self) -> Iterable[str]:
    """Iterates over the names of the fields of this Loss.

    NOTE: `__iter__` has to return an *iterator*: returning the tuple of names
    directly makes `iter(loss)` (and thus `loss.keys()`, `loss.items()`,
    `dict(loss)`, ...) raise a TypeError.
    """
    return iter(type(self)._field_names)
def __len__(self) -> int:
    """Returns the number of fields of this Loss."""
    field_names = type(self)._field_names
    return len(field_names)
@property
def total_loss(self) -> Tensor:
    """Alias for `self.loss`."""
    total = self.loss
    return total
@property
def requires_grad(self) -> bool:
    """Returns whether the loss in this object is a tensor that requires grad."""
    loss = self.loss
    if not isinstance(loss, Tensor):
        return False
    return loss.requires_grad
def backward(self, *args, **kwargs):
    """Calls `self.loss.backward(*args, **kwargs)` and returns its result."""
    loss_tensor = self.loss
    return loss_tensor.backward(*args, **kwargs)
@property
def metric(self) -> Optional[Metrics]:
    """The main metrics of this Loss: shortcut for `self.metrics[self.name]`.

    Returns:
        Optional[Metrics]: The metrics stored under this Loss' own name, or None
        when there are none.
    """
    main_key = self.name
    return self.metrics.get(main_key)

@metric.setter
def metric(self, value: Metrics) -> None:
    """Shortcut for `self.metrics[self.name] = value`.

    Can only be used when there are no main metrics yet.

    Parameters
    ----------
    value : Metrics
        The main metrics associated with this Loss.
    """
    main_key = self.name
    assert main_key not in self.metrics, "There's already be a metric?"
    self.metrics[main_key] = value
@property
def accuracy(self) -> float:
    """Accuracy of the main metrics, when those are `ClassificationMetrics`.

    Returns None otherwise.
    """
    main_metric = self.metric
    if not isinstance(main_metric, ClassificationMetrics):
        return None
    return main_metric.accuracy
@property
def mse(self) -> Tensor:
    """Mean squared error of the main metrics, which have to be `RegressionMetrics`."""
    main_metric = self.metric
    assert isinstance(main_metric, RegressionMetrics), self
    return main_metric.mse
def __add__(self, other: Union["Loss", Any]) -> "Loss":
    """Adds two Loss instances together, giving a new Loss.

    The total losses are summed. When both have the same name, their `losses`
    and `metrics` dicts are merged with `add_dicts`. Otherwise, `other` is
    stored as a whole in the `losses` dict of the result (under its name), and
    the metrics of `self` are kept as-is. The tensors of `other` overwrite those
    of `self`. The name and coefficient of `self` are kept.

    This is useful when doing something like:
    ```
    loss = Loss("Test")
    for x, y in dataloader:
        loss += model.get_loss(x=x, y=y)
    ```

    Returns
    -------
    Loss
        The merged/summed up Loss. (`self`, when `other` is equal to 0.)
    """
    if other == 0:
        return self
    if not isinstance(other, Loss):
        return NotImplemented

    total = self.loss + other.loss
    if self.name != other.name:
        # IDEA: when the names don't match, store the entire Loss
        # object into the 'losses' dict, rather than a single loss tensor.
        merged_losses = add_dicts(self.losses, {other.name: other})
        # TODO: setting in the 'metrics' dict, we are duplicating the
        # metrics, since they now reside in the `self.metrics[other.name]`
        # and `self.losses[other.name].metrics` attributes.
        merged_metrics = self.metrics
    else:
        merged_losses = add_dicts(self.losses, other.losses)
        merged_metrics = add_dicts(self.metrics, other.metrics)
    merged_tensors = add_dicts(self.tensors, other.tensors, add_values=False)

    return Loss(
        name=self.name,
        loss=total,
        losses=merged_losses,
        tensors=merged_tensors,
        metrics=merged_metrics,
        _coefficient=self._coefficient,
    )
def __iadd__(self, other: Union["Loss", Any]) -> "Loss":
    """Adds Loss to `self` in-place.

    The total loss tensors are added. When both operands share the same name,
    their `losses` and `metrics` dicts are merged with `add_dicts`; otherwise
    `other` is stored as a whole under `other.name` in `self.losses`. The
    `tensors` dicts are merged with `add_values=False`. The name of `self` is
    kept. This is useful when doing something like:
    ```
    loss = Loss("Test")
    for x, y in dataloader:
        loss += model.get_loss(x=x, y=y)
    ```

    Returns
    -------
    Loss
        `self`: The merged/summed up Loss. `NotImplemented` is returned for
        operands that are not a `Loss`.
    """
    if not isinstance(other, Loss):
        # Let Python fall back to `__add__` / `__radd__`, so that `loss += 0`
        # behaves like `loss + 0` instead of failing on `other.loss`.
        return NotImplemented

    self.loss = self.loss + other.loss
    if self.name == other.name:
        self.losses = add_dicts(self.losses, other.losses)
        self.metrics = add_dicts(self.metrics, other.metrics)
    else:
        # IDEA: when the names don't match, store the entire Loss
        # object into the 'losses' dict, rather than a single loss tensor.
        self.losses = add_dicts(self.losses, {other.name: other})
    self.tensors = add_dicts(self.tensors, other.tensors, add_values=False)
    return self
def __radd__(self, other: Any):
    """Reflected addition, used when `other + self` could not be handled by `other`.

    For example, `None + Loss()` ends up here, whereas `Loss() + None` goes
    through `__add__`.

    Returns `self` when `other` is None or equals 0, `NotImplemented` otherwise.
    """
    # TODO: Other could be a loss tensor, maybe create a Loss object for it?
    if other is None or other == 0:
        return self
    return NotImplemented
def __mul__(self, factor: Union[float, Tensor]) -> "Loss":
    """Scale the total loss and every entry of `losses` by `factor`.

    The `metrics` and `tensors` dicts are passed on unscaled, and the stored
    coefficient is multiplied by `factor` as well.

    Returns
    -------
    Loss
        A new, scaled Loss instance.
    """
    scaled_total = self.loss * factor
    scaled_parts = {key: part * factor for key, part in self.losses.items()}
    return Loss(
        name=self.name,
        loss=scaled_total,
        losses=scaled_parts,
        metrics=self.metrics,
        tensors=self.tensors,
        _coefficient=self._coefficient * factor,
    )
def __rmul__(self, factor: Union[float, Tensor]) -> "Loss":
    """Reflected multiplication (`factor * loss`); delegates to `__mul__`."""
    # Scaling is treated as commutative here: same result as `loss * factor`.
    return self.__mul__(factor)
def __truediv__(self, coefficient: Union[float, Tensor]) -> "Loss":
    """Divide this Loss by `coefficient`, implemented as a multiplication by its reciprocal."""
    reciprocal = 1 / coefficient
    return self * reciprocal
@property
def unscaled_losses(self):
    """Entries of `losses`, each divided by the stored `_coefficient`.

    TODO: This isn't used anywhere. We could probably remove it.
    """
    scale = self._coefficient
    return {key: scaled / scale for key, scaled in self.losses.items()}
def to_log_dict(self, verbose: bool = False) -> Dict[str, Union[str, float, Dict]]:
    """Build the dictionary to be logged (e.g. by `wandb.log`).

    The dict holds the total loss under "loss", then one entry per metric and
    one per sub-loss (`Serializable` values are expanded through their own
    `to_log_dict`). All keys are then prefixed with this Loss's name and the
    result is passed through `cleanup`.

    Args:
        verbose (bool, optional): Wether to include a lot of information, or
            to only log the 'essential' stuff. See the `cleanup` function for
            more info. Defaults to False.

    Returns:
        Dict: A dict containing the things to be logged.
    """
    # TODO: Could also produce some wandb plots and stuff here when verbose?
    # The loss is stored as-is (not rounded / cast), preserving the Torch dtype.
    entries: Dict[str, Union[str, float, Dict, Tensor]] = {"loss": self.loss}

    # Metrics first, then sub-losses: on a key clash the sub-loss entry wins.
    for group in (self.metrics, self.losses):
        for key, item in group.items():
            if isinstance(item, Serializable):
                entries[key] = item.to_log_dict(verbose=verbose)
            else:
                entries[key] = item

    entries = add_prefix(entries, prefix=self.name, sep="/")

    # when NOT verbose, remove any entries with this matching key.
    # TODO: add/remove keys here if you want to customize what doesn't get logged to wandb.
    # TODO: Could maybe make this a class variable so that it could be
    # extended/overwritten, but that sounds like a bit too much rn.
    hidden_keys: List[str] = (
        []
        if verbose
        else [
            "n_samples",
            "name",
            "confusion_matrix",
            "class_accuracy",
            "_coefficient",
        ]
    )
    return cleanup(entries, keys_to_remove=hidden_keys, sep="/")
def to_pbar_message(self) -> Dict[str, float]:
    """Smaller, less-detailed version of `to_log_dict()` for progress bars.

    Holds the total loss as a float under "Loss", the progress-bar message of
    every `Metrics` entry (other metric values are kept as-is) and of every
    sub-loss; keys are prefixed with this Loss's name using a space separator.
    """
    # NOTE: PL actually doesn't seem to accept strings as values
    summary: Dict[str, Union[str, float]] = {"Loss": float(self.loss)}
    for key, metric in self.metrics.items():
        summary[key] = metric.to_pbar_message() if isinstance(metric, Metrics) else metric
    for key, subloss in self.losses.items():
        summary[key] = subloss.to_pbar_message()
    summary = add_prefix(summary, prefix=self.name, sep=" ")
    return cleanup(summary, sep=" ")
def clear_tensors(self) -> "Loss":
    """Clears the `tensors` attribute of `self` and of sublosses.

    NOTE: This could be useful if you want to save some space/compute, but
    it isn't being used atm, and there's no issue. You might want to call
    this if you are storing big tensors (or passing them to the constructor)

    Returns:
        Loss: `self`, so that calls can be chained.
    """
    self.tensors.clear()
    # Recurse into the sub-losses; only the values are needed here.
    for subloss in self.losses.values():
        subloss.clear_tensors()
    return self
def absorb(self, other: "Loss") -> None:
    """Absorbs `other` into `self`, merging the losses and metrics.

    A temporary Loss carrying the name of `self` is filled with the loss,
    metrics and losses of `other` and then added in-place to `self`, so the
    same-name merge of `__iadd__` applies.

    Args:
        other (Loss): Another loss to 'merge' into this one.
    """
    target_name = self.name
    source_name = other.name

    def renamed(mapping):
        # Replace the other loss's name in the keys, if present.
        return {key.replace(source_name, target_name): item for key, item in mapping.items()}

    carrier = Loss(name=target_name)
    carrier.loss = other.loss
    carrier.metrics = renamed(other.metrics)
    carrier.losses = renamed(other.losses)
    self += carrier
def all_metrics(self) -> Dict[str, Metrics]:
    """Returns a 'cleaned up' dictionary of all the Metrics objects.

    Collects the entries of `self.metrics` plus, recursively, those of every
    sub-loss, then prefixes all keys with this Loss's name (separator "/").
    A key produced by a sub-loss must not already be present.
    """
    assert self.name
    collected: Dict[str, Metrics] = dict(self.metrics)
    for subloss_name, subloss in self.losses.items():
        # TODO: Aren't we potentially colliding with 'self.metrics' here?
        for key, metric in subloss.all_metrics().items():
            assert key not in collected, (
                f"Collision in metric keys of subloss {subloss_name}: key={key}, "
                f"result={collected}"
            )
            collected[key] = metric
    return add_prefix(collected, prefix=self.name, sep="/")
if __name__ == "__main__":
    # When executed as a script, run the doctests found in this module.
    import doctest
    doctest.testmod()
================================================
FILE: sequoia/common/loss_test.py
================================================
"""
TODO: Write some tests that also help illustrate how the Loss class works.
"""
from .loss import Loss
def test_demo():
    """Accumulating differently-named losses with `+=` nests them under the parent."""
    parts = [
        Loss("task_a", loss=1.23, metrics={"accuracy": 0.95}),
        Loss("task_b", loss=2.10),
        Loss("task_c", loss=3.00),
    ]
    total = Loss("total")
    for part in parts:
        total += part

    # The dict that would be logged, for example with wandb.
    expected = {
        "total/loss": 6.33,
        "total/task_a/loss": 1.23,
        "total/task_a/accuracy": 0.95,
        "total/task_b/loss": 2.1,
        "total/task_c/loss": 3.0,
    }
    assert total.to_log_dict() == expected
def test_all_metrics():
    """`all_metrics()` gathers the metrics of the Loss and of its sub-losses."""
    total = Loss("total")
    for part in (
        Loss("task_a", loss=1.23, metrics={"accuracy": 0.95}),
        Loss("task_b", loss=2.10),
        Loss("task_c", loss=3.00),
    ):
        total += part

    # Only task_a carries a metric; its key is prefixed by both loss names.
    expected = {"total/task_a/accuracy": 0.95}
    assert total.all_metrics() == expected
def test_to_log_dict_order():
    """Chaining `+` (rather than `+=`) on Loss objects yields the expected log dict."""
    task_a_loss = Loss("task_a", loss=1.23, metrics={"accuracy": 0.95})
    task_b_loss = Loss("task_b", loss=2.10)
    task_c_loss = Loss("task_c", loss=3.00)
    # Left-to-right chain of `__add__` calls, starting from an empty "total" Loss.
    total_loss = Loss("total") + task_a_loss + task_b_loss + task_c_loss
    loss_dict = total_loss.to_log_dict()
    assert loss_dict == {
        "total/loss": 6.33,
        "total/task_a/loss": 1.23,
        "total/task_a/accuracy": 0.95,
        "total/task_b/loss": 2.1,
        "total/task_c/loss": 3.0,
    }
================================================
FILE: sequoia/common/metrics/__init__.py
================================================
from .classification import ClassificationMetrics
from .get_metrics import get_metrics
from .metrics import Metrics, MetricsType
from .metrics_utils import accuracy, class_accuracy, get_class_accuracy, get_confusion_matrix
from .regression import RegressionMetrics
from .rl_metrics import EpisodeMetrics, GradientUsageMetric
================================================
FILE: sequoia/common/metrics/classification.py
================================================
""" Metrics class for classification.
Gives the accuracy, the class accuracy, and the confusion matrix for a given set
of (raw/pre-activation) logits Tensor `y_pred` and the class labels `y`.
"""
from dataclasses import InitVar, dataclass
from typing import Dict, Optional, Union
import numpy as np
import torch
from simple_parsing import field
from torch import Tensor
from sequoia.utils.serialization import detach, move
from .metrics import Metrics
from .metrics_utils import get_accuracy, get_class_accuracy, get_confusion_matrix
# TODO: Might be a good idea to add a `task` attribute to Metrics or
# Loss objects, in order to check that we aren't adding the class
# accuracies or confusion matrices from different tasks by accident.
# We could also maybe add them but fuse them properly, for instance by
# merging the class accuracies and confusion matrices?
#
# For example, if a first metric has class accuracy [0.1, 0.5]
# (n_samples=100) and from a task with classes [0, 1] is added to a
# second Metrics with class accuracy [0.9, 0.8] (n_samples=100) for task
# with classes [0,3], the resulting Metrics object would have a
# class_accuracy of [0.5 (from (0.1+0.9)/2 = 0.5), 0.5, 0 (no data), 0.8]
# n_samples would then also have to be split on a per-class basis.
# n_samples could maybe be just the sum of the confusion matrix entries?
#
# As for the confusion matrices, they could be first expanded to fit the
# range of both by adding empty columns/rows to each and then be added
# together.
@dataclass
class ClassificationMetrics(Metrics):
    """Metrics for classification: accuracy, per-class accuracy, confusion matrix.

    The confusion matrix is either given directly or computed in
    `__post_init__` from `logits`/`y_pred` and the labels `y`; the accuracy
    and class accuracy are then derived from it.
    """

    # fields we generate from the confusion matrix (if provided) or from the
    # forward pass tensors.
    accuracy: float = 0.0
    confusion_matrix: Optional[Union[Tensor, np.ndarray]] = field(
        default=None, repr=False, compare=False
    )
    class_accuracy: Optional[Union[Tensor, np.ndarray]] = field(
        default=None, repr=False, compare=False
    )

    # Optional arguments used to create the attributes of the metrics above.
    # NOTE: These won't become attributes on the object, just args to postinit.
    x: InitVar[Optional[Tensor]] = None
    h_x: InitVar[Optional[Tensor]] = None
    logits: InitVar[Optional[Tensor]] = None
    y_pred: InitVar[Optional[Tensor]] = None
    y: InitVar[Optional[Tensor]] = None
    num_classes: InitVar[Optional[int]] = None

    def __post_init__(
        self,
        x: Tensor = None,
        h_x: Tensor = None,
        logits: Tensor = None,
        y_pred: Tensor = None,
        y: Tensor = None,
        num_classes: int = None,
    ):
        """Compute the confusion matrix (when not given) and the derived metrics.

        `logits` takes precedence over `y_pred` when both are passed.
        """
        super().__post_init__(x=x, h_x=h_x, logits=logits, y_pred=y_pred, y=y)

        if (
            self.confusion_matrix is None
            and (y_pred is not None or logits is not None)
            and y is not None
        ):
            self.confusion_matrix = get_confusion_matrix(
                y_pred=logits if logits is not None else y_pred, y=y, num_classes=num_classes
            )

        # TODO: add other useful metrics (potentially ones using x or h_x?)
        if self.confusion_matrix is not None:
            self.accuracy = get_accuracy(self.confusion_matrix)
            self.accuracy = round(self.accuracy, 6)
            self.class_accuracy = get_class_accuracy(self.confusion_matrix)

    @property
    def objective_name(self) -> str:
        return "Accuracy"

    @staticmethod
    def _copy_matrix(matrix: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
        """Copy a confusion matrix, whether it is a Tensor or a numpy array."""
        # numpy arrays (what `get_confusion_matrix` returns) have no `.clone()`.
        return matrix.clone() if isinstance(matrix, Tensor) else matrix.copy()

    def __add__(self, other: "ClassificationMetrics") -> "ClassificationMetrics":
        """Merge two metrics by summing their sample counts and confusion matrices.

        When `self` holds no samples, `other` is returned as-is. When only one
        side has a confusion matrix, a copy of it is used; when neither has
        one, the accuracies are averaged, weighted by their sample counts.
        """
        if self.n_samples == 0:
            return other
        if not isinstance(other, ClassificationMetrics):
            return NotImplemented

        total_samples = self.n_samples + other.n_samples
        mine, theirs = self.confusion_matrix, other.confusion_matrix

        if mine is None and theirs is None:
            # No matrices to add: keep the accuracies instead of crashing.
            # total_samples != 0 here since self.n_samples != 0.
            weighted = self.accuracy * self.n_samples + other.accuracy * other.n_samples
            return ClassificationMetrics(
                n_samples=total_samples,
                accuracy=weighted / total_samples,
            )

        # Create the 'sum' confusion matrix:
        if mine is None:
            summed = self._copy_matrix(theirs)
        elif theirs is None:
            summed = self._copy_matrix(mine)
        else:
            summed = mine + theirs

        # `num_classes` is not needed: the confusion matrix is given directly.
        return ClassificationMetrics(
            n_samples=total_samples,
            confusion_matrix=summed,
        )

    def to_log_dict(self, verbose=False):
        """Log dict of the base class plus the accuracy (and, if verbose, the matrices)."""
        log_dict = super().to_log_dict(verbose=verbose)
        log_dict["accuracy"] = self.accuracy
        if verbose:
            # Maybe add those as plots, rather than tensors?
            log_dict["class_accuracy"] = self.class_accuracy
            log_dict["confusion_matrix"] = self.confusion_matrix
        return log_dict

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        message = super().to_pbar_message()
        message["acc"] = float(self.accuracy)
        return message

    def detach(self) -> "ClassificationMetrics":
        """Returns a new Metrics built from the detached attributes of this one."""
        return ClassificationMetrics(
            n_samples=detach(self.n_samples),
            accuracy=float(self.accuracy),
            class_accuracy=detach(self.class_accuracy),
            confusion_matrix=detach(self.confusion_matrix),
        )

    def to(self, device: Union[str, torch.device]) -> "ClassificationMetrics":
        """Returns a new Metrics with all the attributes 'moved' to `device`."""
        return ClassificationMetrics(
            n_samples=move(self.n_samples, device),
            accuracy=move(self.accuracy, device),
            class_accuracy=move(self.class_accuracy, device),
            confusion_matrix=move(self.confusion_matrix, device),
        )

    @property
    def objective(self) -> float:
        return float(self.accuracy)
# def __lt__(self, other: Union["ClassificationMetrics", Any]) -> bool:
# if isinstance(other, ClassificationMetrics):
# return self.accuracy < other.accuracy
# return NotImplemented
# def __ge__(self, other: Union["ClassificationMetrics", Any]) -> bool:
# if isinstance(other, ClassificationMetrics):
# return self.accuracy >= other.accuracy
# return NotImplemented
# def __eq__(self, other: Union["ClassificationMetrics", Any]) -> bool:
# if isinstance(other, ClassificationMetrics):
# return self.accuracy == other.accuracy and self.n_samples == other.n_samples
# return NotImplemented
================================================
FILE: sequoia/common/metrics/classification_test.py
================================================
import numpy as np
import torch
from .classification import ClassificationMetrics
from .get_metrics import get_metrics
def test_classification_metrics_add_properly():
    """Adding two ClassificationMetrics sums the samples and combines the accuracy."""
    first_logits = torch.as_tensor([[0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99]])
    first_labels = torch.as_tensor([1, 2, 0])
    m1 = ClassificationMetrics(y_pred=first_logits, y=first_labels)
    assert m1.n_samples == 3
    assert np.isclose(m1.accuracy, 2 / 3)

    second_logits = torch.as_tensor(
        [
            [0.01, 0.90, 0.09],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
            [0.01, 0, 0.99],
        ]
    )
    second_labels = torch.as_tensor([1, 2, 2, 0, 0])
    m2 = ClassificationMetrics(y_pred=second_logits, y=second_labels)
    assert m2.n_samples == 5
    assert np.isclose(m2.accuracy, 3 / 5)
    assert all(np.isclose(m2.class_accuracy, [0, 1, 1]))

    m3 = m1 + m2
    assert m3.n_samples == 8
    assert np.isclose(m3.accuracy, 5 / 8)
def test_metrics_from_tensors():
    """`get_metrics` on logits and integer labels reports sample count and accuracy."""
    logits = torch.as_tensor([[0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99]])
    labels = torch.as_tensor([1, 2, 0])
    metrics = get_metrics(y_pred=logits, y=labels)
    assert metrics.n_samples == 3
    assert np.isclose(metrics.accuracy, 2 / 3)
================================================
FILE: sequoia/common/metrics/get_metrics.py
================================================
""" Defines the get_metrics function which gives back appropriate metrics
for the given tensors.
TODO: Add more metrics! Maybe even fancy things that are based on the
hidden vectors like wasserstein distance, etc?
"""
from typing import List, Optional, Union
import numpy as np
import torch
from torch import Tensor
from sequoia.utils.logging_utils import get_logger
from .classification import ClassificationMetrics
from .metrics import Metrics
from .regression import RegressionMetrics
logger = get_logger(__name__)
def to_optional_tensor(x: Optional[Union[Tensor, np.ndarray, List]]) -> Optional[Tensor]:
    """Return None for a None input, otherwise `x` converted with `torch.as_tensor`."""
    if x is None:
        return None
    return torch.as_tensor(x)
@torch.no_grad()
def get_metrics(
    y_pred: Union[Tensor, np.ndarray],
    y: Union[Tensor, np.ndarray],
    x: Union[Tensor, np.ndarray] = None,
    h_x: Union[Tensor, np.ndarray] = None,
) -> Optional[Metrics]:
    """Pick and build the metrics matching the given predictions and targets.

    All inputs are first converted to tensors (None stays None). Regression
    metrics are returned when `y` is floating point and has the same shape as
    `y_pred`; classification metrics otherwise. Returns None when `y` or
    `y_pred` is missing.
    """
    y = to_optional_tensor(y)
    y_pred = to_optional_tensor(y_pred)
    x = to_optional_tensor(x)
    h_x = to_optional_tensor(h_x)

    if y is None or y_pred is None:
        return None

    # TODO: I think this condition also works for binary classification,
    # at least when the logits have a shape[-1] == 2, but I don't know if it
    # would cause some trouble if there is a single logit, rather than 2.
    looks_like_regression = y.shape == y_pred.shape and torch.is_floating_point(y)
    if looks_like_regression:
        return RegressionMetrics(x=x, h_x=h_x, y_pred=y_pred, y=y)
    return ClassificationMetrics(x=x, h_x=h_x, y_pred=y_pred, y=y)
================================================
FILE: sequoia/common/metrics/metrics.py
================================================
""" Cute little dataclass that is used to describe a given type of Metrics.
This is a bit like the Metrics from pytorch-lightning, but seems easier to use,
as far as I know. Also totally transferable between gpus etc. (Haven't used
the metrics from PL much yet, to be honest).
"""
from dataclasses import dataclass, field, fields
from typing import Any, Dict, TypeVar, Union
import numpy as np
from torch import Tensor
from sequoia.utils.serialization import Serializable
MetricsType = TypeVar("MetricsType", bound="Metrics")
@dataclass
class Metrics(Serializable):
    """Base class of the metrics dataclasses.

    Holds the number of samples and provides neutral arithmetic: adding a base
    `Metrics` returns the other operand, and scaling leaves it unchanged.
    Subclasses are expected to override these with meaningful behaviour.
    """

    # This field isn't used in comparisons between Metrics.
    n_samples: int = field(default=0, compare=False)

    # TODO: Refactor this to take any kwargs, and then let each metric type
    # specify its own InitVars.
    def __post_init__(self, **tensors):
        """Sets `n_samples` from the first array/tensor argument that has a shape.

        NOTE: Only the first dimension of that tensor is used (the batch size);
        the tensors themselves are not stored.

        Args:
            **tensors: Optional tensors or arrays (e.g. `x`, `h_x`, `y_pred`,
                `y`), passed by the subclasses.
        """
        # get the batch size:
        for tensor in tensors.values():
            if isinstance(tensor, (np.ndarray, Tensor)) and tensor.shape:
                self.n_samples = tensor.shape[0]
                break

    def __add__(self, other):
        # Instances of the Metrics base class shouldn't be added together, as
        # the subclasses should implement the method. We just return the other.
        return other

    def __radd__(self, other):
        # `0 + metrics` returns the metrics, which makes `sum(list_of_metrics)` work.
        if isinstance(other, (int, float)) and other == 0.0:
            return self
        # A base Metrics on the right-hand side acts as a neutral element.
        if isinstance(other, Metrics) and type(self) is Metrics:
            assert self.n_samples == 0
            return other
        return NotImplemented

    def __mul__(self, factor: Union[float, Tensor]) -> "Metrics":
        # By default, multiplying or dividing a Metrics object doesn't change
        # anything about it.
        return self

    def __rmul__(self, factor: Union[float, Tensor]) -> "Metrics":
        # Reverse-order multiply, used to do b * a when a * b returns
        # NotImplemented.
        return self.__mul__(factor)

    def __truediv__(self, coefficient: Union[float, Tensor]) -> "Metrics":
        # By default, multiplying or dividing a Metrics object doesn't change
        # anything about it.
        return self

    def to_log_dict(self, verbose: bool = False) -> Dict:
        """Creates a dictionary to be logged (e.g. by `wandb.log`).

        Contains every dataclass field that has `repr=True` (all the fields
        when `verbose` is set); nested `Metrics` values are expanded through
        their own `to_log_dict`.

        Args:
            verbose (bool, optional): Wether to include a lot of information, or
                to only log the 'essential' metrics. See the `cleanup` function for
                more info. Defaults to False.

        Returns:
            Dict: A dict containing the things to be logged.

        TODO: Maybe create a `make_plots()` method to get wandb plots from the
        metric?
        """
        log_dict = {}
        # NOTE: loop variable named `f` to avoid shadowing `dataclasses.field`.
        for f in fields(self):
            if not (f.repr or verbose):
                continue  # skip field.
            value = getattr(self, f.name)
            if isinstance(value, Metrics):
                log_dict[f.name] = value.to_log_dict(verbose=verbose)
            else:
                log_dict[f.name] = value
        return log_dict

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        return {}

    def numpy(self):
        """Returns a new object with all the tensor fields converted to numpy arrays."""

        def to_numpy(val: Any):
            if isinstance(val, Tensor):
                return val.detach().cpu().numpy()
            if isinstance(val, (list, tuple)):
                return np.array(val)
            return val

        return type(self)(**{name: to_numpy(val) for name, val in self.items()})

    @property
    def objective(self) -> float:
        """Returns the 'main' metric from this object, as a float.

        Returns
        -------
        float
            The most important metric from this object, as a float. The base
            class always returns 0.
        """
        return 0
        # raise NotImplementedError(f"TODO: Add the 'objective' property to class {type(self)}")

    @property
    def objective_name(self) -> str:
        """Returns the name to be associated with the objective of this class.

        Returns
        -------
        str
            The name associated with the objective.

        Raises
        ------
        NotImplementedError
            Always, in the base class: subclasses have to provide the name.
        """
        raise NotImplementedError(f"TODO: Add the 'objective_name' property to class {type(self)}")
================================================
FILE: sequoia/common/metrics/metrics_utils.py
================================================
""" Utility functions for calculating metrics. """
from typing import Union
import numpy as np
import torch
from torch import Tensor
@torch.no_grad()
def get_confusion_matrix(
    y_pred: Union[np.ndarray, Tensor], y: Union[np.ndarray, Tensor], num_classes: int = None
) -> Union[Tensor, np.ndarray]:
    """Build the confusion matrix of predictions `y_pred` against labels `y`.

    Taken from https://discuss.pytorch.org/t/how-to-find-individual-class-accuracy/6348

    Args:
        y_pred: Either the logits with shape `[B, C]`, a single value per
            sample with shape `[B, 1]` (binary case, rounded to get the
            label), or, when it is 1-dimensional, the predicted labels
            themselves (`num_classes` is then required).
        y: The labels; flattened and cast to int.
        num_classes: Number of classes, only used when `y_pred` holds labels.

    Returns:
        A float numpy array of shape `[n_classes, n_classes]` where entry
        `[i, j]` counts the samples of true class `i` predicted as class `j`.

    Raises:
        NotImplementedError: If `y_pred` holds labels and `num_classes` is None.
    """
    if isinstance(y_pred, Tensor):
        y_pred = y_pred.detach().cpu().numpy()
    if isinstance(y, Tensor):
        y = y.detach().cpu().numpy()
    # Also accept plain lists / sequences.
    y_pred = np.asarray(y_pred)
    y = np.asarray(y)

    if y_pred.ndim == 1:
        # A 1-d `y_pred` can only be the predicted labels (logits are [B, C]).
        # The previous dtype check used set membership against np.float32 /
        # np.float64, which does not reliably match dtype instances (dtype
        # objects and scalar types hash differently), so it is dropped.
        if num_classes is None:
            raise NotImplementedError(
                "Can't determine the number of classes. Pass logits rather than predicted labels."
            )
        n_classes = num_classes
        y_preds = y_pred
    elif y_pred.shape[-1] == 1:
        # NOTE(review): rounding assumes values in [0, 1] (e.g. probabilities);
        # raw logits outside that range would fail the range check below.
        n_classes = 2  # y_pred is the logit for binary classification.
        y_preds = y_pred.round()
    else:
        # y_pred is assumed to be the logits.
        n_classes = y_pred.shape[-1]
        y_preds = y_pred.argmax(-1)

    y = y.flatten().astype(int)
    y_preds = y_preds.flatten().astype(int)

    # BUG: This is failing on the last batch.
    assert y.shape == y_preds.shape, (y.shape, y_preds.shape)

    confusion_matrix = np.zeros([n_classes, n_classes])
    if y.size == 0:
        # Empty batch: nothing to count (and `min()` / `max()` would fail).
        return confusion_matrix

    assert 0 <= y.min() and y.max() < n_classes, (y, n_classes)
    assert 0 <= y_preds.min() and y_preds.max() < n_classes, (y_preds, n_classes)

    # Unbuffered in-place add: repeated (true, predicted) pairs are all counted.
    np.add.at(confusion_matrix, (y, y_preds), 1)
    return confusion_matrix
@torch.no_grad()
def accuracy(y_pred: Union[Tensor, np.ndarray], y: Union[Tensor, np.ndarray]) -> float:
    """Fraction of samples whose highest-scoring class equals the label.

    Args:
        y_pred: Scores with the classes along the last dimension and the batch
            along the first one (tensor or numpy array).
        y: The labels, one per sample (tensor or numpy array).

    Returns:
        The accuracy over the batch, as a Python float.
    """
    # Accept numpy arrays too, as the signature advertises.
    scores = torch.as_tensor(y_pred)
    batch_size = scores.shape[0]
    _, predicted = scores.max(-1)
    # Align the labels with the predictions (e.g. `[B, 1]` labels -> `[B]`).
    labels = torch.as_tensor(y).reshape(predicted.shape)
    acc = (predicted == labels).sum(dtype=torch.float64) / batch_size
    return acc.item()
@torch.no_grad()
def get_accuracy(confusion_matrix: Union[Tensor, np.ndarray]) -> float:
    """Overall accuracy: sum of the diagonal divided by the sum of all entries."""
    is_tensor = isinstance(confusion_matrix, Tensor)
    correct = confusion_matrix.diag() if is_tensor else np.diag(confusion_matrix)
    ratio = correct.sum() / confusion_matrix.sum()
    return ratio.item()
@torch.no_grad()
def class_accuracy(y_pred: Tensor, y: Tensor) -> Tensor:
    """Per-class accuracy of `y_pred` against `y`, computed through the confusion matrix."""
    return get_class_accuracy(get_confusion_matrix(y_pred=y_pred, y=y))
@torch.no_grad()
def get_class_accuracy(confusion_matrix: Tensor) -> Tensor:
    """Per-class accuracy: each diagonal entry divided by the sum of its row.

    Row sums are clamped to at least 1e-10, so a class without any sample
    gets an accuracy of 0 instead of a division by zero.
    """
    if isinstance(confusion_matrix, Tensor):
        correct = confusion_matrix.diag()
        # `sum(1)` creates a new tensor, so clamping it in place is safe.
        row_totals = confusion_matrix.sum(1).clamp_(min=1e-10)
    else:
        correct = np.diag(confusion_matrix)
        row_totals = confusion_matrix.sum(1).clip(min=1e-10)
    return correct / row_totals
================================================
FILE: sequoia/common/metrics/metrics_utils_test.py
================================================
import numpy as np
import torch
from .metrics_utils import accuracy, class_accuracy, get_confusion_matrix
def test_accuracy():
    """Two of the three samples are classified correctly."""
    logits = torch.as_tensor([[0.01, 0.90, 0.09], [0.01, 0, 0.99], [0.01, 0, 0.99]])
    labels = torch.as_tensor([1, 2, 0])
    assert np.isclose(accuracy(logits, labels), 2 / 3)
def test_per_class_accuracy_perfect():
    """Every sample is predicted correctly, so each class has accuracy 1."""
    logits = torch.as_tensor(
        [
            [0.1, 0.9, 0.0],
            [0.1, 0.0, 0.9],
            [0.1, 0.4, 0.5],
            [0.9, 0.1, 0.0],
        ]
    )
    labels = torch.as_tensor([1, 2, 2, 0])
    result = class_accuracy(logits, labels).tolist()
    assert result == [1, 1, 1]
def test_per_class_accuracy_zero():
    """Class 1 is always predicted while every label is 0: all accuracies are 0."""
    logits = torch.as_tensor([[0.1, 0.9, 0.0]] * 4)
    labels = torch.as_tensor([0, 0, 0, 0])
    result = class_accuracy(logits, labels).tolist()
    assert result == [0, 0, 0]
def test_confusion_matrix():
    """Rows of the confusion matrix are the true classes, columns the predictions."""
    logits = torch.as_tensor(
        [
            [0.1, 0.9, 0.0],
            [0.1, 0.4, 0.5],
            [0.1, 0.9, 0.0],
            [0.9, 0.0, 0.1],
        ]
    )
    labels = torch.as_tensor([0, 0, 1, 0])
    matrix = get_confusion_matrix(y_pred=logits, y=labels).tolist()
    assert matrix == [
        [1, 1, 1],
        [0, 1, 0],
        [0, 0, 0],
    ]
def test_per_class_accuracy_realistic():
    """Mixed correct / wrong predictions give per-class accuracies of 1/3, 1/2 and 2/3."""
    y_pred = torch.as_tensor(
        [
            [0.9, 0.0, 0.0],  # correct for class 0
            [0.1, 0.5, 0.4],  # correct for class 1
            [0.1, 0.0, 0.9],  # correct for class 2
            [0.1, 0.8, 0.1],  # wrong, should be 0
            [0.1, 0.0, 0.9],  # wrong, should be 0
            [0.9, 0.0, 0.0],  # wrong, should be 1
            [0.1, 0.5, 0.4],  # wrong, should be 2
            [0.1, 0.4, 0.5],  # correct for class 2
        ]
    )
    y = torch.as_tensor(
        [
            0,
            1,
            2,
            0,
            0,
            1,
            2,
            2,
        ]
    )
    # Class 0: 1 of 3 correct, class 1: 1 of 2 correct, class 2: 2 of 3 correct.
    expected = [1 / 3, 1 / 2, 2 / 3]
    class_acc = class_accuracy(y_pred, y).tolist()
    assert all(np.isclose(class_acc, expected))
================================================
FILE: sequoia/common/metrics/regression.py
================================================
""" Metrics class for regression.
Gives the mean squared error between a prediction Tensor `y_pred` and the
target tensor `y`.
"""
from dataclasses import InitVar, dataclass
from functools import total_ordering
from typing import Any, Dict, Optional, Union
import torch
import torch.nn.functional as functional
from torch import Tensor
from sequoia.utils.logging_utils import get_logger
from .metrics import Metrics
logger = get_logger(__name__)
@total_ordering
@dataclass
class RegressionMetrics(Metrics):
    """Metrics for regression: mean squared error and L1 error.

    TODO: Use this in the RL settings!
    """

    mse: Tensor = 0.0  # type: ignore
    l1_error: Tensor = 0.0  # type: ignore

    # Optional arguments used to compute the errors in `__post_init__`; they
    # do not become attributes of the object.
    x: InitVar[Optional[Tensor]] = None
    h_x: InitVar[Optional[Tensor]] = None
    y_pred: InitVar[Optional[Tensor]] = None
    y: InitVar[Optional[Tensor]] = None

    def __post_init__(
        self, x: Tensor = None, h_x: Tensor = None, y_pred: Tensor = None, y: Tensor = None
    ):
        """Compute the errors from `y_pred` and `y` when both are given.

        When their shapes differ, a warning is logged and the errors are left
        untouched. Both errors are always stored as tensors.
        """
        super().__post_init__(x=x, h_x=h_x, y_pred=y_pred, y=y)
        if y_pred is not None and y is not None:
            if y.shape != y_pred.shape:
                logger.warning(
                    f"Shapes aren't the same! (y_pred.shape={y_pred.shape}, "
                    f"y.shape={y.shape})"
                )
            else:
                self.mse = functional.mse_loss(y_pred, y)
                self.l1_error = functional.l1_loss(y_pred, y)
        self.mse = torch.as_tensor(self.mse)
        self.l1_error = torch.as_tensor(self.l1_error)

    @property
    def objective(self) -> float:
        return float(self.mse)

    @staticmethod
    def _weighted_mean(first, second, first_weight: float, second_weight: float) -> Tensor:
        """Weighted combination of two error values; a missing value counts as 0."""
        first = torch.zeros(()) if first is None else torch.as_tensor(first)
        second = torch.zeros(()) if second is None else torch.as_tensor(second)
        return first * first_weight + second * second_weight

    def __add__(self, other: "RegressionMetrics") -> "RegressionMetrics":
        """Merge two metrics: sample counts are summed, errors are averaged.

        Both errors are means over samples, so they are combined as a mean
        weighted by the sample counts (a plain average when neither side
        reports any sample). The result is built by arithmetic on the original
        tensors, so gradients stay linked to both operands.
        """
        if not isinstance(other, RegressionMetrics):
            return NotImplemented

        total_samples = self.n_samples + other.n_samples
        if total_samples > 0:
            self_weight = self.n_samples / total_samples
            other_weight = other.n_samples / total_samples
        else:
            self_weight = other_weight = 0.5

        return RegressionMetrics(
            n_samples=total_samples,
            mse=self._weighted_mean(self.mse, other.mse, self_weight, other_weight),
            l1_error=self._weighted_mean(
                self.l1_error, other.l1_error, self_weight, other_weight
            ),
        )

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        message = super().to_pbar_message()
        message["mse"] = float(self.mse.item())
        message["l1_error"] = float(self.l1_error.item())
        return message

    def to_log_dict(self, verbose=False):
        log_dict = super().to_log_dict(verbose=verbose)
        log_dict["mse"] = self.mse
        log_dict["l1_error"] = self.l1_error
        return log_dict

    def __mul__(self, factor: Union[float, Tensor]) -> "RegressionMetrics":
        # Multiplying a 'RegressionMetrics' object multiplies its errors.
        return RegressionMetrics(
            n_samples=self.n_samples,
            mse=self.mse * factor,
            l1_error=self.l1_error * factor,
        )

    def __rmul__(self, factor: Union[float, Tensor]) -> "RegressionMetrics":
        # Reverse-order multiply, used to do b * a when a * b returns
        # NotImplemented.
        return self.__mul__(factor)

    def __truediv__(self, coefficient: Union[float, Tensor]) -> "RegressionMetrics":
        # Dividing a RegressionMetrics object divides its errors.
        return RegressionMetrics(
            n_samples=self.n_samples,
            mse=self.mse / coefficient,
            l1_error=self.l1_error / coefficient,
        )

    def __lt__(self, other: Union["RegressionMetrics", Any]) -> bool:
        # Ordering is based on the mean squared error only.
        if isinstance(other, RegressionMetrics):
            return bool(self.mse < other.mse)
        return NotImplemented

    def __ge__(self, other: Union["RegressionMetrics", Any]) -> bool:
        if isinstance(other, RegressionMetrics):
            return bool(self.mse >= other.mse)
        return NotImplemented
================================================
FILE: sequoia/common/metrics/rl_metrics.py
================================================
from dataclasses import dataclass, field
from typing import Any, Dict, Union
from .metrics import Metrics
@dataclass
class EpisodeMetrics(Metrics):
    """Metrics for Episodes in RL.

    n_samples is the number of stored episodes.
    """

    n_samples: int = field(default=1, compare=False)
    # The average reward per episode.
    mean_episode_reward: float = 0.0
    # The average length of each episode.
    mean_episode_length: float = 0

    @property
    def n_episodes(self) -> int:
        return self.n_samples

    @property
    def objective_name(self) -> str:
        """Returns the name to be associated with the objective of this class.

        Returns
        -------
        str
            The name associated with the objective.
        """
        return "Mean Reward per Episode"

    @property
    def mean_reward_per_step(self) -> float:
        """Mean episode reward divided by mean episode length (0.0 for zero-length episodes)."""
        if not self.mean_episode_length:
            # The default length is 0: avoid a ZeroDivisionError (e.g. when logging).
            return 0.0
        return self.mean_episode_reward / self.mean_episode_length

    def __add__(self, other: Union["EpisodeMetrics", Any]):
        """Merge two episode metrics into means weighted by their episode counts."""
        if isinstance(other, (int, float)) and other == 0:
            # This makes `sum(list_of_metrics)` work!.
            return self
        if isinstance(other, Metrics) and other == Metrics():
            return self
        if not isinstance(other, EpisodeMetrics):
            return NotImplemented

        new_n_samples = self.n_samples + other.n_samples
        if new_n_samples == 0:
            # No episode on either side: nothing to average.
            return EpisodeMetrics(n_samples=0)

        total_reward = (
            self.mean_episode_reward * self.n_samples
            + other.mean_episode_reward * other.n_samples
        )
        total_length = (
            self.mean_episode_length * self.n_samples
            + other.mean_episode_length * other.n_samples
        )
        return EpisodeMetrics(
            n_samples=new_n_samples,
            mean_episode_reward=total_reward / new_n_samples,
            mean_episode_length=total_length / new_n_samples,
        )

    @property
    def total_reward(self) -> float:
        return self.n_episodes * self.mean_episode_reward

    @property
    def total_steps(self) -> int:
        return round(self.n_episodes * self.mean_episode_length)

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        return self.to_log_dict()

    @property
    def objective(self) -> float:
        return self.mean_episode_reward

    def to_log_dict(self, verbose: bool = False):
        """Dict to be logged; totals and the mean episode length are added when `verbose`."""
        log_dict = {
            "Episodes": self.n_episodes,
            "Mean reward per episode": self.mean_episode_reward,
            "Mean reward per step": self.mean_reward_per_step,
        }
        if verbose:
            log_dict.update(
                {
                    "Total steps": int(self.total_steps),
                    "Total reward": int(self.total_reward),
                    "Mean episode length": float(self.mean_episode_length),
                }
            )
        return log_dict

    @property
    def episodes(self) -> int:
        return self.n_samples

    @property
    def mean_reward_per_episode(self) -> float:
        return self.mean_episode_reward
# @dataclass
# class RLMetrics(Metrics):
# episodes: List[EpisodeMetrics] = field(default_factory=list, repr=False)
# average_episode_length: int = field(default=0)
# average_episode_reward: float = field(default=0.)
# def __post_init__(self):
# if self.episodes:
# self.n_samples = len(self.episodes)
# self.average_episode_length = sum(ep.episode_length for ep in self.episodes) / self.n_samples
# self.average_episode_reward = sum(ep.total_reward for ep in self.episodes) / self.n_samples
# def __add__(self, other: Union["RLMetrics", EpisodeMetrics, Any]) -> "RLMetrics":
# if isinstance(other, RLMetrics):
# return RLMetrics(
# episodes = self.episodes + other.episodes,
# )
# if isinstance(other, EpisodeMetrics):
# self.episodes.append(other)
# return self
# return NotImplemented
# def to_pbar_message(self) -> Dict[str, Union[str, float]]:
# log_dict = self.to_log_dict()
# # Rename "n_samples" to "episodes":
# log_dict["episodes"] = log_dict.pop("n_samples")
# return log_dict
@dataclass
class GradientUsageMetric(Metrics):
    """Small Metrics to report the fraction of gradients that were used vs
    'wasted', when using batch_size > 1.
    """

    # Number of gradients that were used.
    used_gradients: int = 0
    # Number of gradients that were 'wasted'.
    wasted_gradients: int = 0
    # Fraction used / (used + wasted); recomputed in `__post_init__` whenever
    # at least one gradient was counted.
    used_gradients_fraction: float = 0.0

    def __post_init__(self):
        """Derive `n_samples` and the used fraction from the two counters."""
        self.n_samples = self.used_gradients + self.wasted_gradients
        if self.n_samples:
            self.used_gradients_fraction = self.used_gradients / self.n_samples

    def __add__(self, other: Union["GradientUsageMetric", Any]) -> "GradientUsageMetric":
        """Add the counters of two metrics together.

        Returns `self` when `other` is the number 0, following the same
        convention as `EpisodeMetrics.__add__`, so that a `sum(...)` over a
        list of metrics can start from 0.
        NOTE(review): `sum` also needs a reflected add (`__radd__`), presumably
        provided by the `Metrics` base class -- confirm.
        Returns `NotImplemented` for any other non-`GradientUsageMetric` value.
        """
        if isinstance(other, (int, float)) and other == 0:
            return self
        if not isinstance(other, GradientUsageMetric):
            return NotImplemented
        return GradientUsageMetric(
            used_gradients=self.used_gradients + other.used_gradients,
            wasted_gradients=self.wasted_gradients + other.wasted_gradients,
        )

    def to_pbar_message(self) -> Dict[str, Union[str, float]]:
        """Return the progress-bar message: only the used-gradient fraction."""
        return {"used_fraction": self.used_gradients_fraction}
================================================
FILE: sequoia/common/replay.py
================================================
""" Labeled, Unlabeled and Semi-supervised Replay buffer objects.
TODO: Unused for now, but could be used in a LightningModule.
"""
import random
from collections import Counter, deque
from dataclasses import dataclass
from typing import *
import torch
from simple_parsing import field
from torch import Tensor
from torch.utils.data import TensorDataset
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import Pickleable, Serializable
logger = get_logger(__name__)
T = TypeVar("T")
class ReplayBuffer(deque, Deque[T], Pickleable):
    """Simple implementation of a replay buffer.

    Uses a doubly-ended Queue, which unfortunately isn't registered as a buffer
    for pytorch. The deque is created with `maxlen=capacity`, so appending to a
    full buffer discards items from the opposite end.
    """

    def __init__(self, capacity: int):
        """Create an empty buffer holding at most `capacity` items."""
        super().__init__(maxlen=capacity)
        self.capacity: int = capacity
        # TODO: figure out how to persist the buffer with state_dict maybe?
        # self.register_buffer("memory", torch.zeros(1))
        self.labeled: Optional[bool] = None
        self.current_size: int = 0

    def as_dataset(self) -> TensorDataset:
        """Stack the stored items field-by-field into a `TensorDataset`."""
        contents = zip(*self)
        return TensorDataset(*map(torch.stack, contents))

    def _push_and_sample(self, *values: T, size: int) -> List[T]:
        """Pushes `values` into the buffer and samples `size` samples from it.

        NOTE: In contrast to `push`, allows sampling more than `len(self)`
        samples from the buffer (up to `len(self) + len(values)`)

        Args:
            *values (T): The items to push.
            size (int): Number of samples to take.

        Returns:
            List[T]: The first `size` items of the shuffled pool made of the
            previous buffer contents and `values`.
        """
        extended = list(self)
        extended.extend(values)
        random.shuffle(extended)
        assert size <= len(
            extended
        ), f"Asked to sample {size} values, while there are only {len(extended)} in the batch + buffer!"
        # `extended` already contains the previous contents of the buffer, so the
        # buffer is emptied before being refilled: extending it directly would
        # store every old item a second time whenever the buffer isn't full.
        # When the pool exceeds the capacity, the deque's `maxlen` keeps only the
        # last `capacity` items of the shuffled pool (a random subset).
        self.clear()
        self.extend(extended)
        return extended[:size]

    def _sample(self, size: int) -> List[T]:
        """Sample `size` distinct stored items, without modifying the buffer."""
        assert size <= len(
            self
        ), f"Asked to sample {size} values while there are only {len(self)} in the buffer!"
        return random.sample(self, size)

    @property
    def full(self) -> bool:
        """Whether the number of stored items equals the capacity."""
        return len(self) == self.capacity
class UnlabeledReplayBuffer(ReplayBuffer[Tensor]):
    """Replay buffer that stores individual (unlabeled) sample tensors."""

    def sample_batch(self, size: int) -> Tensor:
        """Sample `size` stored tensors and stack them into one batch tensor."""
        batch = super()._sample(size)
        return torch.stack(batch)

    def push(self, x_batch: Tensor, y_batch: Tensor = None) -> None:
        """Store every sample of `x_batch`; `y_batch` is accepted but ignored."""
        super().extend(x_batch)

    def push_and_sample(self, x_batch: Tensor, y_batch: Tensor = None, size: int = None) -> Tensor:
        """Store the samples of `x_batch` and return a stacked batch of `size` samples.

        `size` defaults to the length of `x_batch`. `y_batch` is accepted but
        ignored.
        """
        size = x_batch.shape[0] if size is None else size
        # Unpack the batch so that each sample is pushed individually (like
        # `push` does). Passing `x_batch` itself would store the whole batch as
        # a single item.
        return torch.stack(super()._push_and_sample(*x_batch, size=size))
class LabeledReplayBuffer(ReplayBuffer[Tuple[Tensor, Tensor]]):
    """Replay buffer that stores `(x, y)` pairs of tensors."""

    def sample(self, size: int) -> Tuple[Tensor, Tensor]:
        """Sample `size` stored pairs, returned as stacked data and target tensors."""
        pairs = super()._sample(size)
        xs, ys = zip(*pairs)
        stacked_x = torch.stack(xs)
        stacked_y = torch.stack(ys)
        return stacked_x, stacked_y

    def push(self, x_batch: Tensor, y_batch: Tensor) -> None:
        """Store each `(x, y)` pair obtained by zipping the two batches."""
        pairs = zip(x_batch, y_batch)
        super().extend(pairs)

    def push_and_sample(
        self, x_batch: Tensor, y_batch: Tensor, size: int = None
    ) -> Tuple[Tensor, Tensor]:
        """Store the given pairs and return `size` sampled pairs as stacked tensors.

        `size` defaults to the length of `x_batch`.
        """
        if size is None:
            size = x_batch.shape[0]
        pairs = super()._push_and_sample(*zip(x_batch, y_batch), size=size)
        xs, ys = zip(*pairs)
        stacked_x = torch.stack(xs)
        stacked_y = torch.stack(ys)
        return stacked_x, stacked_y

    def samples_per_class(self) -> Dict[int, int]:
        """Returns a Counter showing how many samples there are per class."""
        # TODO: Idea, could use the None key for unlabeled replay buffer.
        counts = Counter()
        for _, label in self:
            counts[int(label)] += 1
        return counts
class SemiSupervisedReplayBuffer(object):
    """Pair of replay buffers: one for labeled and one for unlabeled samples."""

    def __init__(self, labeled_capacity: int, unlabeled_capacity: int = 0):
        """Semi-Supervised (ish) version of a replay buffer.

        With the default parameters, acts just like a regular replay buffer.

        When passed `unlabeled_capacity`, allows for storing unlabeled samples
        as well as labeled samples. Unlabeled samples are stored in a different
        buffer than labeled samples.

        Allows sampling both labeled and unlabeled samples.

        Args:
            labeled_capacity (int): Capacity of the labeled buffer.
            unlabeled_capacity (int, optional): Capacity of the unlabeled
                buffer. Defaults to 0.
        """
        super().__init__()
        self.labeled_capacity = labeled_capacity
        self.unlabeled_capacity = unlabeled_capacity
        self.labeled = LabeledReplayBuffer(labeled_capacity)
        self.unlabeled = UnlabeledReplayBuffer(unlabeled_capacity)

    def sample(self, size: int) -> Tuple[Tensor, Tensor]:
        """Takes `size` (labeled) samples from the buffer.

        Args:
            size (int): Number of samples to return.

        Returns:
            Tuple[Tensor, Tensor]: batched data and label tensors.
        """
        assert size <= len(self.labeled), (
            f"Asked to sample {size} values while there are only "
            f"{len(self.labeled)} labeled samples in the buffer! "
        )
        return self.labeled.sample(size)

    def sample_unlabeled(self, size: int, take_from_labeled_buffer_first: bool = None) -> Tensor:
        """Samples `size` unlabeled samples.

        Can also use samples from the labeled replay buffer (while discarding
        the labels) if there is no unlabeled replay buffer.

        Args:
            size (int): Number of x's to sample
            take_from_labeled_buffer_first (bool, optional):
                When `None` (default), doesn't take any samples from the labeled
                buffer.
                When `True`, prioritizes taking samples from the labeled replay
                buffer.
                When `False`, prioritizes taking samples from the unlabeled replay
                buffer, but take the remaining samples from the labeled buffer.

        Returns:
            Tensor: A batch of X's, concatenated in the order they were taken.
        """
        total = len(self.unlabeled)
        if take_from_labeled_buffer_first is not None:
            total += len(self.labeled)
        assert size <= total, (
            f"Asked to sample {size} values while there are only "
            f"{total} unlabeled samples in total in the buffer! "
        )

        # Number of x's we still have to sample.
        samples_left = size
        tensors: List[Tensor] = []

        if take_from_labeled_buffer_first:
            # Take labeled samples and drop the label. Only ask for what the
            # labeled buffer can provide, not for the full `size`.
            n_samples_from_labeled = min(len(self.labeled), samples_left)
            if n_samples_from_labeled > 0:
                data, _ = self.labeled.sample(n_samples_from_labeled)
                samples_left -= data.shape[0]
                tensors.append(data)

        # Take the rest of the samples from the unlabeled buffer, capped at what
        # it holds. Skipped when nothing is left to take, since stacking an
        # empty list of samples would fail.
        n_samples_from_unlabeled = min(len(self.unlabeled), samples_left)
        if n_samples_from_unlabeled > 0:
            data = self.unlabeled.sample_batch(n_samples_from_unlabeled)
            samples_left -= data.shape[0]
            tensors.append(data)

        if take_from_labeled_buffer_first is False:
            # Take the rest of the labeled samples and drop the label.
            n_samples_from_labeled = min(len(self.labeled), samples_left)
            if n_samples_from_labeled > 0:
                data, _ = self.labeled.sample(n_samples_from_labeled)
                samples_left -= data.shape[0]
                tensors.append(data)

        return torch.cat(tensors)

    def push_and_sample(self, x: Tensor, y: Tensor, size: int = None) -> Tuple[Tensor, Tensor]:
        """Push `x` to the unlabeled buffer and `(x, y)` to the labeled buffer,
        then return `size` labeled samples (`size` defaults to `x.shape[0]`).
        """
        size = x.shape[0] if size is None else size
        self.unlabeled.push(x)
        return self.labeled.push_and_sample(x, y, size=size)

    def push_and_sample_unlabeled(self, x: Tensor, y: Tensor = None, size: int = None) -> Tensor:
        """Push `x` to the unlabeled buffer (and `(x, y)` to the labeled buffer
        when `y` is given), then return `size` unlabeled samples (`size`
        defaults to `x.shape[0]`).
        """
        size = x.shape[0] if size is None else size
        if y is not None:
            self.labeled.push(x, y)
        return self.unlabeled.push_and_sample(x, size=size)

    def clear(self):
        """Remove all samples from both buffers."""
        self.labeled.clear()
        self.unlabeled.clear()
@dataclass
class ReplayOptions(Serializable):
    """Options related to Replay."""

    # Size of the labeled replay buffer.
    labeled_buffer_size: int = field(0, alias="replay_buffer_size")
    # Size of the unlabeled replay buffer.
    unlabeled_buffer_size: int = 0

    # Always use the replay buffer to help "smooth" out the data stream.
    always_use_replay: bool = False
    # Sampling size, when used as described above to smooth out the data stream.
    # If not given, will use the same value as the batch size.
    sampled_batch_size: Optional[int] = None

    @property
    def enabled(self) -> bool:
        """True when at least one of the two buffer sizes is positive."""
        if self.labeled_buffer_size > 0:
            return True
        return self.unlabeled_buffer_size > 0
================================================
FILE: sequoia/common/spaces/__init__.py
================================================
""" Custom `gym.spaces.Space` subclasses used by Sequoia. """
from .image import Image, ImageTensorSpace
from .named_tuple import NamedTuple, NamedTupleSpace
from .space import Space
from .sparse import Sparse
from .tensor_spaces import TensorBox, TensorDiscrete, TensorMultiDiscrete, TensorSpace
from .typed_dict import TypedDictSpace
================================================
FILE: sequoia/common/spaces/image.py
================================================
""" IDEA: Create a subclass of spaces.Box for images.
"""
from typing import Optional, Tuple, Union
import numpy as np
import torch
from gym import spaces
from gym.vector.utils import batch_space
from .space import Space, T
from .tensor_spaces import TensorBox
def could_become_image(space: spaces.Space) -> bool:
    """Tell whether `space` is a 3-dimensional Box shaped like a square image.

    The shape must have two equal adjacent dimensions, with the remaining
    (first or last) dimension being 1 or 3.
    """
    if not isinstance(space, spaces.Box):
        return False
    dims = space.shape
    if len(dims) != 3:
        return False
    first, middle, last = dims
    if first == middle and last in {1, 3}:
        # Square spatial dims followed by the channels.
        return True
    # Channels followed by square spatial dims.
    return middle == last and first in {1, 3}
class Image(spaces.Box, Space[T]):
    """Subclass of `gym.spaces.Box` for images.

    Comes with a few useful attributes, like `h`, `w`, `c`, `channels_first`,
    `channels_last`, etc.
    """

    def __init__(
        self,
        low: Union[float, np.ndarray],
        high: Union[float, np.ndarray],
        shape: Tuple[int, ...] = None,
        dtype: np.dtype = None,
        **kwargs,
    ):
        """Create the space and infer its image layout from the resulting shape.

        When `dtype` is not given, `np.uint8` is used if `low` and `high` are
        the ints 0 and 255, and `np.float32` otherwise.
        """
        if dtype is None:
            looks_like_uint8 = (
                isinstance(low, int) and isinstance(high, int) and low == 0 and high == 255
            )
            dtype = np.uint8 if looks_like_uint8 else np.float32
        super().__init__(low=low, high=high, shape=shape, dtype=dtype, **kwargs)

        self.channels_first: bool = False
        # Optional batch dimension
        self.b: Optional[int] = None

        n_dims = len(self.shape)
        assert n_dims in {3, 4}, "Need three or four dimensions."
        if n_dims == 4:
            # The leading dimension is the batch dimension.
            self.b = self.shape[0]

        # The last three dimensions describe the image itself.
        first, middle, last = self.shape[-3:]
        if first in {1, 3}:
            self.c, self.h, self.w = first, middle, last
            self.channels_first = True
        elif last in {1, 3}:
            self.h, self.w, self.c = first, middle, last
        else:
            # NOTE: will assume that in channels_first for now, but won't set
            # `channels_first` property.
            self.c, self.h, self.w = first, middle, last

        if self.h is None or self.w is None or self.c is None:
            raise RuntimeError(
                f"Shouldn't be using an Image space, since the shape "
                f"doesn't appear to be an image: {self.shape}"
            )

    @property
    def channels(self) -> int:
        """Number of channels (`c`)."""
        return self.c

    @property
    def height(self) -> int:
        """Image height (`h`)."""
        return self.h

    @property
    def width(self) -> int:
        """Image width (`w`)."""
        return self.w

    @property
    def batch_size(self) -> Optional[int]:
        """Size of the batch dimension (`b`), or None for a 3-dimensional shape."""
        return self.b

    @classmethod
    def from_box(cls, box_space: spaces.Box):
        """Build an instance of `cls` from the bounds and dtype of a Box space."""
        low, high = box_space.low, box_space.high
        return cls(low, high, dtype=box_space.dtype)

    @classmethod
    def wrap(cls, space: Union["Image", spaces.Box]):
        """Return `space` unchanged if it is already an Image, convert it with
        `from_box` if it is a Box, and raise `NotImplementedError` otherwise.
        """
        if isinstance(space, Image):
            return space
        if not isinstance(space, spaces.Box):
            raise NotImplementedError(space)
        return cls.from_box(space)

    @property
    def channels_last(self) -> bool:
        """Negation of `channels_first`."""
        return not self.channels_first

    def __repr__(self):
        return f"{type(self).__name__}({self.low.min()}, {self.high.max()}, {self.shape}, {self.dtype})"

    def sample(self) -> T:
        """Sample from the space (delegates to the parent implementation)."""
        image = super().sample()
        return image
class ImageTensorSpace(Image, TensorBox):
    """Image space whose samples are returned as torch tensors on `self.device`."""

    @classmethod
    def from_box(cls, box_space: TensorBox, device: torch.device = None):
        """Build an instance from `box_space`, on `device` if given, else on the
        device of `box_space`.
        """
        device = device or box_space.device
        return cls(box_space.low, box_space.high, dtype=box_space.dtype, device=device)

    def __repr__(self):
        return f"{type(self).__name__}({self.low.min()}, {self.high.max()}, {self.shape}, {self.dtype}, device={self.device})"

    def sample(self):
        """Sample with the parent implementation, then convert to a tensor.

        `self.dtype` is temporarily switched to `self._numpy_dtype` while the
        parent samples. It is restored to `self._torch_dtype` in a `finally`
        clause so that a failure while sampling doesn't leave the space with the
        wrong dtype.
        """
        self.dtype = self._numpy_dtype
        try:
            s = super().sample()
        finally:
            self.dtype = self._torch_dtype
        return torch.as_tensor(s, dtype=self._torch_dtype, device=self.device)
# @to_tensor.register
# def _(space: Image,
# sample: Union[np.ndarray, Tensor],
# device: torch.device = None) -> Union[Tensor]:
# """ Converts a sample from the given space into a Tensor. """
# return torch.as_tensor(sample, device=device)
@batch_space.register
def _batch_image_space(space: Image, n: int = 1) -> Union[Image, spaces.Box]:
    """Batch an Image space by tiling its bounds `n` times along a new leading axis.

    - Unbatched image (`space.b is None`): returns a space of the same type as
      `space`, with bounds tiled by `[n, 1, 1, 1]`.
    - Batch dimension of size 1: returns `space` itself when `n == 1`, otherwise
      a space of the same type with bounds tiled by `[n, 1, 1, 1]`.
    - Any other batch size: falls back to a plain `spaces.Box` with an extra
      leading dimension of size `n`.
    """
    if space.b is not None and space.b != 1:
        # Already batched with more than one element: we can't express this as
        # an Image anymore, so fall back to a Box space.
        # (The `raise RuntimeError` that used to follow this return was
        # unreachable and has been removed.)
        repeats = [n] + [1] * space.low.ndim
        low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
        return spaces.Box(low=low, high=high, dtype=space.dtype)

    if space.b == 1 and n == 1:
        # This might happen in BatchedVectorEnv, when creating env_a and env_b,
        # which have an extra batch/chunk dimension.
        return space

    repeats = [n, 1, 1, 1]
    low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
    img = type(space)(low=low, high=high, dtype=space.dtype)
    return img
================================================
FILE: sequoia/common/spaces/named_tuple.py
================================================
""" IDEA: Subclass of `gym.spaces.Tuple` that yields namedtuples,
as a bit of a hybrid between `gym.spaces.Dict` and `gym.spaces.Tuple`.
"""
from collections import namedtuple
from collections.abc import Mapping as MappingABC
from typing import Any, Dict, Iterable, List, Mapping, Sequence, Tuple, Type, Union
import numpy as np
from gym import Space, spaces
from sequoia.utils.generic_functions._namedtuple import NamedTuple
class NamedTupleSpace(spaces.Tuple):
    """
    A tuple (i.e., product) of simpler (named) spaces. Samples are namedtuples.

    Example usage:

    ```python
    self.observation_space = NamedTupleSpace(x=spaces.Discrete(2), t=spaces.Discrete(3))
    ```

    Note: here the dtype is actually the type of namedtuple to use, not a
    numpy dtype.
    """

    def __init__(
        self,
        spaces: Union[Mapping[str, Space], Sequence[Space]] = None,
        names: Sequence[str] = None,
        dtype: Type[NamedTuple] = None,
        **kwargs,
    ):
        """Create the space from named subspaces.

        The subspaces can be given in one of three ways, checked in this order:
        a mapping passed as `spaces` (`names` must then be None), keyword
        arguments (name=space), or a sequence `spaces` together with `names`.

        `dtype` is the type used to build samples; when omitted, a namedtuple
        type called "NamedTuple" with the subspace names as fields is created.
        """
        self._spaces: Dict[str, Space] = {}
        if isinstance(spaces, MappingABC):
            assert names is None
            self._spaces = dict(spaces.items())
        elif kwargs:
            assert all(isinstance(k, str) and isinstance(v, Space) for k, v in kwargs.items())
            self._spaces = kwargs
        else:
            assert names is not None, "need to pass names when spaces isn't a mapping."
            assert spaces and len(names) == len(spaces), "need to pass a name for each space"
            self._spaces = dict(zip(names, spaces))

        # NOTE: dict.values() is ordered since python 3.7.
        spaces = tuple(self._spaces.values())
        super().__init__(spaces)
        self.names: Sequence[str] = tuple(self._spaces.keys())
        self.dtype: Type[Tuple] = dtype or namedtuple("NamedTuple", self.names)
        # idea: could use this _name attribute to change the __repr__ first part
        self._name = self.dtype.__name__
        assert all(name == key for name, key in zip(self.names, self._spaces.keys()))

    def __getitem__(self, index: Union[int, str]) -> Space:
        """Return a subspace, by name (str) or by position (anything else)."""
        if isinstance(index, str):
            return self._spaces[index]
        return super().__getitem__(index)

    def __getattr__(self, attr: str) -> Space:
        """Expose the subspaces as attributes.

        `_spaces` itself is never resolved here: looking it up through
        `self._spaces` below would call `__getattr__` again when the attribute
        isn't set yet.
        """
        if attr == "_spaces":
            raise AttributeError(attr)
        if attr in self._spaces:
            return self._spaces[attr]
        raise AttributeError(attr)

    def __repr__(self):
        # TODO: Tricky: decide what name to show for the space class:
        cls_name = type(self).__name__
        # cls_name = self._name or type(self).__name__
        return (
            f"{cls_name}("
            + ", ".join([str(k) + "=" + str(s) for k, s in self._spaces.items()])
            + ")"
        )

    def _replace(self, **kwargs):
        """replaces the given subspaces with newer ones, maintaining the
        current ordering.

        Only existing names may be passed. The sample type (`dtype`) of this
        space is carried over to the new space, instead of silently falling back
        to a freshly generated namedtuple type.
        """
        spaces = self._spaces.copy()
        assert all(k in spaces for k in kwargs), "no new keys allowed"
        spaces.update(kwargs)
        return type(self)(**spaces, dtype=self.dtype)

    def __eq__(self, other: Union["NamedTupleSpace", Any]) -> bool:
        """Equal to any `spaces.Tuple` with equal subspaces in the same order
        (names are not compared).
        """
        return isinstance(other, spaces.Tuple) and tuple(self.spaces) == tuple(other.spaces)

    def sample(self):
        """Sample each subspace and wrap the values in `self.dtype`."""
        return self.dtype(*super().sample())

    def contains(self, x) -> bool:
        """Check whether `x` is a valid sample; mappings are first converted to
        a tuple ordered by `self.names`.
        """
        if isinstance(x, MappingABC):
            # TODO: If a namedtuple/dataclass has more items than those required
            # by this space, should we consider it valid if all its items are
            # contained in their respective spaces in `self`?
            x = tuple(x[k] for k in self.names)
            # x = tuple(x.values())
        return super().contains(x)

    def keys(self) -> List[str]:
        """Names of the subspaces (the keys view of the underlying dict)."""
        return self._spaces.keys()

    def values(self) -> List[Space]:
        """The subspaces (the values view of the underlying dict)."""
        return self._spaces.values()

    def items(self) -> Iterable[Tuple[str, Space]]:
        """Iterate over `(name, subspace)` pairs."""
        yield from self._spaces.items()
# See https://github.com/openai/gym/issues/2140 : Fix __eq__ of gym.spaces.Tuple
def __eq__(self, other: Union["NamedTupleSpace", Any]) -> bool:
    """Equality check installed on `spaces.Tuple` (see the assignment below).

    Two Tuple spaces are equal when their subspaces, converted to tuples,
    compare equal.
    """
    if not isinstance(other, spaces.Tuple):
        return False
    # BUG in openai gym: spaces passed to the spaces.Tuple constructor could
    # be a list of spaces, rather than a tuple, and so this might return
    # False when it shouldn't. Both sides are converted to tuples first.
    own_spaces = tuple(self.spaces)
    other_spaces = tuple(other.spaces)
    return own_spaces == other_spaces


spaces.Tuple.__eq__ = __eq__
from gym.spaces.utils import flatten
from gym.vector.utils import batch_space
@batch_space.register(NamedTupleSpace)
def batch_namedtuple_space(space: NamedTupleSpace, n: int = 1):
    """Batch a NamedTupleSpace by batching each of its subspaces `n` times.

    The resulting space keeps the names and the `dtype` of `space`.
    """
    batched_subspaces = {}
    for name in space.names:
        batched_subspaces[name] = batch_space(space[name], n)
    return NamedTupleSpace(**batched_subspaces, dtype=space.dtype)
@flatten.register
def flatten_namedtuple_space_sample(space: NamedTupleSpace, x: NamedTuple):
    """Flatten a sample of a NamedTupleSpace into a single 1-d array.

    Each item of `x` is flattened with its corresponding subspace, and the
    results are concatenated in order.
    """
    # `Batch` was referenced here without ever being imported in this module,
    # which raised a NameError on every call. Imported locally to avoid adding
    # a module-level import (and a possible import cycle).
    from sequoia.common.batch import Batch

    assert not isinstance(x, Batch), f"NamedTupleSpace, shouldn't have Batch samples: {space} {x}"
    return np.concatenate([flatten(s, x_part) for x_part, s in zip(x, space.spaces)])
================================================
FILE: sequoia/common/spaces/named_tuple_test.py
================================================
import numpy as np
import pytest
from gym import spaces
from gym.spaces import Box, Discrete
from gym.vector.utils import batch_space
from .named_tuple import NamedTuple, NamedTupleSpace
pytestmark = pytest.mark.skip(
reason="Removing the NamedTuple space and NamedTuple class in favour of TypedDict.",
)
def test_basic():
    """Samples of a NamedTupleSpace belong to it, and samples are interchangeable
    with those of a plain Tuple space made of the same subspaces.
    """
    named_space = NamedTupleSpace(
        **dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        )
    )
    sample = named_space.sample()
    print(sample)
    assert sample in named_space

    # TODO: Maybe re-use all the tests for gym.spaces.Tuple in the gym repo
    # somehow?
    plain_subspaces = [Box(0, 1, (2, 2)), Discrete(2), Box(0, 1, (2, 2))]
    plain_space = spaces.Tuple(plain_subspaces)

    assert plain_space.sample() in named_space
    assert named_space.sample() in plain_space
# Sample type passed as the `dtype` of the NamedTupleSpace in the tests below.
class StateTransition(NamedTuple):
    # State before the action.
    current_state: np.ndarray
    # Action that was taken.
    action: int
    # State after the action.
    next_state: np.ndarray
def test_basic_with_dtype():
    """Same as `test_basic`, but with a custom `dtype`: samples must be
    instances of that type.
    """
    named_space = NamedTupleSpace(
        **dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
        dtype=StateTransition,
    )
    sample = named_space.sample()
    assert sample in named_space
    assert isinstance(sample, StateTransition)

    plain_subspaces = [Box(0, 1, (2, 2)), Discrete(2), Box(0, 1, (2, 2))]
    plain_space = spaces.Tuple(plain_subspaces)

    assert plain_space.sample() in named_space
    assert named_space.sample() in plain_space
@pytest.mark.xfail()
def test_isinstance_namedtuple():
    """Samples of a NamedTupleSpace should be instances of `NamedTuple`
    (currently expected to fail).
    """
    subspaces = dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    named_space = NamedTupleSpace(**subspaces, dtype=StateTransition)
    assert isinstance(named_space, NamedTupleSpace)
    sample = named_space.sample()
    assert isinstance(sample, NamedTuple)
def test_equals_tuple_space_with_same_items():
    """Test that a NamedTupleSpace is considered equal to a Tuple space if
    the spaces are in the same order and all equal (regardless of the names).
    """
    named_space = NamedTupleSpace(
        **dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
        dtype=StateTransition,
    )
    plain_subspaces = [Box(0, 1, (2, 2)), Discrete(2), Box(0, 1, (2, 2))]
    plain_space = spaces.Tuple(plain_subspaces)

    # Equality must hold in both directions.
    assert named_space == plain_space
    assert plain_space == named_space
def test_batch_objets_considered_valid_samples():
    """`Batch` dataclass instances are valid samples of a NamedTupleSpace that
    uses that dataclass as its `dtype`, and samples are of that type.
    """
    from dataclasses import dataclass

    import numpy as np

    from sequoia.common.batch import Batch

    @dataclass(frozen=True)
    class StateTransitionDataclass(Batch):
        current_state: np.ndarray
        action: int
        next_state: np.ndarray

    subspaces = dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    named_space = NamedTupleSpace(**subspaces, dtype=StateTransitionDataclass)

    half_filled = np.ones([2, 2]) / 2
    observation = StateTransitionDataclass(
        current_state=half_filled,
        action=1,
        next_state=np.zeros([2, 2]),
    )
    assert observation in named_space
    assert named_space.sample() in named_space
    assert isinstance(named_space.sample(), StateTransitionDataclass)
def test_batch_space():
    """Batching a NamedTupleSpace batches each subspace and keeps the names and dtype."""
    named_space = NamedTupleSpace(
        **dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
        dtype=StateTransition,
    )
    batched = batch_space(named_space, n=5)
    expected = NamedTupleSpace(
        current_state=Box(0, 1, (5, 2, 2)),
        action=spaces.MultiDiscrete([2, 2, 2, 2, 2]),
        next_state=Box(0, 1, (5, 2, 2)),
        dtype=StateTransition,
    )
    assert batched == expected
## IDEA: Creating a space like this, using the same syntax as with NamedTuple
# class StateTransitionSpace(NamedTupleSpace):
# current_state: Box = Box(0, 1, (2,2))
# action: Discrete = Discrete(2)
# current_state: Box = Box(0, 1, (2,2))
# space = StateTransitionSpace()
# space.sample()
================================================
FILE: sequoia/common/spaces/space.py
================================================
""" Small typing improvements to the `gym.spaces.Space` class. """
from typing import Any, Generic, TypeVar, Union
from gym.spaces import Space as _Space
T = TypeVar("T")
class Space(_Space, Generic[T]):
    """`gym.spaces.Space` with a type parameter `T` for the type of its samples.

    The methods only add type hints; they delegate to the parent class.
    """

    def sample(self) -> T:
        """Return a sample from the space."""
        value = super().sample()
        return value

    def __contains__(self, x: Union[T, Any]) -> bool:
        """Support for `x in space`."""
        is_member = super().__contains__(x)
        return is_member

    def contains(self, v: Union[T, Any]) -> bool:
        """Return whether `v` is a valid member of the space."""
        is_member = super().contains(v)
        return is_member
================================================
FILE: sequoia/common/spaces/sparse.py
================================================
""" 'wrapper' around a gym.Space that adds a probability of sampling `None`
instead of a sample from the 'base' space.
As a result, `None` is always a valid sample from any Sparse space.
"""
import multiprocessing as mp
from ctypes import c_bool
# from gym.spaces.utils import flatdim, flatten
from functools import singledispatch
from multiprocessing.context import BaseContext
from typing import Any, Dict, Optional, Sequence, Tuple, Union
import gym
import gym.spaces.utils
import gym.vector.utils.numpy_utils
import gym.vector.utils.shared_memory
import numpy as np
import torch
from gym import spaces
from gym.vector.utils import batch_space, concatenate
from gym.vector.utils.numpy_utils import concatenate
from torch import Tensor
from .space import Space, T
class Sparse(Space[Optional[T]]):
    """Space which returns a value of `None` `sparsity`% of the time when sampled.

    `None` is also a valid sample of this space in addition to those of the wrapped space.

    TODO: Maybe refactor this into a mixin class, a bit like `TensorSpace`? If so,
    then make sure that we don't suddenly need to create SparseTensorBox and the like.
    """

    def __init__(self, base: Space[T], sparsity: float = 0.0):
        """Wrap `base`, with `sparsity` (in [0, 1]) the probability of sampling `None`."""
        self.base = base
        assert 0 <= sparsity <= 1, "invalid spasity, needs to be in [0, 1]"
        self._sparsity = sparsity
        # Would it ever cause a problem to have different dtypes for different
        # instances of the same space?
        # dtype = self.base.dtype if sparsity == 0. else np.object_
        super().__init__(shape=self.base.shape, dtype=np.object_)

    @property
    def sparsity(self) -> float:
        """Probability of sampling `None`."""
        return self._sparsity

    # def __getattr__(self, attr: str):
    #     return getattr(self.base, attr)

    def seed(self, seed=None):
        """Seed this space and its base space; returns the result of seeding the base."""
        super().seed(seed)
        return self.base.seed(seed=seed)

    def sample(self) -> Optional[T]:
        """Return `None` with probability `sparsity`, else a sample of the base space.

        No random number is drawn from this space's own generator when the
        sparsity is exactly 0 or 1.
        """
        if self.sparsity == 0:
            return self.base.sample()
        if self.sparsity == 1.0:
            return None
        p = self.np_random.random()
        if p <= self.sparsity:
            return None
        else:
            return self.base.sample()

    def contains(self, x: Union[Optional[T], Any]) -> bool:
        """
        Return boolean specifying if x is a valid
        member of this space
        """
        return x is None or self.base.contains(x)

    def __repr__(self):
        return f"Sparse({self.base}, sparsity={self.sparsity})"

    def __eq__(self, other: Any):
        """Equal to another Sparse space with an equal base and equal sparsity."""
        if not isinstance(other, Sparse):
            return NotImplemented
        return other.base == self.base and other.sparsity == self.sparsity

    def to_jsonable(self, sample_n):
        """Not supported.

        Previously an `assert False` (which is stripped under `python -O`)
        followed by unreachable code that referenced a nonexistent
        `self.spaces` attribute.
        """
        raise NotImplementedError(
            "TODO: This isn't really ever used anywhere, even in Gym, is it?"
        )

    def from_jsonable(self, sample_n):
        """Not supported (see `to_jsonable`)."""
        raise NotImplementedError(
            "TODO: This isn't really ever used anywhere, even in Gym, is it?"
        )
# Customize how these functions handle `Sparse` spaces by making them
# singledispatch callables and registering a new callable.
def _is_singledispatch(module_function):
    """Return True if `module_function` has a `registry` attribute, which is how
    a `functools.singledispatch` callable is recognized here.
    """
    has_registry = hasattr(module_function, "registry")
    return has_registry
def register_sparse_variant(module, module_fn_name: str):
    """Converts a function from the given module to a singledispatch callable,
    and registers the wrapped function as the callable to use for Sparse spaces.

    The module function must have the space as the first argument for this to
    work.

    Returns a decorator: the decorated function is registered for `Sparse` and
    returned unchanged.
    """
    target = getattr(module, module_fn_name)

    if not _is_singledispatch(target):
        # Replace the module's function, in place, with a singledispatch
        # callable that uses the original function as the default.
        target = singledispatch(target)
        setattr(module, module_fn_name, target)

    def wrapper(function):
        # Use `function` whenever the first argument is a Sparse object.
        target.register(Sparse, function)
        return function

    return wrapper
@register_sparse_variant(gym.spaces.utils, "flatdim")
def flatdim_sparse(space: Sparse) -> int:
    """Flattened dimension of a Sparse space: that of its base space."""
    base_space = space.base
    return gym.spaces.utils.flatdim(base_space)
@register_sparse_variant(gym.spaces.utils, "flatten")
def flatten_sparse(space: Sparse[T], x: Optional[T]) -> Optional[np.ndarray]:
    """Flatten a sample of a Sparse space.

    `None` is encoded as the one-element array `np.array([None])`; any other
    sample is flattened with the base space.
    """
    if x is None:
        return np.array([None])
    return gym.spaces.utils.flatten(space.base, x)
@register_sparse_variant(gym.spaces.utils, "flatten_space")
def flatten_sparse_space(space: Sparse[T]) -> gym.Space:
    """Flatten a Sparse space.

    Returns the flattened version of the base space, with its dtype set to
    `np.object_`. (The return annotation used to be `Optional[np.ndarray]`,
    which was wrong: a space is returned, not an array.)
    """
    flat_space = gym.spaces.utils.flatten_space(space.base)
    flat_space.dtype = np.object_
    return flat_space
@register_sparse_variant(gym.spaces.utils, "unflatten")
def unflatten_sparse(space: Sparse[T], x: np.ndarray) -> Optional[T]:
    """Inverse of `flatten_sparse`.

    A one-element array holding `None` is decoded as `None`; anything else is
    unflattened with the base space.
    """
    encodes_none = len(x) == 1 and x[0] is None
    if encodes_none:
        return None
    return gym.spaces.utils.unflatten(space.base, x)
@register_sparse_variant(gym.vector.utils, "create_empty_array")
def create_empty_array_sparse(space: Sparse, n=1, fn=np.zeros) -> np.ndarray:
    """Create an array of `n` entries of dtype object, using `fn` as the array factory."""
    shape = [n]
    return fn(shape, dtype=np.object_)
@register_sparse_variant(gym.vector.utils.shared_memory, "create_shared_memory")
def create_shared_memory_for_sparse_space(space: Sparse, n: int = 1, ctx: BaseContext = mp):
    """Create the shared memory for `n` entries of a Sparse space.

    The shared memory should be something that can accommodate either `None`
    or a sample from the space. It is therefore a dict with two entries:

    - "is_none": a shared array of `n` booleans, all initially False;
    - "value": the shared memory created for `n` entries of the base space.
    """
    print(f"Creating shared memory for {n} entries from space {space}")
    return {
        # NOTE: the builtin `bool` is used instead of the `np.bool` alias, which
        # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
        "is_none": ctx.Array(c_bool, np.zeros(n, dtype=bool)),
        "value": gym.vector.utils.shared_memory.create_shared_memory(space.base, n, ctx),
    }
@register_sparse_variant(gym.vector.utils.shared_memory, "write_to_shared_memory")
def write_to_shared_memory(
    index: int,
    value: Optional[T],
    shared_memory: Union[Dict, Tuple, BaseContext.Array],
    space: Union[Sparse[T], gym.Space],
):
    """Write `value` at position `index` of the shared memory of `space`.

    Writing entries of Sparse spaces is not implemented yet: a
    `NotImplementedError` is raised. The intended logic is to set
    `shared_memory["is_none"][index] = value is None` and, when the value isn't
    None, to write it into `shared_memory["value"]` using the base space.

    For any other space, the call is forwarded to gym's
    `write_to_shared_memory`.
    """
    print(f"Writing entry from space {space} at index {index} in shared memory")
    if isinstance(space, Sparse):
        assert isinstance(shared_memory, dict)
        raise NotImplementedError("Still debugging this")

    # TODO: Would this cause a problem, say in the case where we have a
    # regular space like Tuple that contains some Sparse spaces, then would
    # calling this "old" function here prevent this "new" function from
    # being used on the children?
    # NOTE: this used to call the `gym.vector.utils.shared_memory` *module*
    # itself, which is not callable. The module-level function is called
    # instead.
    # NOTE(review): assumes the gym signature is
    # (index, value, shared_memory, space) -- confirm for the gym version in use.
    return gym.vector.utils.shared_memory.write_to_shared_memory(
        index, value, shared_memory, space
    )
from gym.vector.utils.shared_memory import read_from_shared_memory as read_from_shared_memory_
@register_sparse_variant(gym.vector.utils.shared_memory, "read_from_shared_memory")
def read_from_shared_memory(
    shared_memory: Union[Dict, Tuple, BaseContext.Array], space: Sparse, n: int = 1
):
    """Read `n` entries of `space` from the shared memory.

    For a Sparse space, the shared memory is expected to be the dict with the
    "is_none" and "value" entries; the result is a list of `n` items, where an
    item is `None` when its "is_none" flag is set and the value read for the
    base space otherwise.

    For any other space, the call is forwarded to gym's original
    `read_from_shared_memory`.
    """
    print(f"Reading {n} entries from space {space} from shared memory")
    if not isinstance(space, Sparse):
        return read_from_shared_memory_(shared_memory, space, n)

    assert isinstance(shared_memory, dict)
    is_none_array = list(shared_memory["is_none"])
    value_array = shared_memory["value"]
    assert len(is_none_array) == len(value_array) == n

    # This might include some garbage (or default) values, which weren't
    # set.
    read_values = read_from_shared_memory(value_array, space.base, n)
    print(f"Read values from space: {read_values}")
    print(f"is_none array: {is_none_array}")
    values = [None if is_none_array[index] else read_values[index] for index in range(n)]
    print(f"resulting values: {values}")
    # (An unreachable second `return` that used to follow this one was removed.)
    return values
@register_sparse_variant(gym.vector.utils, "batch_space")
def batch_sparse_space(space: Sparse, n: int = 1) -> gym.Space:
    """Batch this sparse space.

    NOTE: The sparsity of `space` currently has an important impact on the kind of space returned!

    Taking a base space of type `Discrete` as an example:
    - If `space.sparsity == 0 or space.sparsity == 1`, then the result is a Sparse[MultiDiscrete],
    - *However*, if `0 < sparsity < 1`, then the result is a `Tuple[Sparse[Discrete], ...]`.
    """
    # NOTE: This means we do something different depending on the sparsity.
    # Could that become an issue?
    #
    # It is tempting to make this more consistent by always returning the same
    # kind of result, because it's nice to avoid dealing with arrays like
    # `np.array([None, 1, ])` or, even worse, `np.array([None, None])`.
    # *HOWEVER*, it's not a good idea! As an example, when using VectorEnvs, the
    # spaces just represent what the observations of the VectorEnv will look
    # like. Since each env has 'its own' Sparse[Discrete] space, and they are
    # "sampled" independently, then if 0 < sparsity < 1 we WILL have some entries
    # be None and others not. Therefore, it's better in that case to just return
    # the tuple of sparse spaces.
    #
    # TODO: Use something like this eventually. There are still problem with to_tensor.
    # return SparseMultiDiscrete(
    #     np.full((n,), space.n, dtype=space.base.dtype), sparsity=space.sparsity
    # )
    sparsity = space.sparsity
    always_or_never_none = sparsity in {0, 1}
    if not always_or_never_none:
        # Sticking to the default behaviour from gym for now, which is to just
        # return a tuple of length n with n copies of the space.
        return spaces.Tuple((space,) * n)

    # If the space has 0 sparsity, then batch it just like you would its
    # base space.
    # TODO: This is convenient, but not very consistent, as the length of
    # the batches changes depending on the sparsity of the space..
    batched_base = batch_space(space.base, n)
    return Sparse(batched_base, sparsity=sparsity)
# We could also do this, where we make the sub-spaces sparse:
# batch_space(Sparse>) -> Tuple), batch_space(Sparse)>
if isinstance(space.base, spaces.Tuple):
return spaces.Tuple(
[
spaces.Tuple([Sparse(sub_space, sparsity) for _ in range(n)])
for sub_space in space.base.spaces
]
)
if isinstance(space.base, spaces.Dict):
return spaces.Dict(
{
name: Sparse(batch_space(sub_space, n), sparsity)
for name, sub_space in space.base.spaces.items()
}
)
return batch_space(space.base, n)
@register_sparse_variant(gym.vector.utils.numpy_utils, "concatenate")
def concatenate_sparse_items(
    space: Sparse, items: Sequence[Optional[T]], out: Union[tuple, dict, np.ndarray]
) -> Optional[Sequence[T]]:
    """Concatenate samples of a `Sparse` space into a batch.

    The result depends on the sparsity of `space`:
    - sparsity == 0: no item may be `None`; the items are concatenated exactly like
      samples of the base space would be (using `out` as the output buffer).
    - sparsity == 1: every item must be `None`; the batch is `None`.
    - otherwise: the items are returned as a tuple, with `None` entries left as-is.

    Raises
    ------
    ValueError
        If the items contradict a sparsity of exactly 0 or exactly 1.
    """
    if space.sparsity == 0:
        if any(item is None for item in items):
            raise ValueError("Space has sparsity of 0, there shouldn't be any `None` items!")
        # The items are all samples of the base space, so batch them the usual way.
        return concatenate(space.base, items=items, out=out)

    if space.sparsity == 1:
        if any(item is not None for item in items):
            raise ValueError("Space has sparsity of 1, all items should be None!")
        return None

    # NOTE: A tuple is returned rather than an np.array of dtype `object`, simply
    # because arrays like `np.array([None])` are not fun to have to deal with.
    return tuple(items)
from sequoia.utils.generic_functions.to_from_tensor import to_tensor
@to_tensor.register(Sparse)
def sparse_sample_to_tensor(
    space: Sparse, sample: Union[Optional[Any], np.ndarray], device: torch.device = None
) -> Optional[Union[Tensor, np.ndarray]]:
    """Convert a sample of a `Sparse` space with `to_tensor`.

    - sparsity == 1: there is nothing to convert. Returns `None`, or an array of
      `None`s when the base space is a `MultiDiscrete`.
    - sparsity == 0: the sample is converted exactly like a sample of the base space.
    - otherwise: only object arrays are supported, and they are returned as a new
      object array (entries are not moved to `device`).

    Raises
    ------
    AssertionError
        If `0 < sparsity < 1` and `sample` is not a numpy array of dtype `object`.
    """
    # NOTE: `np.object` was only an alias for the builtin `object`, and was removed
    # in NumPy 1.24, so the builtin is used directly for the dtype checks below.
    if space.sparsity == 1.0:
        if isinstance(space.base, spaces.MultiDiscrete):
            assert all(v == None for v in sample)
            return np.array([None if v == None else v for v in sample])
        if sample is not None:
            assert isinstance(sample, np.ndarray) and sample.dtype == object
            assert not sample.shape
        return None

    if space.sparsity == 0.0:
        # Do we need to convert dtypes here though?
        return to_tensor(space.base, sample, device)

    # 0 < sparsity < 1
    if isinstance(sample, np.ndarray) and sample.dtype == object:
        return np.array([None if v == None else v for v in sample])

    # Raise explicitly so that this isn't silently skipped when running with `-O`.
    raise AssertionError((space, sample))
================================================
FILE: sequoia/common/spaces/sparse_test.py
================================================
from typing import Iterable
import gym
import numpy as np
import pytest
from gym import spaces
from .sparse import Sparse
# Base spaces used to parametrize the tests below: a Discrete space, a Box space,
# a Tuple of both, and a Dict which nests such a Tuple next to a Discrete space.
base_spaces = [
    spaces.Discrete(n=10),
    spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
    spaces.Tuple(
        [
            spaces.Discrete(n=10),
            spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
        ]
    ),
    spaces.Dict(
        {
            "x": spaces.Tuple(
                [
                    spaces.Discrete(n=10),
                    spaces.Box(0, 1, [3, 32, 32], dtype=np.float32),
                ]
            ),
            "t": spaces.Discrete(1),
        }
    ),
]
def equals(value, expected) -> bool:
    """Recursively compare `value` with `expected`.

    Both arguments must be of exactly the same type (checked with an assert).
    Numpy arrays are compared through their `tolist()` form, tuples / lists are
    compared item by item, dicts are compared over the keys of `expected`, and
    anything else is compared with `==`.
    """
    assert type(value) == type(expected)

    if isinstance(value, np.ndarray):
        return value.tolist() == expected.tolist()

    if isinstance(value, (tuple, list)):
        assert len(value) == len(expected)
        for actual_item, expected_item in zip(value, expected):
            if not equals(actual_item, expected_item):
                return False
        return True

    if isinstance(value, dict):
        assert len(value) == len(expected)
        return all(key in value and equals(value[key], expected[key]) for key in expected)

    # Plain scalars (int, float, bool) and any other type.
    return value == expected
def is_sparse(iterable: Iterable[object]) -> bool:
    """Returns whether some (but not all) values in the iterable are None.

    The iterable is consumed lazily in a single pass, and iteration stops as soon
    as both a `None` and a non-`None` value have been seen. An empty iterable is
    not considered sparse.
    """
    seen_none = False
    seen_value = False
    for value in iterable:
        if value is None:
            seen_none = True
        else:
            seen_value = True
        if seen_none and seen_value:
            return True
    return False
@pytest.mark.parametrize("base_space", base_spaces)
def test_sample(base_space: gym.Space):
    """Sampling follows the sparsity: never None at 0, mixed at 0.5, always None at 1."""
    # No sparsity: every sample is a sample of the base space.
    dense_space = Sparse(base_space, sparsity=0.0)
    dense_samples = [dense_space.sample() for _ in range(100)]
    assert all(sample is not None for sample in dense_samples)
    assert all(sample in base_space for sample in dense_samples)

    # Some sparsity: a mix of `None` and samples of the base space.
    mixed_space = Sparse(base_space, sparsity=0.5)
    mixed_samples = [mixed_space.sample() for _ in range(100)]
    assert is_sparse(mixed_samples)
    assert all([sample in base_space for sample in mixed_samples if sample is not None])

    # Full sparsity: only `None`.
    empty_space = Sparse(base_space, sparsity=1.0)
    empty_samples = [empty_space.sample() for _ in range(100)]
    assert all(sample is None for sample in empty_samples)
@pytest.mark.parametrize("sparsity", [0.0, 0.5, 1.0])
@pytest.mark.parametrize("base_space", base_spaces)
def test_contains(base_space: gym.Space, sparsity: float):
    """Every sample drawn from a Sparse space is contained in that space."""
    sparse_space = Sparse(base_space, sparsity=sparsity)
    drawn = [sparse_space.sample() for _ in range(100)]
    assert all(sample in sparse_space for sample in drawn)
from gym.vector.utils import batch_space
@pytest.mark.parametrize("base_space", base_spaces)
def test_batching_works(base_space: gym.Space, n: int = 3):
    """Batches of the base space and of its Sparse wrapper have the same length.

    NOTE(review): another function named `test_batching_works` is defined later in
    this module, which rebinds the name - this first version is therefore shadowed.
    """
    sparse_space = Sparse(base_space)

    batched_base_space = batch_space(base_space, n)
    batched_sparse_space = batch_space(sparse_space, n)

    base_batch = batched_base_space.sample()
    sparse_batch = batched_sparse_space.sample()
    assert len(base_batch) == len(sparse_batch)
# @pytest.mark.xfail(reason="TODO: Need to decide how we want the sparsity to "
# "affect the batching of Tuple or Dict spaces.")
@pytest.mark.parametrize("base_space", base_spaces)
@pytest.mark.parametrize("sparsity", [0.0, 0.5, 1.0])
def test_batching_works(base_space: gym.Space, sparsity: float, n: int = 10):
    """Check what a batched Sparse space produces for each kind of sparsity."""
    batched_base_space = batch_space(base_space, n)
    batched_sparse_space = batch_space(Sparse(base_space, sparsity=sparsity), n)

    # Seed both batched spaces identically right before sampling from each.
    batched_base_space.seed(123)
    base_batch = batched_base_space.sample()
    batched_sparse_space.seed(123)
    sparse_batch = batched_sparse_space.sample()

    if sparsity == 0:
        # When there is no sparsity, the batching is the same as batching the
        # same space.
        assert equals(base_batch, sparse_batch)
        return

    if sparsity == 1:
        assert sparse_batch is None
        return

    assert len(sparse_batch) == n
    assert isinstance(sparse_batch, tuple)

    for value in sparse_batch:
        if value is not None:
            assert value in base_space

    # There should be some sparsity.
    assert any(v is None for v in sparse_batch) and not all(
        v is None for v in sparse_batch
    ), sparse_batch
from gym.spaces.utils import flatdim, flatten
@pytest.mark.xfail(
    reason="When using the normal gym repo rather than the "
    "fork, the change doesn't persist through an import."
)
def test_change_doesnt_persist_after_import():
    """When re-importing the `concatenate` function from `gym.vector.utils`,
    the changes aren't preserved.
    """
    patched_functions = (
        gym.vector.utils.numpy_utils.concatenate,
        gym.vector.utils.batch_space,
    )
    # Each patched function is expected to expose a `registry` attribute.
    for function in patched_functions:
        assert hasattr(function, "registry")
def test_change_persists_after_full_import():
    """When re-importing the `concatenate` function from
    `gym.vector.utils.numpy_utils`, the changes are preserved.
    """
    patched_functions = (
        gym.vector.utils.numpy_utils.concatenate,
        gym.vector.utils.batch_space,
    )
    # Each patched function is expected to expose a `registry` attribute.
    for function in patched_functions:
        assert hasattr(function, "registry")
@pytest.mark.parametrize("base_space", base_spaces)
def test_flatdim(base_space: gym.Space):
    """A Sparse space without sparsity has the same flat dims as its base space.

    NOTE(review): another function named `test_flatdim` is defined right after this
    one in the module, which rebinds the name - this first version is shadowed.
    """
    expected_dims = flatdim(base_space)
    actual_dims = flatdim(Sparse(base_space, sparsity=0.0))
    assert expected_dims == actual_dims
@pytest.mark.parametrize("base_space", base_spaces)
def test_flatdim(base_space: gym.Space):
    """The flattened dimensions of a Sparse space match those of its base space."""
    base_flat_dims = flatdim(base_space)

    dense_space = Sparse(base_space, sparsity=0.0)
    assert base_flat_dims == flatdim(dense_space)

    # The flattened dimensions shouldn't depend on the sparsity.
    empty_space = Sparse(base_space, sparsity=1.0)
    assert base_flat_dims == flatdim(empty_space)
@pytest.mark.parametrize("base_space", base_spaces)
def test_seeding_works(base_space: gym.Space):
    """With the same seed and no sparsity, a Sparse space samples like its base space."""
    sparse_space = Sparse(base_space, sparsity=0.0)

    # Seed the base space and sample from it first ...
    base_space.seed(123)
    expected = base_space.sample()

    # ... then seed the wrapper with the same value and sample again.
    sparse_space.seed(123)
    actual = sparse_space.sample()

    assert equals(expected, actual)
@pytest.mark.parametrize("base_space", base_spaces)
def test_flatten(base_space: gym.Space):
    """Flattening a sample of a Sparse space (no sparsity) matches the base space."""
    sparse_space = Sparse(base_space, sparsity=0.0)

    base_space.seed(123)
    flat_expected = flatten(base_space, base_space.sample())

    sparse_space.seed(123)
    flat_actual = flatten(sparse_space, sparse_space.sample())

    assert equals(flat_expected, flat_actual)
@pytest.mark.parametrize("base_space", base_spaces)
def test_equality(base_space: gym.Space):
    """Sparse spaces compare equal only with the same base space and sparsity."""
    reference = Sparse(base_space, sparsity=0.0)

    same_space = Sparse(base_space, sparsity=0.0)
    assert same_space == reference

    different_sparsity = Sparse(base_space, sparsity=0.2)
    assert different_sparsity != reference

    different_base = Sparse(spaces.Tuple([base_space, base_space]), sparsity=0.0)
    assert different_base != reference
================================================
FILE: sequoia/common/spaces/tensor_spaces.py
================================================
""" TODO: Maybe create a typed version of 'add_tensor_support' of gym_wrappers.convert_tensors
"""
from typing import Optional, Union
import gym
import numpy as np
import torch
from gym import spaces
from torch import Tensor
# Dict of NumPy dtype -> torch dtype (when the correspondence exists)
numpy_to_torch_dtypes = {
    bool: torch.bool,
    np.uint8: torch.uint8,
    np.int8: torch.int8,
    np.int16: torch.int16,
    np.int32: torch.int32,
    np.int64: torch.int64,
    np.float16: torch.float16,
    np.float32: torch.float32,
    np.float64: torch.float64,
    np.complex64: torch.complex64,
    np.complex128: torch.complex128,
}

# Dict of torch dtype -> NumPy dtype
torch_to_numpy_dtypes = {value: key for (key, value) in numpy_to_torch_dtypes.items()}


def get_numpy_dtype_equivalent_to(torch_dtype: torch.dtype) -> np.dtype:
    """Gets the numpy dtype equivalent to the given torch dtype.

    Raises
    ------
    RuntimeError
        If `torch_to_numpy_dtypes` has no entry (or more than one entry) that
        matches `torch_dtype`.
    """
    matching_dtypes = [
        numpy_type
        for known_torch_dtype, numpy_type in torch_to_numpy_dtypes.items()
        if known_torch_dtype == torch_dtype
    ]
    if len(matching_dtypes) == 0:
        raise RuntimeError(f"Unable to find a numpy dtype equivalent to {torch_dtype}")
    if len(matching_dtypes) > 1:
        raise RuntimeError(f"Found more than one match for dtype {torch_dtype}: {matching_dtypes}")
    return np.dtype(matching_dtypes[0])


def get_torch_dtype_equivalent_to(numpy_dtype: np.dtype) -> torch.dtype:
    """Gets the torch dtype equivalent to the given numpy dtype.

    `numpy_dtype` can be an `np.dtype` instance, a numpy scalar type (e.g.
    `np.float32` or `np.bool_`), or anything else that `np.dtype` can interpret.

    Raises
    ------
    RuntimeError
        If `numpy_dtype` is None, can't be interpreted as a numpy dtype, or if
        `numpy_to_torch_dtypes` has no entry (or more than one) that matches it.
    """
    # NOTE: `np.dtype(None)` silently gives float64, so None is rejected explicitly.
    if numpy_dtype is None:
        raise RuntimeError(f"Unable to find a torch dtype equivalent to {numpy_dtype}")
    try:
        target_dtype = np.dtype(numpy_dtype)
    except (TypeError, ValueError) as err:
        raise RuntimeError(
            f"Unable to find a torch dtype equivalent to {numpy_dtype}"
        ) from err

    # Compare `np.dtype` instances on both sides: comparing the classes directly
    # misses equivalent spellings of the same dtype (e.g. `bool == np.bool_` is False).
    matching_dtypes = [
        torch_dtype
        for numpy_type, torch_dtype in numpy_to_torch_dtypes.items()
        if np.dtype(numpy_type) == target_dtype
    ]
    if len(matching_dtypes) == 0:
        raise RuntimeError(f"Unable to find a torch dtype equivalent to {numpy_dtype}")
    if len(matching_dtypes) > 1:
        raise RuntimeError(f"Found more than one match for dtype {numpy_dtype}: {matching_dtypes}")
    return matching_dtypes[0]
from inspect import isclass
from typing import Any
def is_numpy_dtype(dtype: Any) -> bool:
    """Returns whether `dtype` is an `np.dtype` instance or a numpy scalar type.

    Numpy scalar types are the classes that inherit from `np.generic`, for
    example `np.float32`.
    """
    if isinstance(dtype, np.dtype):
        return True
    return isclass(dtype) and issubclass(dtype, np.generic)
def is_torch_dtype(dtype: Any) -> bool:
    """Returns whether `dtype` is an instance of `torch.dtype`."""
    return isinstance(dtype, torch.dtype)
from abc import ABC
def supports_tensors(space: gym.Space) -> bool:
    """Placeholder for a check of whether `space` supports tensors.

    Not implemented yet: calling this always raises.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("TODO: Create a generic function for this.")
class TensorSpace(gym.Space, ABC):
    """Mixin class that makes a Space's `contains` and `sample` methods accept and
    produce tensors, respectively.

    NOTE(review): this class itself only defines `__init__`, which records the
    device and the numpy / torch dtypes; `contains` and `sample` are overridden
    in the subclasses.
    """

    def __init__(self, *args, device: torch.device = None, **kwargs):
        """Record the device and dtypes, then initialise the rest of the class hierarchy.

        Parameters
        ----------
        *args, **kwargs
            Forwarded to `super().__init__`. When a `dtype` keyword argument is
            given, it is replaced with its numpy equivalent before forwarding.
        device : torch.device, optional
            Stored on `self.device`. By default None.

        Raises
        ------
        NotImplementedError
            If no dtype is given for a space that is neither `Discrete` nor
            `MultiDiscrete`, or if the given dtype isn't supported.
        """
        # super().__init__(*args, **kwargs)
        self.device: Optional[torch.device] = torch.device(device) if device else None

        # Depending on the value passed to `dtype`
        dtype = kwargs.get("dtype")
        if dtype is None:
            if isinstance(self, (spaces.Discrete, spaces.MultiDiscrete)):
                # NOTE: They dont actually give a 'dtype' argument for these.
                self._numpy_dtype = np.dtype(np.int64)
                self._torch_dtype = torch.int64
            else:
                raise NotImplementedError(f"Space {self} doesn't have a `dtype`?")
        elif is_numpy_dtype(dtype):
            self._numpy_dtype = np.dtype(dtype)
            self._torch_dtype = get_torch_dtype_equivalent_to(dtype)
        elif is_torch_dtype(dtype):
            self._numpy_dtype = get_numpy_dtype_equivalent_to(dtype)
            self._torch_dtype = dtype
        elif str(dtype) == "float32":
            # Fallback for a dtype which is neither a numpy nor a torch dtype, but
            # whose string form is "float32".
            self._numpy_dtype = np.dtype(np.float32)
            self._torch_dtype = torch.float32
        else:
            assert not any(dtype == k for k in numpy_to_torch_dtypes)
            assert not any(dtype == k for k in torch_to_numpy_dtypes)
            raise NotImplementedError(f"Unsupported dtype {dtype} (of type {type(dtype)})")

        # The rest of the class hierarchy is given the numpy version of the dtype.
        if "dtype" in kwargs:
            kwargs["dtype"] = self._numpy_dtype
        super().__init__(*args, **kwargs)
        # Set after `super().__init__`, so that `dtype` ends up being the torch dtype.
        self.dtype: torch.dtype = self._torch_dtype
class TensorBox(TensorSpace, spaces.Box):
    """Box space that accepts both Tensor and ndarrays."""

    def __init__(self, low, high, shape=None, dtype=np.float32, device: torch.device = None):
        """Create the Box, and keep tensor copies of its bounds on `device`."""
        super().__init__(low, high, shape=shape, dtype=dtype, device=device)
        self.low_tensor = torch.as_tensor(self.low, device=self.device)
        self.high_tensor = torch.as_tensor(self.high, device=self.device)
        self.dtype = self._torch_dtype

    def sample(self):
        """Sample from the Box, and return the sample as a Tensor on `self.device`."""
        # `self.dtype` is temporarily set to the numpy dtype while the parent class
        # samples. It is restored even if sampling fails.
        self.dtype = self._numpy_dtype
        try:
            sample = super().sample()
        finally:
            self.dtype = self._torch_dtype
        return torch.as_tensor(sample, dtype=self._torch_dtype, device=self.device)

    def contains(self, x: Union[list, np.ndarray, Tensor]) -> bool:
        """Return whether `x` has the shape of this space and lies within its bounds.

        Raises
        ------
        RuntimeError
            If `x` is a Tensor that isn't on the same device as the bounds.
        """
        if isinstance(x, list):
            x = np.array(x)  # Promote list to array for contains check
        if isinstance(x, Tensor):
            if not (x.device == self.low_tensor.device == self.high_tensor.device):
                raise RuntimeError(
                    f"Values aren't on the same device: {x.device}, {self.device}, {self.low_tensor.device}"
                )
            # The shape is checked first, so that mismatched shapes are never compared.
            # `bool(...)` so that a plain bool is returned rather than a 0-dim Tensor.
            return bool(
                x.shape == self.shape
                and (x >= self.low_tensor).all()
                and (x <= self.high_tensor).all()
            )
        return bool(x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high))

    def __repr__(self):
        return (
            f"{type(self).__name__}({self.low.min()}, {self.high.max()}, "
            f"{self.shape}, {self.dtype}"
            + (f", device={self.device}" if self.device is not None else "")
            + ")"
        )

    @classmethod
    def from_box(cls, box: spaces.Box, device: torch.device = None):
        """Create a TensorBox with the same bounds, shape and dtype as `box`."""
        # Pass the full `low` / `high` arrays rather than their first entries, so
        # that bounds which differ between entries are preserved.
        return cls(
            low=box.low,
            high=box.high,
            shape=box.shape,
            dtype=box.dtype,  # NOTE: Gets converted in TensorSpace constructor.
            device=device,
        )
class TensorDiscrete(TensorSpace, spaces.Discrete):
    """Discrete space that accepts Tensors in `contains` and samples Tensors."""

    def contains(self, v: Union[int, Tensor]) -> bool:
        """Return whether `v` is in this space. Tensors are converted to numpy first."""
        if isinstance(v, Tensor):
            v = v.detach().cpu().numpy()
        return super().contains(v)

    def sample(self):
        """Sample from the space, and return the sample as a Tensor on `self.device`."""
        # `self.dtype` is temporarily set to the numpy dtype while the parent class
        # samples. It is restored even if sampling fails.
        self.dtype = self._numpy_dtype
        try:
            s = super().sample()
        finally:
            self.dtype = self._torch_dtype
        return torch.as_tensor(s, dtype=self.dtype, device=self.device)
class TensorMultiDiscrete(TensorSpace, spaces.MultiDiscrete):
    """MultiDiscrete space that accepts Tensors in `contains` and samples Tensors."""

    def contains(self, v: Tensor) -> bool:
        """Return whether `v` is in this space. Tensors are converted to numpy first."""
        # Convert explicitly (like `TensorDiscrete.contains` does), rather than
        # relying on a bare `except:` which would also hide unrelated errors.
        if isinstance(v, Tensor):
            v = v.detach().cpu().numpy()
        return super().contains(v)

    def sample(self):
        """Sample from the space, and return the sample as a Tensor on `self.device`."""
        # `self.dtype` is temporarily set to the numpy dtype while the parent class
        # samples. It is restored even if sampling fails.
        self.dtype = self._numpy_dtype
        try:
            s = super().sample()
        finally:
            self.dtype = self._torch_dtype
        return torch.as_tensor(s, dtype=self.dtype, device=self.device)
from gym.vector.utils.spaces import batch_space
@batch_space.register(TensorDiscrete)
def _batch_discrete_space(space: TensorDiscrete, n: int = 1) -> TensorMultiDiscrete:
    """Batch a `TensorDiscrete` space into a `TensorMultiDiscrete` with `n` entries.

    The device of `space` is passed on to the batched space, so that the samples
    of both spaces are created on the same device.
    """
    nvec = torch.full((n,), space.n, dtype=space.dtype)
    return TensorMultiDiscrete(nvec, device=space.device)
================================================
FILE: sequoia/common/spaces/tensor_spaces_test.py
================================================
import numpy as np
import pytest
from gym import spaces
from torch import Tensor
from .tensor_spaces import TensorBox, numpy_to_torch_dtypes
@pytest.mark.parametrize("np_dtype", [np.uint8, np.float32])
def test_tensor_box(np_dtype: np.dtype):
    """A TensorBox created from a Box samples tensors that fit in both spaces."""
    expected_torch_dtype = numpy_to_torch_dtypes[np_dtype]

    box = spaces.Box(0, 1, (28, 28), dtype=np_dtype)
    tensor_box = TensorBox.from_box(box)

    sample = tensor_box.sample()
    assert isinstance(sample, Tensor)
    assert sample in tensor_box

    # Once converted back to numpy, the sample is also valid for the original Box.
    numpy_sample = sample.cpu().numpy().astype(np_dtype)
    assert numpy_sample in box
    assert sample.dtype == expected_torch_dtype
================================================
FILE: sequoia/common/spaces/typed_dict.py
================================================
""" Subclass of `spaces.Dict` that allows custom dtypes and uses type annotations.
"""
import dataclasses
from collections import OrderedDict
from collections.abc import Mapping as MappingABC
from copy import deepcopy
from dataclasses import fields, is_dataclass
from inspect import isclass
from typing import (
Any,
ClassVar,
Dict,
Iterable,
List,
Mapping,
Sequence,
Tuple,
Type,
TypeVar,
Union,
get_type_hints,
)
import gym
import numpy as np
from gym import Space, spaces
from gym.vector.utils import batch_space, concatenate
from .sparse import batch_space, concatenate
try:
from typing import get_origin
except ImportError:
# Python 3.7's typing module doesn't have this `get_origin` function, so get it from
# `typing_inspect`.
from typing_inspect import get_origin
# Type of the samples of a `TypedDictSpace` (some kind of mapping from str to values).
M = TypeVar("M", bound=Mapping[str, Any])
# NOTE(review): `S` and `Dataclass` are not used in the visible part of this module.
S = TypeVar("S")
Dataclass = TypeVar("Dataclass")
class TypedDictSpace(spaces.Dict, Space[M]):
    """Subclass of `spaces.Dict` that allows custom dtypes and uses type annotations.

    ## Examples:

    - Using it just like a regular spaces.Dict:

    >>> from gym.spaces import Box
    >>> s = TypedDictSpace(x=Box(0, 1, (4,), dtype=np.float64))
    >>> s
    TypedDictSpace(x:Box(0.0, 1.0, (4,), float64))
    >>> _ = s.seed(123)
    >>> s.sample()
    {'x': array([0.06132501, 0.48141959, 0.41703335, 0.34899889])}

    - Using it like a TypedDict: (This is equivalent to the above)

    >>> class VisionSpace(TypedDictSpace):
    ...     x: Box = Box(0, 1, (4,), dtype=np.float64)
    >>> s = VisionSpace()
    >>> s
    VisionSpace(x:Box(0.0, 1.0, (4,), float64))
    >>> _ = s.seed(123)
    >>> s.sample()
    {'x': array([0.06132501, 0.48141959, 0.41703335, 0.34899889])}

    - You can also overwrite the values from the type annotations by passing them to the
      constructor:

    >>> s = VisionSpace(x=spaces.Box(0, 2, (3,), dtype=np.int64))
    >>> s
    VisionSpace(x:Box(0, 2, (3,), int64))
    >>> _ = s.seed(123)
    >>> s.sample()
    {'x': array([0, 1, 1])}

    ### Using custom dtypes

    Can use any type here, as long as it can receive the samples from each space as
    keyword arguments.

    One good example of this is to use a `dataclass` as the custom dtype.
    You are strongly encouraged to use a dtype that inherits from the `Mapping` class
    from `collections.abc`, so that samples from your space can be handled similarly to
    regular dictionaries.

    >>> from collections import OrderedDict
    >>> s = TypedDictSpace(x=spaces.Box(0, 1, (4,), dtype=float), dtype=OrderedDict)
    >>> s
    TypedDictSpace(x:Box(0.0, 1.0, (4,), float64), dtype=<class 'collections.OrderedDict'>)
    >>> _ = s.seed(123)
    >>> s.sample()
    OrderedDict([('x', array([0.06132501, 0.48141959, 0.41703335, 0.34899889]))])

    ### Required items:

    If an annotation on the class doesn't have a default value, then it is treated as a
    required argument:

    >>> class FooSpace(TypedDictSpace):
    ...     a: spaces.Box = spaces.Box(0, 1, (4,), float)
    ...     b: spaces.Discrete
    >>> s = FooSpace()  # doesn't work!
    Traceback (most recent call last):
      ...
    TypeError: Space of type <class 'sequoia.common.spaces.typed_dict.FooSpace'> requires a 'b' item!
    >>> s = FooSpace(b=spaces.Discrete(5))
    >>> s
    FooSpace(a:Box(0.0, 1.0, (4,), float64), b:Discrete(5))

    NOTE: spaces can also inherit from each other!

    >>> class ImageSegmentationSpace(VisionSpace):
    ...     bounding_box: Box
    ...
    >>> s = ImageSegmentationSpace(
    ...     x=spaces.Box(0, 1, (2, 2), dtype=float),
    ...     bounding_box=spaces.Box(0, 4, (4, 2), dtype=int),
    ... )
    >>> s
    ImageSegmentationSpace(x:Box(0.0, 1.0, (2, 2), float64), bounding_box:Box(0, 4, (4, 2), int64))
    """

    def __init__(self, spaces: Mapping[str, Space] = None, dtype: Type[M] = dict, **spaces_kwargs):
        """Creates the TypedDict space.

        Can either pass a dict of spaces, or pass the spaces as keyword arguments.

        Parameters
        ----------
        spaces : Mapping[str, Space], optional
            Dictionary mapping from strings to spaces, by default None
        dtype : Type[M], optional
            Type of outputs to return. By default `dict`, but this can also use any
            other dtype which will accept the values from each space as a keyword
            argument.

        NOTE: This `dtype` is usually set to some dataclass type in Sequoia, such as
        `Observation`, `Rewards`, etc. (subclasses of `Batch`).
        By default, `dtype` is just `dict`, and `space.sample()` will return simple
        dictionaries.

        Raises
        ------
        RuntimeError
            If both `spaces` and **kwargs are used.
        TypeError
            If the class has a type annotation for a space, and the required space isn't
            passed as an argument (emulating a required argument, in a way).
        """
        if spaces and spaces_kwargs:
            raise RuntimeError("Can only use one of `spaces` or **kwargs, not both.")
        spaces_from_args = spaces or spaces_kwargs

        # have to use OrderedDict just in case python <= 3.6.x
        spaces_from_annotations: Dict[str, gym.Space] = OrderedDict()
        cls = type(self)
        class_typed_attributes: Dict[str, Type] = get_type_hints(cls)
        # NOTE: This is only needed when using `__future__ import annotations` in a
        # client file:
        # Get the `globals` of the caller when checking type annotations:
        # NOTE: Might actually need to get the globals of where that class is defined!
        # caller_globals = inspect.stack()[1][0].f_globals
        # class_typed_attributes: Dict[str, Type] = get_type_hints(cls, globalns=caller_globals)

        if class_typed_attributes:
            for attribute, type_annotation in class_typed_attributes.items():
                # Class variables are never treated as spaces of the dict.
                if getattr(type_annotation, "__origin__", "") is ClassVar:
                    continue

                # An annotation counts as a space if it is a Space subclass, or a
                # generic alias whose origin is a Space subclass.
                is_space = False
                if isclass(type_annotation) and issubclass(type_annotation, gym.Space):
                    is_space = True
                else:
                    origin = get_origin(type_annotation)
                    is_space = (
                        origin is not None and isclass(origin) and issubclass(origin, gym.Space)
                    )

                # NOTE: emulate a 'required argument' when there is a type
                # annotation, but no value.
                # Note: How about a None value, is that ok?
                if is_space:
                    # Sentinel, to tell apart "no class attribute" from any actual value.
                    _missing = object()
                    value = getattr(cls, attribute, _missing)
                    if value is _missing and attribute not in spaces_from_args:
                        raise TypeError(
                            f"Space of type {type(self)} requires a '{attribute}' item!"
                        )
                    if isinstance(value, gym.Space):
                        # Shouldn't be able to have two annotations with the same name.
                        assert attribute not in spaces_from_annotations
                        # The space is copied, so that the instance doesn't share the
                        # space object stored on the class.
                        spaces_from_annotations[attribute] = deepcopy(value)

        # Avoid the annoying sorting of keys that `spaces.Dict` does if we pass a
        # regular dict.
        spaces = OrderedDict()  # Need to use this for 3.6.x
        spaces.update(spaces_from_annotations)
        spaces.update(spaces_from_args)  # Arguments overwrite the spaces from the annotations.

        if not spaces:
            raise TypeError(
                "Need to either have type annotations on the class, or pass some "
                "arguments to the constructor!"
            )
        assert all(isinstance(s, gym.Space) for s in spaces.values()), spaces

        super().__init__(spaces=spaces)
        self.spaces = dict(self.spaces)  # Get rid of the OrderedDict.

        # Sequoia-specific check.
        if "x" in self.spaces:
            assert list(self.spaces.keys()).index("x") == 0, self.spaces

        self.dtype = dtype

        # Optional: But just to make sure this works:
        if dataclasses.is_dataclass(self.dtype):
            dtype_fields: List[str] = [f.name for f in dataclasses.fields(self.dtype)]
            # Check that the dtype can handle all the entries of `self.spaces`, so that
            # we won't get any issues when calling `self.dtype(**super().sample())`.
            for space_name, space in self.spaces.items():
                if space_name not in dtype_fields:
                    raise RuntimeError(
                        f"dtype {self.dtype} doesn't have a field for space "
                        f"'{space_name}' ({space})!"
                    )

    def keys(self) -> Sequence[str]:
        """Return the names of the spaces (a view of the keys of `self.spaces`)."""
        return self.spaces.keys()

    def items(self) -> Iterable[Tuple[str, Space]]:
        """Return the (name, space) pairs (a view of the items of `self.spaces`)."""
        return self.spaces.items()

    def values(self) -> Sequence[Space]:
        """Return the spaces (a view of the values of `self.spaces`)."""
        return self.spaces.values()

    def sample(self) -> M:
        """Sample from each space, and pass the values to `self.dtype` as keyword arguments."""
        dict_sample: dict = super().sample()
        # Gets rid of OrderedDict.
        return self.dtype(**dict_sample)

    def __getattr__(self, attr: str) -> Space:
        """Give access to the spaces as attributes: `space.x` is `space.spaces["x"]`.

        Raises
        ------
        AttributeError
            If there is no space with that name.
        """
        # `spaces` itself is never looked up in `self.spaces`: if it isn't set yet,
        # doing so would call `__getattr__` again.
        if attr != "spaces":
            if attr in self.spaces:
                return self.spaces[attr]
        raise AttributeError(f"Space doesn't have attribute {attr}")

    def __getitem__(self, key: Union[str, int]) -> Space:
        """Return the space stored under `key`, as done by the parent class."""
        if key not in self.spaces:
            if isinstance(key, int):
                # IDEA: Try to get the item at given index in the keys? a bit like a
                # tuple space?
                # return self[list(self.spaces.keys())[key]]
                pass
        return super().__getitem__(key)

    def __len__(self) -> int:
        """Return the number of spaces."""
        return len(self.spaces)

    # def __setitem__(self, key, value):
    #     return super().__setitem__(key, value)

    def contains(self, x: Union[M, Mapping[str, Space]]) -> bool:
        """Return whether `x` is a valid sample of this space.

        `x` can be a mapping or a dataclass instance. It may have more entries than
        this space has, as long as every space has an entry that it contains.
        When both `x` and `self.dtype` are dataclasses, `x` also needs to be an
        instance of `self.dtype`.
        """
        if is_dataclass(x):
            if is_dataclass(self.dtype):
                if not isinstance(x, self.dtype):
                    # NOTE: This could be a bit controversial, since it departs a bit how Dict
                    # does things.
                    return False
            # NOTE: We don't use dataclasses.asdict as it doesn't work with Tensor
            # items with grad attributes.
            x = {f.name: getattr(x, f.name) for f in fields(x)}

        # NOTE: Modifying this so that we allow samples with more values, as long as it
        # has all the required keys.
        if not isinstance(x, (dict, MappingABC)) or not all(k in x for k in self.spaces):
            return False
        for k, space in self.spaces.items():
            if k not in x:
                return False
            if not space.contains(x[k]):
                return False
        return True
        # return super().contains(x)

    def __repr__(self) -> str:
        """Show the class name, the spaces, and the dtype when it isn't `dict`."""
        return (
            f"{str(type(self).__name__)}("
            + ", ".join([f"{k}:{s}" for k, s in self.spaces.items()])
            + (f", dtype={self.dtype}" if self.dtype is not dict else "")
            + ")"
        )

    def __eq__(self, other):
        """Compare like the parent class, except that two `TypedDictSpace`s with
        different dtypes are never equal.
        """
        if isinstance(other, TypedDictSpace) and self.dtype != other.dtype:
            return False
        return super().__eq__(other)
@batch_space.register(TypedDictSpace)
def _batch_typed_dict_space(space: TypedDictSpace, n: int = 1) -> spaces.Dict:
    """Batch each sub-space, and return a space of the same type and dtype as `space`."""
    batched_subspaces = {}
    for key, subspace in space.spaces.items():
        batched_subspaces[key] = batch_space(subspace, n=n)

    space_type = type(space)
    return space_type(batched_subspaces, dtype=space.dtype)
@concatenate.register(TypedDictSpace)
def _concatenate_typed_dicts(
    space: TypedDictSpace,
    items: Union[list, tuple],
    out: Union[tuple, dict, np.ndarray],
) -> Dict:
    """Concatenate the items entry by entry, and wrap the result in `space.dtype`."""
    concatenated = {}
    for key, subspace in space.spaces.items():
        # Gather the entry with this key from every item, and batch those together.
        entries = [item[key] for item in items]
        concatenated[key] = concatenate(subspace, entries, out=out[key])
    return space.dtype(**concatenated)
from sequoia.utils.generic_functions.to_from_tensor import from_tensor, to_tensor
T = TypeVar("T")
@from_tensor.register(TypedDictSpace)
def _(space: TypedDictSpace, sample: Union[T, Mapping]) -> T:
    """Apply `from_tensor` to each entry of `sample`, and wrap the result in `space.dtype`."""
    converted = {}
    for key, sub_space in space.spaces.items():
        converted[key] = from_tensor(sub_space, sample[key])
    return space.dtype(**converted)
import torch
@to_tensor.register(TypedDictSpace)
def _(
    space: TypedDictSpace[T],
    sample: Dict[str, Union[np.ndarray, Any]],
    device: torch.device = None,
) -> T:
    """Apply `to_tensor` to each entry of `sample`, and wrap the result in `space.dtype`."""
    converted = {}
    for key, subspace in space.items():
        converted[key] = to_tensor(subspace, sample=sample[key], device=device)
    return space.dtype(**converted)
================================================
FILE: sequoia/common/spaces/typed_dict_test.py
================================================
from dataclasses import Field, dataclass, fields
from typing import Dict, Iterable, Mapping, Tuple, TypeVar
import gym
import numpy as np
from gym import spaces
from gym.spaces import Box, Discrete
from gym.vector.utils import batch_space
from .typed_dict import TypedDictSpace
T = TypeVar("T")
def test_basic():
    """A TypedDictSpace and the equivalent `spaces.Dict` accept each other's samples."""
    typed_space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    sample = typed_space.sample()
    print(sample)
    assert sample in typed_space

    # TODO: Maybe re-use all the tests for gym.spaces.Tuple in the gym repo
    # somehow?
    dict_space = spaces.Dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    assert dict_space.sample() in typed_space
    assert typed_space.sample() in dict_space
def test_supports_dataclasses():
    """A TypedDictSpace with a dataclass dtype contains, and samples, that dataclass."""
    # IDEA: Wrapper that makes the 'default factory' of each field actually use
    # the 'sample' method from a space associated with each class.

    @dataclass
    class Sample:
        a: np.ndarray
        b: bool
        c: Tuple[int, int]

    space = spaces.Dict(
        a=spaces.Box(0, 1, [2, 2], dtype=np.float64),
        # NOTE: `np.bool` was only an alias for the builtin `bool`, and was removed
        # in NumPy 1.24, so the builtin is used directly.
        b=spaces.Box(False, True, (), bool),
        c=spaces.MultiDiscrete([2, 2]),
    )
    wrapped_space: TypedDictSpace = TypedDictSpace(spaces=space.spaces, dtype=Sample)
    assert isinstance(wrapped_space, spaces.Dict)

    s = Sample(
        a=np.ones([2, 2]),
        b=np.array(False),
        c=np.array([0, 1]),
    )
    assert s in wrapped_space

    assert isinstance(wrapped_space.sample(), Sample)
@dataclass
class StateTransition(Mapping[str, T]):
    """Dataclass that can also be used as a read-only mapping from field name to value."""

    current_state: T
    action: int
    next_state: T

    def __post_init__(self):
        # Keep the dataclass fields by name: these are the keys of the mapping.
        field_by_name: Dict[str, Field] = {}
        for field in fields(self):
            field_by_name[field.name] = field
        self._fields: Dict[str, Field] = field_by_name

    def __len__(self) -> int:
        return len(self._fields)

    def __getitem__(self, attr: str) -> T:
        if attr in self._fields:
            return getattr(self, attr)
        raise KeyError(attr)

    def __iter__(self) -> Iterable[str]:
        return iter(self._fields)
def test_basic_with_dtype():
    """With a custom dtype, the samples are instances of that dtype."""
    typed_space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    sample = typed_space.sample()
    assert sample in typed_space
    assert isinstance(sample, StateTransition)

    dict_space = spaces.Dict(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
    )
    assert dict_space.sample() in typed_space

    # NOTE: this doesn't work when using a dtype that isn't a subclass of dict!
    if issubclass(typed_space.dtype, dict):
        assert typed_space.sample() in dict_space
def test_isinstance():
    """The space is a `spaces.Dict`, and its samples are instances of the dtype."""
    typed_space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    assert isinstance(typed_space, spaces.Dict)

    sample = typed_space.sample()
    assert isinstance(sample, StateTransition)
def test_equals_dict_space_with_same_items():
    """Test that a TypedDictSpace is considered equal to a Dict space if
    the spaces are in the same order and all equal.
    """

    def make_subspaces():
        return dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        )

    space = TypedDictSpace(**make_subspaces(), dtype=StateTransition)
    dict_space = spaces.Dict(**make_subspaces())
    # Equality has to hold in both directions.
    assert space == dict_space
    assert dict_space == space
def test_batch_objets_considered_valid_samples():
    """`Batch` dataclass instances are valid samples of a matching TypedDictSpace."""
    from dataclasses import dataclass

    import numpy as np

    from sequoia.common.batch import Batch

    @dataclass(frozen=True)
    class StateTransitionDataclass(Batch):
        current_state: np.ndarray
        action: int
        next_state: np.ndarray

    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2), dtype=np.float64),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2), dtype=np.float64),
        dtype=StateTransitionDataclass,
    )

    half = np.ones([2, 2]) / 2
    transition = StateTransitionDataclass(
        current_state=half,
        action=1,
        next_state=np.zeros([2, 2]),
    )
    assert transition in space

    first_sample = space.sample()
    assert first_sample in space
    second_sample = space.sample()
    assert isinstance(second_sample, StateTransitionDataclass)
def test_batch_space():
    """Batching a TypedDictSpace batches each sub-space and gives back a TypedDictSpace."""
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    batched = batch_space(space, n=5)
    # Boxes get a leading batch dimension; Discrete(2) becomes MultiDiscrete([2] * 5).
    expected = TypedDictSpace(
        current_state=Box(0, 1, (5, 2, 2)),
        action=spaces.MultiDiscrete([2, 2, 2, 2, 2]),
        next_state=Box(0, 1, (5, 2, 2)),
        dtype=StateTransition,
    )
    assert batched == expected
def test_batch_space_preserves_dtype():
    """`batch_space` keeps the space type, the key order and the `dtype`.

    Covers three ways of building the space: keyword arguments with a dtype,
    a dict with a dtype, and a dict without an explicit dtype.
    """
    # --- Case 1: sub-spaces passed as keyword arguments, with a dtype. ---
    space = TypedDictSpace(
        current_state=Box(0, 1, (2, 2)),
        action=Discrete(2),
        next_state=Box(0, 1, (2, 2)),
        dtype=StateTransition,
    )
    batched_space = batch_space(space, n=5)
    assert isinstance(batched_space, TypedDictSpace)
    # Compare against the *original* space: comparing the batched space with
    # itself would be a tautology that can never fail.
    assert list(batched_space.spaces.keys()) == list(space.spaces.keys())
    assert list(batched_space.spaces.keys()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert batched_space.dtype is StateTransition

    # --- Case 2: sub-spaces passed as a dict, with a dtype. ---
    space = TypedDictSpace(
        dict(
            current_state=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
        dtype=StateTransition,
    )
    batched_space = batch_space(space, n=5)
    assert isinstance(batched_space, TypedDictSpace)
    assert list(batched_space.spaces.keys()) == list(space.spaces.keys())
    assert list(batched_space.spaces.keys()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert list(batched_space.sample().keys()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert list(v[0] for v in space.spaces.items()) == [
        "current_state",
        "action",
        "next_state",
    ]
    assert batched_space.dtype is StateTransition

    # --- Case 3: sub-spaces passed as a dict, without an explicit dtype. ---
    space = TypedDictSpace(
        dict(
            x=Box(0, 1, (2, 2)),
            action=Discrete(2),
            next_state=Box(0, 1, (2, 2)),
        ),
    )
    batched_space = batch_space(space, n=5)
    assert batched_space.x == Box(0, 1, (5, 2, 2))
    assert isinstance(batched_space, TypedDictSpace)
    assert list(batched_space.spaces.keys()) == list(space.spaces.keys())
    assert list(batched_space.spaces.keys()) == ["x", "action", "next_state"]
    assert list(batched_space.sample().keys()) == ["x", "action", "next_state"]
    assert list(v[0] for v in space.spaces.items()) == ["x", "action", "next_state"]
class DummyDictEnv(gym.Env):
    """Minimal environment whose observations are sampled from a TypedDictSpace."""

    def __init__(self):
        super().__init__()
        self.observation_space = TypedDictSpace(
            x=Box(0, 1, (2, 2)),
            t=Discrete(2),
            done=Box(False, True, (1,), bool),
        )
        self.action_space = spaces.Discrete(10)
        self.reward_space = spaces.Box(-10, 10, shape=(1,), dtype=np.float32)

    def reset(self):
        """Return a random observation."""
        return self.observation_space.sample()

    def step(self, action):
        """Ignore `action`; return a random observation and reward, never done."""
        observation = self.observation_space.sample()
        reward = self.reward_space.sample()
        return observation, reward, False, {}

    def seed(self, seed=None):
        """Seed the observation, action and reward spaces (in that order)."""
        seeds = []
        for space in (self.observation_space, self.action_space, self.reward_space):
            seeds.extend(space.seed(seed))
        return seeds
def test_vector_env():
    """Smoke test: the dummy env can be registered and created as a vectorized env."""
    from gym.envs.registration import register
    from gym.vector import make

    # Creating a single env must work on its own first.
    env = DummyDictEnv()

    register("dummy_foo-v0", entry_point=DummyDictEnv)
    env = make("dummy_foo-v0", num_envs=10)
from typing import Optional
from numpy.typing import ArrayLike
from sequoia.common.batch import Batch
def test_object_with_extra_keys_fits():
    """An object with fields beyond the space's keys is still a valid sample."""

    @dataclass(frozen=True)
    class Observation(Batch):
        x: np.ndarray
        t: ArrayLike
        done: Optional[ArrayLike] = None

    # The space only knows about `x` and `t`; `done` is the extra field.
    space = TypedDictSpace(
        x=spaces.Box(0, 10, (10,), dtype=np.float64),
        t=spaces.Box(0, 1, (1,), dtype=np.int32),
    )
    observation = Observation(
        x=np.arange(10, dtype=np.float64),
        t=np.array([1], dtype=np.int32),
        done=False,
    )
    assert observation.x in space.x
    assert observation.t in space.t
    assert observation in space
def test_order_of_keys_is_same_in_samples():
    """Keys of the space and of its samples follow the construction order."""
    space = TypedDictSpace(x=spaces.Box(0, 10, (10,), dtype=np.int32), t=spaces.Discrete(10))
    expected = ["x", "t"]

    # Order of the space's own keys / items.
    assert list(space.keys()) == expected
    assert [key for key, _ in space.items()] == expected

    # Order of the keys / items of samples.
    assert list(space.sample().keys()) == expected
    assert [key for key, _ in space.sample().items()] == expected

    # The string form of a sample lists the keys in the same order too.
    space.seed(123)
    s = space.sample()
    assert str(s) == f"{{'x': {repr(s['x'])}, 't': {repr(s['t'])}}}"
def test_debugging():
    """A plain dict whose keys are in a different order than the space is a valid sample."""
    sample = {
        "task_labels": 0,
        "x": np.array([-0.25162117, -0.43992427, 0.42706016, 1.47862901]),
    }
    space = TypedDictSpace(
        x=spaces.Box(-3.4028234663852886e38, 3.4028234663852886e38, (4,), np.float64),
        task_labels=spaces.Discrete(5),
        dtype=dict,
    )
    assert sample in space
def test_equality():
    """Two TypedDictSpaces built from equal sub-spaces and the same dtype are equal."""

    def make_space():
        return TypedDictSpace(
            x=spaces.Box(-np.inf, np.inf, (39,), np.float32),
            task_labels=spaces.Discrete(10),
            dtype=dict,
        )

    s1 = make_space()
    s2 = make_space()
    assert s1 == s2
## IDEA: Creating a space like this, using the same syntax as with TypedDict
# class StateTransitionSpace(TypedDict):
# current_state: Box = Box(0, 1, (2,2))
# action: Discrete = Discrete(2)
# next_state: Box = Box(0, 1, (2,2))
# space = StateTransitionSpace()
# space.sample()
================================================
FILE: sequoia/common/task.py
================================================
""" NOTE: Unused at the moment.
This defines a `Task` object that is just used to represent the information
about a 'Task'.
"""
from dataclasses import dataclass, field
from typing import List
from simple_parsing import list_field
from sequoia.utils.serialization import Serializable
@dataclass
class Task(Serializable):
    """Dataclass that represents a task.

    TODO (@lebrice): This isn't being used anymore, but we could probably
    use it / add it to the Continuum package, if it doesn't already have something
    like it.
    TODO: Maybe this could also specify from which dataset(s) it is sampled.
    """

    # The index of this task (the order in which it was encountered).
    # Defaults to -1 and is left out of the generated `repr`.
    index: int = field(default=-1, repr=False)
    # All the unique classes present within this task. (order matters)
    classes: List[int] = list_field()
================================================
FILE: sequoia/common/transforms/__init__.py
================================================
from .channels import (
ChannelsFirst,
ChannelsFirstIfNeeded,
ChannelsLast,
ChannelsLastIfNeeded,
ThreeChannels,
)
from .compose import Compose
from .split_batch import SplitBatch, split_batch
from .to_tensor import ToTensor, image_to_tensor
from .transform import Transform
from .transform_enum import Transforms
================================================
FILE: sequoia/common/transforms/channels.py
================================================
# from torchvision.transforms import Lambda
from collections.abc import Mapping
from dataclasses import dataclass
from functools import singledispatch
from typing import Any, Iterable, Tuple, Union
import numpy as np
import torch
from gym import spaces
from torch import Tensor
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
from sequoia.utils.logging_utils import get_logger
from .transform import Img, Transform
from .utils import is_image
logger = get_logger(__name__)
@singledispatch
def has_channels_last(img_or_shape: Union[Img, Tuple[int, ...], spaces.Box]) -> bool:
    """Return whether the given image, image batch, shape, or Space is in
    the channels-last format.

    The input is considered channels-last when its last dimension has length
    1 or 3. Objects with a `shape` attribute are inspected through it;
    anything else is treated as the shape itself.

    Always returns a real `bool` (an empty shape gives `False`).
    """
    shape = getattr(img_or_shape, "shape", img_or_shape)
    # Explicit length comparison so that empty shapes give `False` rather than `0`.
    return len(shape) > 0 and shape[-1] in {1, 3}
def has_channels_first(img_or_shape: Union[Img, Tuple[int, ...], spaces.Box]) -> bool:
    """Return whether the given image, image batch, shape, or Space is in
    the channels-first format.

    Only 3-d (C, H, W) and 4-d (N, C, H, W) shapes can be channels-first: the
    channel dimension must then have length 1 or 3. Any other number of
    dimensions gives `False`.
    """
    shape = getattr(img_or_shape, "shape", img_or_shape)
    n_dims = len(shape)
    if n_dims not in (3, 4):
        return False
    # Channels come first for single images, and right after the batch dim otherwise.
    channel_axis = 0 if n_dims == 3 else 1
    return shape[channel_axis] in {1, 3}
def channels_last_if_needed(x: Any) -> Any:
    """Move the channels to the last dimension, unless they already are there.

    Raises
    ------
    RuntimeError
        If the input is neither channels-first nor channels-last.
    """
    if has_channels_first(x):
        return channels_last(x)
    elif has_channels_last(x):
        return x
    # `x` may be a plain shape tuple (without a `.shape` attribute): report it
    # as-is instead of failing with an AttributeError while building the message.
    raise RuntimeError(
        f"Input isn't channels_first or channels_last! {getattr(x, 'shape', x)}"
    )
def channels_first_if_needed(x: Any) -> Any:
    """Move the channels to the first (non-batch) dimension, unless they already are there.

    Raises
    ------
    RuntimeError
        If the input is neither channels-first nor channels-last.
    """
    if has_channels_last(x):
        return channels_first(x)
    elif has_channels_first(x):
        return x
    # `x` may be a plain shape tuple (without a `.shape` attribute): report it
    # as-is instead of failing with an AttributeError while building the message.
    raise RuntimeError(
        f"Input isn't channels_first or channels_last! {getattr(x, 'shape', x)}"
    )
class NamedDimensions(Transform[Tensor, Tensor]):
    """'Transform' that gives names to the dimensions of input tensors.

    The names are applied with `Tensor.refine_names`.

    NOTE(review): an earlier version of this docstring said that existing
    named dimensions get overwritten. `refine_names` is documented as only
    naming unnamed dimensions (and rejecting conflicting names) - confirm
    which behaviour is actually wanted here.
    """

    def __init__(self, names: Iterable[str]):
        # Stored as a tuple so the names can be unpacked on every call.
        self.names = tuple(names)

    def __call__(self, tensor: Tensor) -> Tensor:
        return tensor.refine_names(*self.names)
@singledispatch
def three_channels(x: Any) -> Any:
    """Make the input image(s) have three channels, if they don't already.

    This is the fallback of a `singledispatch` function: the type-specific
    implementations are registered separately, and this one only rejects
    unsupported input types.

    Examples of the intended shape changes:
        [28, 28]        -> [3, 28, 28]      (copy the image three times)
        [1, 28, 28]     -> [3, 28, 28]      (same idea)
        [10, 1, 28, 28] -> [10, 3, 28, 28]  (keep batch intact, do the same again.)

    * Adding names to each dimension is turned off for now.
    """
    message = f"This doesn't currently support input {x} of type {type(x)}"
    raise NotImplementedError(message)
@three_channels.register(Tensor)
def _(x: Tensor) -> Tensor:
    """Repeat the single channel of a tensor three times.

    Supported layouts ("C" = channels, "N" = batch):
        [H, W]                      -> [3, H, W]
        [1, H, W]    / [H, W, 1]    -> [3, H, W]    / [H, W, 3]
        [N, 1, H, W] / [N, H, W, 1] -> [N, 3, H, W] / [N, H, W, 3]
    Anything else is returned unchanged.
    """
    # The branches are mutually exclusive on the *input* rank. With
    # independent `if`s, a 2-d input of width 1 would first become
    # [3, H, 1] and then wrongly be tiled again into [3, H, 3].
    if x.ndim == 2:
        # [H, W] -> [1, H, W] -> [3, H, W]
        x = x.reshape([1, *x.shape]).repeat(3, 1, 1)
    elif x.ndim == 3:
        if x.shape[0] == 1:
            # ("C", "H", "W")
            x = x.repeat(3, 1, 1)
        elif x.shape[-1] == 1:
            # ("H", "W", "C")
            x = x.repeat(1, 1, 3)
    elif x.ndim == 4:
        if x.shape[1] == 1:
            # ("N", "C", "H", "W")
            x = x.repeat(1, 3, 1, 1)
        elif x.shape[-1] == 1:
            # ("N", "H", "W", "C")
            x = x.repeat(1, 1, 1, 3)
    # FIXME: Naming the dimensions is turned off for now, since using named
    # dimensions generates a whole lot of UserWarnings atm.
    return x
@three_channels.register(np.ndarray)
def _(x: np.ndarray) -> np.ndarray:
    """Tile the single channel of an array three times.

    Supported layouts ("C" = channels, "N" = batch):
        [H, W]                      -> [3, H, W]
        [1, H, W]    / [H, W, 1]    -> [3, H, W]    / [H, W, 3]
        [N, 1, H, W] / [N, H, W, 1] -> [N, 3, H, W] / [N, H, W, 3]
    Anything else is returned unchanged.
    """
    # The branches are mutually exclusive on the *input* rank. With
    # independent `if`s, a 2-d input of width 1 would first become
    # [3, H, 1] and then wrongly be tiled again into [3, H, 3].
    if x.ndim == 2:
        # ("C", "H", "W")
        x = np.tile(x.reshape([1, *x.shape]), [3, 1, 1])
    elif x.ndim == 3:
        if x.shape[0] == 1:
            # ("C", "H", "W")
            x = np.tile(x, [3, 1, 1])
        elif x.shape[-1] == 1:
            # ("H", "W", "C")
            x = np.tile(x, [1, 1, 3])
    elif x.ndim == 4:
        if x.shape[1] == 1:
            # ("N", "C", "H", "W")
            x = np.tile(x, [1, 3, 1, 1])
        elif x.shape[-1] == 1:
            # ("N", "H", "W", "C")
            x = np.tile(x, [1, 1, 1, 3])
    return x
@three_channels.register(spaces.Box)
def _(x: spaces.Box) -> spaces.Box:
    """Build a space of the same type whose bounds have three channels."""
    space_type = type(x)
    new_low = three_channels(x.low)
    new_high = three_channels(x.high)
    return space_type(low=new_low, high=new_high, dtype=x.dtype)
@three_channels.register(torch.Size)
@three_channels.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    """Give the image shape that results from making the image three-channel.

    A 2-d shape gets a leading channel dimension of 3. For 3-d and 4-d shapes,
    a channel dimension of length 1 (first non-batch dimension, checked first,
    or last dimension) is replaced with 3. Other shapes are returned as-is.
    """
    n_dims = len(x)
    if n_dims == 2:
        return (3, *x)
    if n_dims == 3:
        candidate_axes = (0, -1)
    elif n_dims == 4:
        candidate_axes = (1, -1)
    else:
        return x
    for axis in candidate_axes:
        if x[axis] == 1:
            new_shape = list(x)
            new_shape[axis] = 3
            return tuple(new_shape)
    # No single-channel dimension found: nothing to change.
    return x
@three_channels.register(NamedTupleSpace)
def _three_channels(x: Any) -> Any:
    """Apply `three_channels` to the image entries of a NamedTupleSpace, keeping its dtype."""
    converted = {}
    for key, value in x.items():
        # Entries that aren't images are passed through untouched.
        converted[key] = three_channels(value) if is_image(value) else value
    return type(x)(**converted, dtype=x.dtype)
@three_channels.register(spaces.Dict)
@three_channels.register(Mapping)
def _three_channels(x: Any) -> Any:
    """Apply `three_channels` to the image entries of a Dict space or mapping.

    The result is rebuilt with the same type as the input, from keyword arguments.
    """
    converted = {}
    for key, value in x.items():
        # Entries that aren't images are passed through untouched.
        converted[key] = three_channels(value) if is_image(value) else value
    return type(x)(**converted)
@three_channels.register(TypedDictSpace)
def _three_channels(x: TypedDictSpace) -> TypedDictSpace:
    """Apply `three_channels` to the image entries of a TypedDictSpace, keeping its dtype."""
    converted = {}
    for key, value in x.items():
        # Entries that aren't images are passed through untouched.
        converted[key] = three_channels(value) if is_image(value) else value
    return type(x)(converted, dtype=x.dtype)
@dataclass
class ThreeChannels(Transform[Tensor, Tensor]):
    """Transform that makes the input images have three channels.

    Thin callable wrapper around the `three_channels` function.

    For instance, if the input shape is:
    [28, 28] -> [3, 28, 28] (copy the image three times)
    [1, 28, 28] -> [3, 28, 28] (same idea)
    [10, 1, 28, 28] -> [10, 3, 28, 28] (keep batch intact, do the same again.)
    """

    def __call__(self, x: Tensor) -> Tensor:
        return three_channels(x)
@singledispatch
def channels_first(x: Any) -> Any:
    """Re-order the dimensions of the input from ((n), H, W, C) to ((n), C, H, W).

    This is the fallback of a `singledispatch` function: the type-specific
    implementations are registered separately, and this one only rejects
    unsupported input types.

    The re-ordering is meant to be unconditional: it is applied regardless of
    whether the image or space already has its channels first.
    """
    message = f"Transform isn't applicable to input {x} of type {type(x)}."
    raise RuntimeError(message)
@channels_first.register(Tensor)
def _(x: Tensor) -> Tensor:
    """Move the channels of a 3-d or 4-d tensor first; other tensors are returned as-is.

    Tensors with named dimensions are aligned by name; unnamed ones are permuted.
    """
    if x.ndim not in (3, 4):
        return x
    named = any(x.names)
    if x.ndim == 3:
        if named:
            return x.align_to("C", "H", "W")
        return x.permute(2, 0, 1)
    # 4-d: batched images.
    if named:
        return x.align_to("N", "C", "H", "W")
    return x.permute(0, 3, 1, 2).contiguous()
@channels_first.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    """Re-order an image shape from ((N), H, W, C) to ((N), C, H, W).

    NOTE: `tuple` is registered a second time on `channels_first` further
    down in this module; `singledispatch` keeps the last registration.

    Raises
    ------
    NotImplementedError
        If the shape doesn't have 3 or 4 dimensions.
    """
    if len(x) == 3:
        # TODO: Re-enable the naming of the dimensions at some point.
        return type(x)(x[i] for i in (2, 0, 1))
    # `x` is a plain tuple: use its length directly (tuples have no `.shape`).
    if len(x) == 4:
        return type(x)(x[i] for i in (0, 3, 1, 2))
    raise NotImplementedError(x)
@channels_first.register(np.ndarray)
def _(x: np.ndarray) -> np.ndarray:
    """Move the last axis of a 3-d or 4-d array to the channel position.

    (H, W, C) -> (C, H, W) and (N, H, W, C) -> (N, C, H, W).

    Raises
    ------
    NotImplementedError
        If the array isn't 3-d or 4-d.
    """
    if x.ndim == 4:
        return np.moveaxis(x, 3, 1)
    elif x.ndim == 3:
        return np.moveaxis(x, 2, 0)
    else:
        raise NotImplementedError(f"Expected 3-d or 4-d input, got {x}")
@channels_first.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    """Re-order an image shape from ((N), H, W, C) to ((N), C, H, W).

    Raises
    ------
    NotImplementedError
        If the shape doesn't have 3 or 4 dimensions.
    """
    # Index order to read the dimensions in, keyed by number of dimensions.
    orders = {4: (0, 3, 1, 2), 3: (2, 0, 1)}
    order = orders.get(len(x))
    if order is None:
        raise NotImplementedError(x)
    return type(x)(x[i] for i in order)
@channels_first.register(spaces.Box)
def _(x: spaces.Box) -> spaces.Box:
    """Build a space of the same type whose bounds have their channels first."""
    space_type = type(x)
    new_low = channels_first(x.low)
    new_high = channels_first(x.high)
    return space_type(low=new_low, high=new_high, dtype=x.dtype)
@dataclass
class ChannelsFirst(Transform[Union[np.ndarray, Tensor], Tensor]):
    """Re-orders the dimensions of the input from ((n), H, W, C) to ((n), C, H, W).

    Thin callable wrapper around the `channels_first` function, which does the
    actual work (and decides which input types are supported).

    If the tensor doesn't have named dimensions, this will ALWAYS re-order the
    dimensions, regardless of the length of the last dimension.

    NOTE(review): an earlier version of this docstring said that non-Tensor
    inputs get converted to tensors using `to_tensor`; nothing in this class
    does that conversion - confirm against `channels_first`.
    """

    def __call__(self, x: Tensor) -> Tensor:
        return self.apply(x)

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        # Subclasses override `apply` to add conditions before re-ordering.
        return channels_first(x)
        # if not isinstance(x, Tensor):
        # raise RuntimeError(f"Transform only applies to Tensors. (Not {x} of type {type(x)}).")
        # # if has_channels_first(x):
        # # logger.warning(RuntimeWarning(f"Input already seems to have channels first, but this transform will be applied anyway.."))
        # if x.ndim == 3:
        # if any(x.names):
        # return x.align_to("C", "H", "W")
        # return x.permute(2, 0, 1)#.to(memory_format=torch.contiguous_format)
        # if x.ndim == 4:
        # if any(x.names):
        # return x.align_to("N", "C", "H", "W")
        # return x.permute(0, 3, 1, 2).contiguous()
        # return x

    # @staticmethod
    # def shape_change(input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    # ndim = len(input_shape)
    # if ndim == 3:
    # return tuple(input_shape[i] for i in (2, 0, 1))
    # elif ndim == 4:
    # return tuple(input_shape[i] for i in (0, 3, 1, 2))
    # return input_shape
@dataclass
class ChannelsFirstIfNeeded(ChannelsFirst):
    """Only puts the channels first if the input has channels last.

    Inputs that `has_channels_last` doesn't recognize as channels-last are
    returned unchanged.
    """

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        if has_channels_last(x):
            return super().apply(x)
        return x

    # @classmethod
    # def shape_change(cls, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    # if has_channels_last(input_shape):
    # return super().shape_change(input_shape)
    # return input_shape
@singledispatch
def channels_last(x: Any) -> Any:
    """Re-order the dimensions of the input from ((n), C, H, W) to ((n), H, W, C).

    This is the fallback of a `singledispatch` function: the type-specific
    implementations are registered separately, and this one only rejects
    unsupported input types.
    """
    message = f"This doesn't support input {x} of type {type(x)}"
    raise NotImplementedError(message)
@channels_last.register(Tensor)
def _(x: Tensor) -> Tensor:
    """Move the channels of a 3-d or 4-d tensor to the last dimension.

    (C, H, W) -> (H, W, C) and (N, C, H, W) -> (N, H, W, C).

    Raises
    ------
    NotImplementedError
        If the tensor isn't 3-d or 4-d. (This used to silently return `None`;
        the array and tuple overloads of `channels_last` raise in that case.)
    """
    if len(x.shape) == 3:
        # TODO: Re-enable the naming of the dimensions at some point.
        # if not x.names:
        # x.rename("C", "H", "W")
        # return x.align_to("H", "W", "C")
        return x.permute(1, 2, 0)
    if len(x.shape) == 4:
        return x.permute(0, 2, 3, 1)
    raise NotImplementedError(x.shape)
@channels_last.register(tuple)
def _(x: Tuple[int, ...]) -> Tuple[int, ...]:
    """Re-order an image shape from ((N), C, H, W) to ((N), H, W, C).

    Raises
    ------
    NotImplementedError
        If the shape doesn't have 3 or 4 dimensions.
    """
    if len(x) == 3:
        # TODO: Re-enable the naming of the dimensions at some point.
        return type(x)(x[i] for i in (1, 2, 0))
    # `x` is a plain tuple: use its length directly (tuples have no `.shape`).
    if len(x) == 4:
        return type(x)(x[i] for i in (0, 2, 3, 1))
    raise NotImplementedError(x)
@channels_last.register(np.ndarray)
def _(x: np.ndarray) -> np.ndarray:
    """Move the channel axis of a 3-d or 4-d array to the last position.

    (C, H, W) -> (H, W, C) and (N, C, H, W) -> (N, H, W, C).

    Raises
    ------
    NotImplementedError
        If the array isn't 3-d or 4-d.
    """
    n_dims = len(x.shape)
    if n_dims not in (3, 4):
        raise NotImplementedError(x.shape)
    # The channel axis is the third from the end in both supported layouts.
    return np.moveaxis(x, -3, -1)
@channels_last.register(spaces.Box)
def _(x: spaces.Box) -> spaces.Box:
    """Build a space of the same type whose bounds have their channels last."""
    space_type = type(x)
    new_low = channels_last(x.low)
    new_high = channels_last(x.high)
    return space_type(low=new_low, high=new_high, dtype=x.dtype)
@dataclass
class ChannelsLast(Transform[Tensor, Tensor]):
    """Re-orders the dimensions of the input so that the channels come last.

    Thin callable wrapper around the `channels_last` function.
    """

    def __call__(self, x: Tensor) -> Tensor:
        return self.apply(x)

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        # Subclasses override `apply` to add conditions before re-ordering.
        return channels_last(x)
@dataclass
class ChannelsLastIfNeeded(ChannelsLast):
    """Only puts the channels last if the input has channels first.

    Delegates to `channels_last_if_needed`.
    """

    @classmethod
    def apply(cls, x: Tensor) -> Tensor:
        return channels_last_if_needed(x)
================================================
FILE: sequoia/common/transforms/compose.py
================================================
from typing import Callable, List, TypeVar
from gym import spaces
from torchvision.transforms import Compose as ComposeBase
from sequoia.utils.logging_utils import get_logger
from .transform import InputType, OutputType, Transform
logger = get_logger(__name__)
T = TypeVar("T", bound=Callable)
class Compose(List[T], ComposeBase, Transform[InputType, OutputType]):
    """Extend the Compose class of torchvision with methods of `list`.

    This can also be passed in members of the `Transforms` enum, which makes it
    possible to do something like this:
    >>> from .transform_enum import Compose, Transforms
    >>> transforms = Compose([Transforms.to_tensor, Transforms.three_channels,])
    >>> Transforms.three_channels in transforms
    True
    >>> transforms += [Transforms.random_grayscale]
    >>> transforms
    [, , ]

    When called on a gym `Space`, the transforms are applied on a best-effort
    basis: a transform that fails on the space is assumed not to change it.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The list itself doubles as the `transforms` attribute of torchvision's Compose.
        ComposeBase.__init__(self, transforms=self)

    def __call__(self, img):
        if not isinstance(img, spaces.Space):
            # Regular inputs: errors raised by the transforms propagate.
            for t in self:
                img = t(img)
            return img

        for t in self:
            try:
                img = t(img)
            # Catch `Exception` rather than using a bare `except`, so that
            # KeyboardInterrupt / SystemExit aren't silently swallowed.
            except Exception:
                logger.debug(
                    f"Unable to apply transform {t} on space {img}: assuming that transform {t} doesn't change the space."
                )
        return img

    # def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
    # logger.debug(f"shape_change on Compose: input shape: {input_shape}")
    # # TODO: Give the impact of this transform on a given input shape.
    # for transform in self:
    # logger.debug(f"Shape before transform {transform}: {input_shape}")
    # shape_change_method: Optional[Callable] = getattr(transform, "shape_change", None)
    # if shape_change_method and callable(shape_change_method):
    # input_shape = transform(input_shape) # type: ignore
    # else:
    # logger.debug(
    # f"Unable to detect the change of shape caused by "
    # f"transform {transform}, assuming its output has same "
    # f"shape as its input."
    # )
    # logger.debug(f"Final shape: {input_shape}")
    # return input_shape
    # def space_change(self, input_space: gym.Space) -> gym.Space:
    # from .transform_enum import Transforms
    # for transform in self:
    # if isinstance(transform, Transforms):
    # transform = transform.value
    # input_space = transform(input_space)
    # return input_space
================================================
FILE: sequoia/common/transforms/resize.py
================================================
from collections.abc import Mapping
from functools import singledispatch
from typing import Dict, List, Tuple
import numpy as np
import torch
from gym import spaces
from PIL import Image
from torch import Tensor
from torch.nn.functional import interpolate
from torchvision.transforms import InterpolationMode
from torchvision.transforms import Resize as Resize_
from torchvision.transforms import functional as F
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support, has_tensor_support
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
from sequoia.common.spaces.image import Image as ImageSpace
from sequoia.utils.logging_utils import get_logger
from .channels import channels_first, channels_last, has_channels_first, has_channels_last
from .transform import Img, Transform
from .utils import is_image
logger = get_logger(__name__)
@singledispatch
def resize(x: Img, size: Tuple[int, ...], **kwargs) -> Img:
    """Resize a PIL.Image, a Tensor, ndarray, or a Box space.

    This is the fallback of a `singledispatch` function: the type-specific
    implementations are registered separately, and this one only rejects
    unsupported input types.
    """
    message = f"Transform doesn't support input {x} of type {type(x)}"
    raise NotImplementedError(message)
# Registered through the annotation of the first parameter (PIL images).
@resize.register
def _(x: Image.Image, size: Tuple[int, ...], **kwargs) -> Image.Image:
    """Resize a PIL image with torchvision's functional `resize`.

    Extra keyword arguments are forwarded to it unchanged.
    """
    return F.resize(x, size, **kwargs)
@resize.register(np.ndarray)
@resize.register(Tensor)
def _resize_array_or_tensor(x: np.ndarray, size: Tuple[int, ...], **kwargs) -> np.ndarray:
    """Resize an array or tensor with `torch.nn.functional.interpolate` (mode "area").

    The input is temporarily brought into the batched, channels-first tensor
    layout that `interpolate` needs, and the result is converted back to the
    layout and type of the input.

    TODO: This resizes numpy arrays by converting them to tensors and then
    using the `interpolate` function. There is for sure a more efficient way to
    do this.
    """
    # Remember what the input looked like, to undo each conversion afterwards.
    came_from_numpy = isinstance(x, np.ndarray)
    was_unbatched = len(x.shape) == 3
    had_channels_last = has_channels_last(x)

    if came_from_numpy:
        # Need to convert to tensor (for interpolate to work).
        x = torch.as_tensor(x)
    if was_unbatched:
        # Need to add a batch dimension (for interpolate to work).
        x = x.unsqueeze(0)
    if had_channels_last:
        # Need to make it channels first (for interpolate to work).
        x = channels_first(x)
    assert has_channels_first(x), f"Image needs to have channels first (shape is {x.shape})"

    x = interpolate(x, size, mode="area")

    # Undo the conversions, in the same order as they were applied.
    if came_from_numpy:
        x = x.numpy()
    if was_unbatched:
        x = x[0]
    if had_channels_last:
        x = channels_last(x)
    return x
@resize.register
def _resize_namedtuple_space(
    x: NamedTupleSpace, size: Tuple[int, ...], **kwargs
) -> NamedTupleSpace:
    """When presented with a NamedTupleSpace input, this transform will be
    applied to all 'Image' spaces.

    The other sub-spaces are kept as they are, and the `dtype` of the space is
    carried over to the resized space (as is done for TypedDictSpace inputs),
    so samples of the new space keep the same type.
    """
    return type(x)(
        **{
            key: resize(v, size, **kwargs) if isinstance(v, ImageSpace) else v
            for key, v in x._spaces.items()
        },
        dtype=x.dtype,
    )
@resize.register(Mapping)
def _resize_namedtuple(x: Dict, size: Tuple[int, ...], **kwargs) -> Dict:
    """Resize the image entries of a Mapping-like input.

    Entries for which `is_image` is false are kept as they are. The result is
    rebuilt with the same type as the input, from keyword arguments.
    """
    resized = {}
    for key, value in x.items():
        resized[key] = resize(value, size, **kwargs) if is_image(value) else value
    return type(x)(**resized)
@resize.register(TypedDictSpace)
def _resize_typed_dict(x: TypedDictSpace, size: Tuple[int, ...], **kwargs) -> TypedDictSpace:
    """Resize the image entries of a TypedDictSpace, keeping its dtype.

    Entries for which `is_image` is false are kept as they are.
    """
    resized = {}
    for key, value in x.items():
        resized[key] = resize(value, size, **kwargs) if is_image(value) else value
    return type(x)(resized, dtype=x.dtype)
@resize.register(tuple)
def _resize_image_shape(x: Tuple[int, ...], size: Tuple[int, ...], **kwargs) -> Tuple[int, ...]:
    """Give the resized image shape, given the input shape.

    The batch and channel dimensions of 3-d and 4-d shapes are preserved; only
    the two spatial dimensions are replaced with `size`.

    Raises
    ------
    NotImplementedError
        If `size` doesn't have exactly two entries, or if a 3-d / 4-d input
        shape is neither channels-first nor channels-last.
    """
    if len(size) != 2:
        # This exception used to be created but never raised, which silently
        # returned `size` itself as the new shape.
        raise NotImplementedError(size)

    new_shape: List[int] = list(size)
    # Preserve the number of channels (and the batch size, if any).
    if len(x) == 4:
        if has_channels_first(x):
            new_shape = [*x[:2], *size]
        elif has_channels_last(x):
            new_shape = [x[0], *size, x[-1]]
        else:
            raise NotImplementedError(x)
    elif len(x) == 3:
        if has_channels_first(x):
            new_shape = [x[0], *size]
        elif has_channels_last(x):
            new_shape = [*size, x[-1]]
        else:
            raise NotImplementedError(x)
    return type(x)(new_shape)
@resize.register(spaces.Box)
def _resize_space(x: spaces.Box, size: Tuple[int, ...], **kwargs) -> spaces.Box:
    """Build a space of the same type by resizing the bounds of a Box space.

    If the input space supports tensors as samples, so does the returned space.
    """
    # Hmm, not sure if the bounds would actually also be respected though.
    resized_low = resize(x.low, size, **kwargs)
    resized_high = resize(x.high, size, **kwargs)
    new_space = type(x)(low=resized_low, high=resized_high, dtype=x.dtype)
    # If the 'old' space supported tensors as samples, then so will the new space.
    return add_tensor_support(new_space) if has_tensor_support(x) else new_space
class Resize(Resize_, Transform[Img, Img]):
    """Resize transform that delegates to the `resize` function of this module.

    NOTE(review): `interpolation` is passed on to the torchvision base class,
    but `forward` only gives `self.size` to `resize` - confirm whether the
    interpolation mode is meant to be ignored.
    """

    def __init__(self, size: Tuple[int, ...], interpolation=InterpolationMode.BILINEAR):
        super().__init__(size, interpolation)
        # self.size = size
        # self.interpolation = interpolation

    def __call__(self, img):
        # TODO: (@lebrice) Weirdly enough, it seems that even though we
        # implement forward below, and __call__ is supposed to just use
        # `forward`, the base class somehow doesn't use our implementation, so
        # the test
        # env_dataset_test.py::test_iteration_with_more_than_one_wrapper would
        # fail if we don't have this __call__ explicitly implemented,
        return self.forward(img)

    def forward(self, img: Img) -> Img:
        # Dispatches on the type of `img`.
        return resize(img, size=self.size)
================================================
FILE: sequoia/common/transforms/split_batch.py
================================================
import dataclasses
from typing import Any, Callable, Optional, Tuple, Type, TypeVar
import numpy as np
from torch import Tensor
from ..batch import Batch
from .transform import Transform
# Type variables just for the below function.
ObservationType = TypeVar("ObservationType", bound=Batch)
RewardType = TypeVar("RewardType", bound=Batch)
class SplitBatch(Transform[Any, Tuple[ObservationType, RewardType]]):
    """
    Transform that will split batches into Observations and Rewards.

    The provided observation and reward types (which have to be subclasses of
    the `Batch` class) will be used to construct the observation and reward
    objects, respectively.

    This is a class-based wrapper around the callable returned by
    `split_batch`: all the splitting logic (and the errors it can raise) lives
    there.

    NOTE(review): an earlier version of this docstring said that unlabeled
    batches give a Reward object with 'None' passed for its required
    arguments - confirm against `split_batch`.

    Parameters
    ----------
    observation_type : Type[ObservationType]
        Type used to construct the observations.
    reward_type : Type[RewardType]
        Type used to construct the rewards.
    """

    def __init__(self, observation_type: Type[ObservationType], reward_type: Type[RewardType]):
        self.Observations = observation_type
        self.Rewards = reward_type
        # The splitting function is built once, for these two types.
        self.func = split_batch(observation_type=observation_type, reward_type=reward_type)

    def __call__(self, batch: Any) -> Tuple[ObservationType, RewardType]:
        return self.func(batch)
def split_batch(
    observation_type: Type[ObservationType], reward_type: Type[RewardType]
) -> Callable[[Any], Tuple[ObservationType, Optional[RewardType]]]:
    """Makes a callable that will split batches into Observations and Rewards.

    The provided observation and reward types (which have to be subclasses of
    the `Batch` class) will be used to construct the observation and reward
    objects, respectively.

    The returned callable accepts:
    - a single Tensor / ndarray (treated as a one-item batch);
    - a dict, whose items are routed to the type that declares each field name;
    - an instance of `observation_type` (returned as-is, with a `None` reward);
    - a tuple / list of items: the observation items first, then the reward items.

    Parameters
    ----------
    observation_type : Type[ObservationType]
        Type used to construct the observations.
    reward_type : Type[RewardType]
        Type used to construct the rewards.

    Returns
    -------
    Callable[[Any], Tuple[ObservationType, RewardType]]
        The function that splits a batch into an (observation, reward) tuple.

    Raises
    ------
    RuntimeError
        If the observation_type or reward_type don't both subclass Batch.
        The returned callable also raises it when the type of the batch isn't
        supported, or when the batch doesn't have the right number of items.
    NotImplementedError
        Raised by the returned callable when it can't tell whether the optional
        items of the batch belong to the observation or to the reward.
    """
    if not (issubclass(observation_type, Batch) and issubclass(reward_type, Batch)):
        raise RuntimeError(
            "Both `observation_type` and `reward_type` need to " "inherit from `Batch`!"
        )

    # Get the min, max and number of optional args for each object type.
    min_for_obs = n_required_fields(observation_type)
    max_for_obs = n_fields(observation_type)
    n_optional_for_obs = max_for_obs - min_for_obs

    min_for_rew = n_required_fields(reward_type)
    max_for_reward = n_fields(reward_type)
    # The reward's optional fields are counted against the reward's own
    # required fields (not against the observation's).
    n_optional_for_rew = max_for_reward - min_for_rew

    min_items = min_for_obs + min_for_rew
    max_items = max_for_obs + max_for_reward

    def split_batch_transform(batch: Any) -> Tuple[ObservationType, RewardType]:
        if isinstance(batch, (Tensor, np.ndarray)):
            batch = (batch,)

        if isinstance(batch, dict):
            obs_fields = observation_type.field_names
            rew_fields = reward_type.field_names
            assert not set(obs_fields).intersection(
                set(rew_fields)
            ), "Observation and Reward shouldn't share fields names"
            obs_kwargs = {k: v for k, v in batch.items() if k in obs_fields}
            obs = observation_type(**obs_kwargs)
            reward_kwargs = {k: v for k, v in batch.items() if k in rew_fields}
            reward = reward_type(**reward_kwargs)
            return obs, reward

        if isinstance(batch, observation_type):
            return batch, None

        if not isinstance(batch, (tuple, list)):
            # TODO: Add support for more types maybe? Or just wrap it in a tuple
            # and call it a day?
            raise RuntimeError(f"Batch is of an unsuported type: {type(batch)}.")

        # If the batch already has two elements, check if they are already of
        # the right type, to avoid unnecessary computation below.
        if len(batch) == 2:
            obs, rew = batch
            if isinstance(obs, observation_type) and isinstance(rew, reward_type):
                return obs, rew

        n_items = len(batch)
        if n_items < min_items or n_items > max_items:
            raise RuntimeError(
                f"There aren't the right number of elements in the batch to "
                f"create both an Observation and a Reward!\n"
                f"(batch has {n_items} items, but type "
                f"{observation_type} requires from {min_for_obs} to "
                f"{max_for_obs} args, while {reward_type} requires from "
                f"{min_for_rew} to {max_for_reward} args. "
            )

        # Batch looks like:
        # [
        # O_1, O_2, ..., O_{min_obs}, (O_{min_obs+1}), ..., (O_{max_obs}),
        # R_1, R_2, ..., R_{min_rew}, (R_{min_rew+1}), ..., (R_{max_rew}),
        # ]
        if n_items == 0:
            # Only reachable when neither type has required fields.
            obs = observation_type()
            rew = reward_type()
        elif n_items == max_items:
            # Easiest case! Just use all the values.
            obs = observation_type(*batch[:max_for_obs])
            rew = reward_type(*batch[max_for_obs:])
        elif n_items == min_items:
            # Easy case as well. Also simply uses all the values directly.
            obs = observation_type(*batch[:min_for_obs])
            rew = reward_type(*batch[min_for_obs:])
        elif n_optional_for_obs == 0 and n_optional_for_rew != 0:
            # All the extra args go in the reward.
            obs = observation_type(*batch[:min_for_obs])
            rew = reward_type(*batch[min_for_obs:])
        elif n_optional_for_obs != 0 and n_optional_for_rew == 0:
            # All the extra args go in the observation.
            obs = observation_type(*batch[:max_for_obs])
            rew = reward_type(*batch[max_for_obs:])
        else:
            # We can't tell where the 'extra' tensors should go.
            # TODO: Maybe just assume that all the 'extra' tensors are meant to
            # be part of the observation? or the reward? For instance:
            # Option 1: All the extra args go in the observation:
            # obs = Observation(*batch[:n_items-n_required_for_rew])
            # rew = Observation(*batch[n_items-n_required_for_rew:])
            # Option 2: All the extra args go in the reward:
            # obs = Observation(*batch[:n_required_for_obs])
            # rew = Observation(*batch[n_required_for_obs:])
            n_extra = n_items - min_items
            max_extra = n_optional_for_obs + n_optional_for_rew
            raise NotImplementedError(
                f"Can't tell where to put these extra tensors!\n"
                f"(batch has {n_items} items, but type "
                f"{observation_type} requires from {min_for_obs} to "
                f"{max_for_obs} args, while {reward_type} requires from "
                f"{min_for_rew} to {max_for_reward} args. There are "
                f"{n_extra} extra items out of a potential of {max_extra}."
            )
        return obs, rew

    return split_batch_transform
def n_fields(batch_type: Type[Batch]) -> int:
    """Helper function, gives back the total number of fields in a Batch subclass.

    Parameters
    ----------
    batch_type : Type
        A subclass of Batch (which needs to be a dataclass).

    Returns
    -------
    int
        The total number of fields in the type. See the `fields` function of the
        `dataclasses` package for more info.
    """
    return len(dataclasses.fields(batch_type))
def n_required_fields(batch_type: Type) -> int:
    """Count the constructor arguments of a dataclass that must be supplied.

    Parameters
    ----------
    batch_type : Type
        The dataclass type to inspect.

    Returns
    -------
    int
        The number of fields that take part in `__init__` (``init=True``) and
        have neither a default value nor a default factory.
    """
    missing = dataclasses.MISSING
    n_required = 0
    for entry in dataclasses.fields(batch_type):
        if not entry.init:
            # Fields excluded from the constructor are never "required".
            continue
        if entry.default is missing and entry.default_factory is missing:
            n_required += 1
    return n_required
================================================
FILE: sequoia/common/transforms/to_tensor.py
================================================
""" Slight modification of the ToTensor transform from TorchVision.
@lebrice: I wrote this because I would often get weird 'negative stride in
images' errors when converting PIL images from some gym environments when
using `ToTensor` from torchvision.
"""
from collections.abc import Mapping
from dataclasses import dataclass
from functools import singledispatch
from typing import Dict, Sequence, Tuple, Union
import gym
import numpy as np
import torch
from gym import spaces
from PIL.Image import Image
from torch import Tensor
from torchvision.transforms import ToTensor as ToTensor_
from torchvision.transforms import functional as F
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.spaces import NamedTupleSpace, TypedDictSpace
from sequoia.utils.logging_utils import get_logger
from .channels import channels_first_if_needed
from .transform import Img, Transform
logger = get_logger(__name__)
def copy_if_negative_strides(image: Img) -> Img:
    """Return `image`, copied first if any of its strides is negative.

    It sometimes happens when taking images from a gym env that the strides
    are negative, for some reason. Therefore we need to copy the array
    before we can call torchvision.transforms.functional.to_tensor(image).

    Parameters
    ----------
    image : Img
        A PIL image, a numpy array, a Tensor, or any object exposing a
        `strides` attribute. PIL images are converted to numpy arrays first,
        so the value returned for a PIL input is an ndarray.

    Returns
    -------
    Img
        The (possibly converted) input, or a copy of it when one of its
        strides is negative.

    Raises
    ------
    NotImplementedError
        If the strides of `image` cannot be determined.
    """
    if isinstance(image, Image):
        image = np.array(image)

    if isinstance(image, np.ndarray):
        strides = image.strides
    elif isinstance(image, Tensor):
        strides = image.stride()
    elif hasattr(image, "strides"):
        strides = image.strides
    else:
        raise NotImplementedError(f"Can't get strides of object {image}")

    if any(s < 0 for s in strides):
        # Tensors expose `clone()` rather than `copy()`; calling `.copy()` on
        # a Tensor would raise AttributeError.
        if isinstance(image, Tensor):
            return image.clone()
        return image.copy()
    return image
@singledispatch
def image_to_tensor(image: Union[Img, Sequence[Img], gym.Space]) -> Union[Tensor, gym.Space]:
    """Single-dispatch entry point for converting images (or spaces) to tensors.

    The implementations registered on this function in this module convert a
    PIL Image or numpy.ndarray ((N) x H x W x C) in the range [0, 255] to a
    torch.FloatTensor of shape ((N) x C x H x W) in the range [0.0, 1.0] when
    the array has dtype np.uint8, and also accept lists of images, shape
    tuples and gym spaces.

    This fallback body only runs for types with no registered implementation.

    Parameters
    ----------
    image : Union[Img, Sequence[Img], gym.Space]
        The value to convert.

    Returns
    -------
    Union[Tensor, gym.Space]
        Never returns from this fallback; see the registered implementations.

    Raises
    ------
    NotImplementedError
        Always, since no implementation is registered for `type(image)`.
    """
    message = f"Don't know how to convert {image} to a Tensor."
    raise NotImplementedError(message)
# @image_to_tensor.register
# def _(image: Tensor) -> Tensor:
# return channels_first_if_needed(image)
@image_to_tensor.register(Tensor)
@image_to_tensor.register(np.ndarray)
@image_to_tensor.register(Image)
def _(image: Union[Image, np.ndarray]) -> Tensor:
    """Converts a PIL Image, or np.uint8 ndarray to a Tensor. Also reshapes it
    to channels_first format (because ToTensor from torchvision does it also).

    This implementation is also registered for `Tensor` inputs.
    """
    from .channels import channels_first_if_needed

    # PIL images come back from this call as numpy arrays, so from here on
    # `image` has a `.shape`.
    image = copy_if_negative_strides(image)
    if len(image.shape) == 2:
        # 2-dimensional input: delegate entirely to torchvision.
        return F.to_tensor(image)

    if isinstance(image, np.ndarray):
        # NOTE(review): the original comment here said "convert to channels
        # last if needed, because ToTensor expects to receive that", but the
        # call below is `channels_first_if_needed` -- confirm which is meant.
        image = channels_first_if_needed(image)
        image = torch.from_numpy(image).contiguous()
        # backward compatibility: byte (uint8) tensors are converted to floats
        # and divided by 255.
        if isinstance(image, torch.ByteTensor):
            image = image.float().div(255)
        return image

    # Only non-ndarray inputs (e.g. Tensors) reach this point.
    if len(image.shape) == 4:
        # Convert each item along the first dimension, then stack the results.
        return channels_first_if_needed(torch.stack(list(map(image_to_tensor, image))))
    if not isinstance(image, Tensor):
        image = F.to_tensor(image)
    return channels_first_if_needed(image)
@image_to_tensor.register(list)
def _list_of_images_to_tensor(image: Sequence[Img]) -> Tensor:
    """Convert each image of a list with `image_to_tensor` and stack the results."""
    converted = [image_to_tensor(item) for item in image]
    return torch.stack(converted)
@image_to_tensor.register(tuple)
def _to_tensor_effect_on_image_shape(image: Tuple[int, ...]) -> Tuple[int, ...]:
    """Predict the output shape of the transform from an input image shape.

    Only 3-item shapes are passed through `channels_first_if_needed`; shapes
    of any other length are returned unchanged.
    """
    if len(image) != 3:
        return image

    from .channels import channels_first_if_needed

    return channels_first_if_needed(image)
@image_to_tensor.register(spaces.Box)
def _(image: spaces.Box) -> spaces.Box:
    """Apply the effect of the image-to-tensor transform to a Box space.

    A np.uint8 space is replaced by a np.float32 space of the same type with
    bounds [0.0, 1.0] and a channels-first shape. In every case the resulting
    space is passed through `add_tensor_support` before being returned.
    """
    converted = image
    if converted.dtype == np.uint8:
        # images get their bounds changed to [0. 1.] and their shape changed
        # to channels_first.
        box_type = type(converted)
        new_shape = channels_first_if_needed(converted.shape)
        converted = box_type(low=0.0, high=1.0, shape=new_shape, dtype=np.float32)
    # TODO: it sometimes happens that the `image` space has already been
    # through 'to_tensor`, not sure what to do in that case.
    # elif not has_tensor_support(image):
    #     raise RuntimeError(f"image spaces should have dtype np.uint8!: {image}")

    # The transform turns images / ndarrays into tensors, so the space of
    # images gains 'Tensor' support as well.
    return add_tensor_support(converted)
@image_to_tensor.register(NamedTupleSpace)
def _(space: Dict, device: torch.device = None) -> Dict:
    """Convert the image entries of a NamedTupleSpace, leaving the others as-is.

    The result is a new space of the same type, built with the same `dtype`.
    The `device` argument is accepted but not used here.
    """
    from .resize import is_image

    converted = {}
    for key, value in space.items():
        converted[key] = image_to_tensor(value) if is_image(value) else value
    return type(space)(**converted, dtype=space.dtype)
@image_to_tensor.register(Mapping)
@image_to_tensor.register(spaces.Dict)
def _space_with_images_to_tensor(space: Dict, device: torch.device = None) -> Dict:
    """Convert the image entries of a mapping / Dict space, leaving the others as-is.

    The result is rebuilt with `type(space)`, passing the entries as keyword
    arguments. The `device` argument is accepted but not used here.
    """
    from .resize import is_image

    converted = {}
    for key, value in space.items():
        converted[key] = image_to_tensor(value) if is_image(value) else value
    return type(space)(**converted)
@image_to_tensor.register(TypedDictSpace)
def _space_with_images_to_tensor(
    space: TypedDictSpace, device: torch.device = None
) -> TypedDictSpace:
    """Convert the image entries of a TypedDictSpace, leaving the others as-is.

    The result is a new space of the same type, built from a dict of the
    converted entries and the same `dtype`. The `device` argument is accepted
    but not used here.
    """
    from .resize import is_image

    converted = {}
    for key, value in space.items():
        converted[key] = image_to_tensor(value) if is_image(value) else value
    return type(space)(converted, dtype=space.dtype)
# @image_to_tensor.register(Image)
# def to_tensor(image: Union[Img, Sequence[Img]]) -> Tensor:
# tensor: Tensor
# if isinstance(image, Tensor):
# return channels_first(image)
# return image
# # return channels_first(image)
# if isinstance(image, (list, tuple)) or (isinstance(image, np.ndarray) and image.ndim == 4):
# return torch.stack(list(map(to_tensor, image)))
# assert isinstance(image, (np.ndarray, Image))
# image = copy_if_negative_strides(image)
# if isinstance(image, np.ndarray):
# # Convert to channels last if needed, because ToTensor expects to
# # receive that.
# if len(image.shape) == 2:
# pass
# elif image.shape[-1] not in {1, 3}:
# assert image.shape[0] in {1, 3}, image.shape
# image = image.transpose(1, 2, 0)
# # image = channels_last(image)
# image = F.to_tensor(image)
# assert isinstance(image, Tensor), image.shape
# return image
@dataclass
class ToTensor(ToTensor_, Transform):
    # Thin wrapper around the `image_to_tensor` single-dispatch function
    # defined in this module; all the conversion logic lives there.
    def __call__(self, image):
        """
        Args:
            image (PIL Image or numpy.ndarray): Image to be converted to tensor.

        Returns:
            Tensor: Converted image.

        NOTE: torchvision's ToTensor transform assumes that whatever it is given
        is always in channels_last format (as is usually the case with PIL
        images) and always returns images with the channels *first*!

        Converts a PIL Image or numpy.ndarray (H x W x C) in the range
        [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range
        [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P,
        I, F, RGB, YCbCr, RGBA, CMYK, 1) or if the numpy.ndarray has
        dtype = np.uint8

        Because this delegates to `image_to_tensor`, any other input type
        registered on that function (lists, shape tuples, gym spaces) is
        accepted as well.
        """
        return image_to_tensor(image)
# @classmethod
# def shape_change(cls, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
# from .channels import ChannelsFirstIfNeeded
# return ChannelsFirstIfNeeded.shape_change(input_shape)
# @classmethod
# def space_change(cls, input_space: gym.Space) -> gym.Space:
# if not isinstance(input_space, spaces.Box):
# logger.warning(UserWarning(f"Transform {cls} is only meant for Box spaces, not {input_space}"))
# return input_space
# return spaces.Box(
# low=0.,
# high=1.,
# shape=cls.shape_change(input_space.shape),
# dtype=np.float32,
# )
================================================
FILE: sequoia/common/transforms/transform.py
================================================
""" Defines a 'smarter' Transform class. """
from abc import abstractmethod
from typing import Generic, Tuple, TypeVar, Union, overload
import numpy as np
from gym import Space
from PIL.Image import Image
from torch import Tensor
# Type of the values a transform accepts.
InputType = TypeVar("InputType")
# Type of the values a transform produces.
OutputType = TypeVar("OutputType")
# An image: constrained to a PIL image, a numpy array or a torch Tensor.
Img = TypeVar("Img", Image, np.ndarray, Tensor)
# A shape, as a tuple of ints.
Shape = TypeVar("Shape", bound=Tuple[int, ...])
class Transform(Generic[InputType, OutputType]):
    """Callable that can also tell you its impact on the shape of inputs.

    The overloads of `__call__` describe the three kinds of argument a
    transform is meant to accept: an actual input, a shape tuple, or a gym
    `Space`. Shapes map to shapes and spaces map to spaces.
    """

    # Applied to an input value: produces the transformed value.
    @overload
    def __call__(self, input: InputType) -> OutputType:
        ...

    # Applied to a shape: produces the resulting shape.
    @overload
    def __call__(self, input: Shape) -> Shape:
        ...

    # Applied to a gym space: produces the resulting space.
    @overload
    def __call__(self, input: Space) -> Space:
        ...

    # NOTE(review): this class only derives from `Generic`, so
    # `@abstractmethod` is presumably not enforced at instantiation time --
    # confirm whether an ABC base was intended.
    @abstractmethod
    def __call__(self, input: Union[InputType, Space, Shape]) -> Union[OutputType, Space, Shape]:
        pass
================================================
FILE: sequoia/common/transforms/transform_enum.py
================================================
""" Transforms and such. Trying to make it possible to parse such from the
command-line.
Also, playing around with the idea of adding the ability to predict the change
in shape resulting from the transforms, à-la-Tensorflow.
"""
from enum import Enum
from typing import Any, Callable, List, Tuple, TypeVar, Union
import gym
import torch
from simple_parsing.helpers.serialization.encoding import encode
from torchvision.transforms import Compose as ComposeBase
from torchvision.transforms import RandomGrayscale
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import decode
logger = get_logger(__name__)
from .channels import (
ChannelsFirst,
ChannelsFirstIfNeeded,
ChannelsLast,
ChannelsLastIfNeeded,
ThreeChannels,
)
from .resize import Resize
from .to_tensor import ToTensor
from .transform import Transform
# TODO: Add names to the dimensions in the transforms!
# from pl_bolts.models.self_supervised.simclr import (SimCLREvalDataTransform,
# SimCLRTrainDataTransform)
class Transforms(Enum):
    """Enum of possible transforms.

    By having this as an Enum, we can choose which transforms to use from the
    command-line.
    This also makes it easier to check for identity, e.g. to check whether a
    particular transform was used.

    Each member's value is a transform instance, and calling a member applies
    that transform to the argument.

    TODO: Add the SimCLR/MOCO/etc transforms from https://pytorch-lightning-bolts.readthedocs.io/en/latest/transforms.html
    TODO: Figure out a way to let people customize the arguments to the transforms?
    """

    three_channels = ThreeChannels()
    to_tensor = ToTensor()
    random_grayscale = RandomGrayscale()
    channels_first = ChannelsFirst()
    channels_first_if_needed = ChannelsFirstIfNeeded()
    channels_last = ChannelsLast()
    channels_last_if_needed = ChannelsLastIfNeeded()
    resize_64x64 = Resize((64, 64))
    resize_32x32 = Resize((32, 32))

    def __call__(self, x):
        """Apply the wrapped transform to `x` and return its result."""
        return self.value(x)

    @classmethod
    def _missing_(cls, value: Any):
        """Resolve a lookup whose `value` is not one of the enum values.

        Members are scanned in definition order. The first member whose name
        equals `value`, or whose wrapped transform has exactly the same type
        as `value`, is returned. Since only the type is compared, a transform
        instance maps to the first member of that type, whatever its
        arguments. When nothing matches, the default `Enum` handling applies.
        """
        for e in cls:
            if e.name == value:
                return e
            elif type(e.value) == type(value):
                return e
        return super()._missing_(value)

    def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
        """Not implemented: always raises `NotImplementedError`."""
        raise NotImplementedError(f"TODO: Add shape (tuple) support to {self}")

    def space_change(self, input_space: gym.Space) -> gym.Space:
        """Not implemented: always raises `NotImplementedError`."""
        raise NotImplementedError(f"TODO: Add space support to {self}")
# Type of the items held by a `Compose`: any callable.
T = TypeVar("T", bound=Callable)


class Compose(List[T], ComposeBase):
    """Extend the Compose class of torchvision with methods of `list`.

    This can also be passed in members of the `Transforms` enum, which makes it
    possible to do something like this:

    >>> transforms = Compose([Transforms.to_tensor, Transforms.three_channels,])
    >>> Transforms.three_channels in transforms
    True
    >>> transforms += [Transforms.resize_32x32]
    >>> from pprint import pprint
    >>> pprint(transforms)
    [,
    ,
    ]

    NEW: This Compose transform also applies on gym spaces:

    >>> import numpy as np
    >>> from gym.spaces import Box
    >>> image_space = Box(0, 255, (28, 28, 1), dtype=np.uint8)
    >>> transforms(image_space)
    TensorBox(0.0, 1.0, (3, 32, 32), torch.float32)
    """

    # NOTE(review): the expected `pprint` output in the docstring example
    # above looks like it lost the reprs of its items (only brackets and
    # commas remain) -- confirm against the real output before relying on
    # that doctest.

    def __init__(self, *args, **kwargs):
        # Initialise the list part first, then hand this very list to the
        # torchvision Compose as its `transforms`.
        super().__init__(*args, **kwargs)
        ComposeBase.__init__(self, transforms=self)
# def shape_change(self, input_shape: Union[Tuple[int, ...], torch.Size]) -> Tuple[int, ...]:
# for transform in self:
# if isinstance(transform, Transforms):
# transform = transform.value
# if isinstance(transform, Transform) or hasattr(transform, "shape_change"):
# input_shape = transform.shape_change(input_shape)
# else:
# logger.debug(
# f"Unable to detect the change of shape caused by "
# f"transform {transform}, assuming its output has same "
# f"shape as its input."
# )
# logger.debug(f"Final shape: {input_shape}")
# return input_shape
@encode.register
def encode_transforms(v: Transforms) -> str:
    """Serialize a `Transforms` member as its name."""
    member_name = v.name
    return member_name
@decode.register
def decode_transforms(v: str) -> Transforms:
    """Look up the `Transforms` member whose name is `v`."""
    member = Transforms[v]
    return member
# When executed as a script, run the doctests contained in this module.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
================================================
FILE: sequoia/common/transforms/transforms_test.py
================================================
from dataclasses import dataclass, field
from typing import List, Tuple
import gym
import numpy as np
import pytest
import torch
from gym import spaces
from sequoia.conftest import requires_pyglet
from sequoia.utils.serialization import Serializable
from . import Compose, Transforms
@pytest.mark.parametrize(
"transform,input_shape,output_shape",
[
## Channels first:
(Transforms.channels_first, (9, 9, 3), (3, 9, 9)),
# Check that the ordering doesn't get messed up:
(Transforms.channels_first, (9, 12, 3), (3, 9, 12)),
(Transforms.channels_first, (400, 600, 3), (3, 400, 600)),
# Axes get permuted even when the channels are already 'first'.
(Transforms.channels_first, (3, 12, 9), (9, 3, 12)),
## Channels first (if needed):
(Transforms.channels_first_if_needed, (9, 9, 3), (3, 9, 9)),
(Transforms.channels_first_if_needed, (9, 12, 3), (3, 9, 12)),
(Transforms.channels_first_if_needed, (400, 600, 3), (3, 400, 600)),
# Axes do NOT get permuted when the channels are already 'first'.
(Transforms.channels_first_if_needed, (3, 12, 9), (3, 12, 9)),
# Does nothing when the channel dim isn't in {1, 3}:
(Transforms.channels_first_if_needed, (7, 12, 13), (7, 12, 13)),
(Transforms.channels_first_if_needed, (7, 12, 123), (7, 12, 123)),
# when the input is 4-dimensional with batch size of 1 or 3, still works:
(Transforms.channels_first_if_needed, (1, 28, 12, 3), (1, 3, 28, 12)),
(Transforms.channels_first_if_needed, (1, 400, 600, 3), (1, 3, 400, 600)),
(Transforms.channels_first_if_needed, (1, 3, 28, 27), (1, 3, 28, 27)),
(Transforms.channels_first_if_needed, (3, 28, 12, 3), (3, 3, 28, 12)),
(Transforms.channels_first_if_needed, (3, 400, 600, 3), (3, 3, 400, 600)),
(Transforms.channels_first_if_needed, (3, 3, 28, 27), (3, 3, 28, 27)),
## Channels Last:
(Transforms.channels_last, (3, 9, 9), (9, 9, 3)),
# Check that the ordering doesn't get messed up:
(Transforms.channels_last, (3, 9, 12), (9, 12, 3)),
# Axes get permuted even when the channels are already 'last'.
(Transforms.channels_last, (5, 6, 1), (6, 1, 5)),
## Channels Last (if needed):
(Transforms.channels_last_if_needed, (3, 9, 9), (9, 9, 3)),
# Check that the ordering doesn't get messed up:
(Transforms.channels_last_if_needed, (3, 9, 12), (9, 12, 3)),
# Axes do NOT get permuted when the channels are already 'last':
(Transforms.channels_last_if_needed, (5, 6, 1), (5, 6, 1)),
(Transforms.channels_last_if_needed, (12, 13, 3), (12, 13, 3)),
# Test out the 'ThreeChannels' transform
(Transforms.three_channels, (7, 12, 13), (7, 12, 13)),
(Transforms.three_channels, (1, 28, 28), (3, 28, 28)),
(Transforms.three_channels, (28, 28, 1), (28, 28, 3)),
# Test out the 'Resize' transforms
(Transforms.resize_64x64, (3, 128, 128), (3, 64, 64)),
(Transforms.resize_64x64, (128, 128, 3), (64, 64, 3)),
(Transforms.resize_64x64, (3, 64, 64), (3, 64, 64)),
(Transforms.resize_64x64, (64, 64, 3), (64, 64, 3)),
(Transforms.resize_64x64, (3, 111, 128), (3, 64, 64)),
(Transforms.resize_64x64, (111, 128, 3), (64, 64, 3)),
],
)
def test_transform(transform: Transforms, input_shape, output_shape):
    """Check a transform on a tensor, on a shape tuple and on a Box space, and
    check that it still behaves the same after a JSON round-trip.
    """
    x = torch.rand(input_shape)
    assert transform(x).shape == output_shape, transform
    # Apply the transform onto the input shape directly:
    assert transform(input_shape) == output_shape
    input_space = spaces.Box(low=0, high=1, shape=input_shape)
    output_space = spaces.Box(low=0, high=1, shape=output_shape)
    # Apply the transform onto the input space directly:
    actual_output_space = transform(input_space)
    assert actual_output_space == output_space

    # TODO: Test that serializing / deserializing the transforms works correctly.
    # Small serializable container holding a list of transforms.
    @dataclass
    class Foo(Serializable):
        transforms: List[Transforms] = field(default_factory=list)

    foo = Foo(transforms=[transform])
    # Round-trip through JSON and compare with the original object.
    foo_ = Foo.loads_json(foo.dumps_json())
    assert foo_ == foo
    # The deserialized transforms must have the same effect as the originals.
    assert Compose(foo_.transforms)(x).shape == output_shape
    assert Compose(foo_.transforms)(input_space) == output_space
@pytest.mark.parametrize(
"transform,input_shape,output_shape",
[
# NOTE: to_tensor also does the channels-first operation (because since the
# torchvision transform ToTensor does it, we do it also).
(Transforms.to_tensor, (9, 9, 3), (3, 9, 9)),
(Transforms.to_tensor, (3, 9, 9), (3, 9, 9)),
],
)
def test_to_tensor(transform: Transforms, input_shape, output_shape):
    """Check `to_tensor` on a uint8 array, on its shape and on its Box space."""
    x = np.random.randint(0, 255, input_shape, dtype=np.uint8)
    # x = PIL.Image.fromarray(x, mode="RGB")
    y = transform(x)
    assert y.shape == output_shape
    # The same transform applied to the shape tuple predicts the output shape.
    assert transform(input_shape) == output_shape
    assert isinstance(y, torch.Tensor)
    # A uint8 image space is expected to become a float32 space in [0, 1].
    input_space = spaces.Box(low=0, high=255, shape=input_shape, dtype=np.uint8)
    output_space = spaces.Box(low=0, high=1, shape=output_shape, dtype=np.float32)
    assert transform(input_space) == output_space
@pytest.mark.parametrize(
"transform, input_shape",
[
(Transforms.channels_last_if_needed, (7, 12, 13)),
],
)
def test_applying_transforms_on_weird_input_raises_error(
    transform: Transforms, input_shape: Tuple[int, ...]
):
    """Check that the transform raises on the shape, on the space and on a
    sample of the space.
    """
    # Applied on the shape tuple.
    with pytest.raises(Exception):
        transform(input_shape)
    input_space = spaces.Box(low=0, high=255, shape=input_shape, dtype=np.uint8)
    # Applied on the space.
    with pytest.raises(Exception):
        transform(input_space)
    # Applied on an actual sample from the space.
    with pytest.raises(Exception):
        transform(input_space.sample())
def test_compose_applied_on_shape():
    """Check that a `Compose` gives consistent results on a tensor and on its shape."""
    transform = Compose([Transforms.channels_first])
    start_shape = (9, 9, 3)
    x = transform(torch.rand(start_shape))
    assert x.shape == (3, 9, 9)
    # Applying the Compose to the shape tuple predicts the tensor's shape.
    assert x.shape == transform(start_shape)
    assert x.shape == transform(start_shape) == (3, 9, 9)
import gym
from sequoia.common.gym_wrappers import PixelObservationWrapper, TransformObservation
@requires_pyglet
def test_channels_first_transform_on_gym_env():
    """Check that wrapping a pixel CartPole env with the transforms gives
    channels-first observations and a matching observation space.
    """
    env = gym.make("CartPole-v0")
    env = PixelObservationWrapper(env)
    # Before the transforms, observations are channels-last.
    assert env.reset().shape == (400, 600, 3)
    transform = Compose(
        [
            Transforms.to_tensor,
            Transforms.channels_first_if_needed,
        ]
    )
    env = TransformObservation(env, transform)
    # After the transforms, observations and the space are channels-first.
    assert env.reset().shape == (3, 400, 600)
    assert env.observation_space.shape == (3, 400, 600)
    obs, *_ = env.step(env.action_space.sample())
    assert obs.shape == (3, 400, 600)
def test_preserves_device_when_possible():
    """Placeholder: this test currently has no assertions."""
    # TODO: Write a test that checks which transforms can be run on GPU, and checks
    # that they preserve the `device` attribute of a space when it's applied on a space.
    pass
================================================
FILE: sequoia/common/transforms/utils.py
================================================
from typing import Any
import numpy as np
from gym import spaces
from PIL import Image
from torch import Tensor
from sequoia.common.spaces.image import Image as ImageSpace
def is_image(v: Any) -> bool:
    """Tell whether `v` is an image, an image tensor/array, or an image space.

    PIL images and `ImageSpace` instances always count as images. Tensors,
    numpy arrays and `Box` spaces count only when they have at least three
    dimensions.
    """
    if isinstance(v, (Image.Image, ImageSpace)):
        return True
    if isinstance(v, (Tensor, np.ndarray, spaces.Box)):
        return len(v.shape) >= 3
    return False
================================================
FILE: sequoia/common.puml
================================================
@startuml common
!include gym.puml
' class List
package common {
abstract class Batch {}
package transforms as common.transforms {
enum Transforms {
to_tensor: ToTensor
three_channels: ThreeChannels
random_grayscale: RandomGrayscale
channels_first: ChannelsFirst
channels_last: ChannelsLast
resize_64x64: Resize
resize_32x32: Resize
...
}
abstract class Transform
class Compose extends torchvision.transforms.Compose {
}
}
package gym_wrappers as common.gym_wrappers {}
package spaces as common.spaces {}
}
@enduml
================================================
FILE: sequoia/conftest.py
================================================
import json
import logging
import sys
from pathlib import Path
from typing import Any, Iterable, List, Optional, Type, get_type_hints
import gym
import numpy as np
import pytest
from sequoia.common.config import Config
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Method
from sequoia.settings.rl.envs import (
ATARI_PY_INSTALLED,
METAWORLD_INSTALLED,
MONSTERKONG_INSTALLED,
MTENV_INSTALLED,
MUJOCO_INSTALLED,
)
from sequoia.methods import AVALANCHE_INSTALLED, SB3_INSTALLED
# Prevent the collection of these modules if the requirements for them aren't installed.
# `collect_ignore` and `collect_ignore_glob` are the names pytest reads from a
# conftest file to exclude paths from collection.
collect_ignore = []
collect_ignore_glob = []
if not MONSTERKONG_INSTALLED:
    collect_ignore.append("settings/rl/envs/monsterkong.py")
if not MUJOCO_INSTALLED:
    collect_ignore.append("settings/rl/envs/mujoco")
if not AVALANCHE_INSTALLED:
    collect_ignore.append("methods/avalanche_methods")
if not SB3_INSTALLED:
    collect_ignore.append("methods/stable_baselines3_methods")

logger = logging.getLogger(__name__)

# Short aliases for the pytest marks used most often in the test suite.
parametrize = pytest.mark.parametrize
xfail = pytest.mark.xfail
def xfail_param(*args, reason: str):
    """Build a `pytest.param` from `args` that is expected to fail, for `reason`."""
    expected_failure = pytest.mark.xfail(reason=reason)
    return pytest.param(*args, marks=expected_failure)
def skip_param(*args, reason: str):
    """Build a `pytest.param` from `args` that is always skipped, for `reason`."""
    skip_mark = pytest.mark.skip(reason=reason)
    return pytest.param(*args, marks=skip_mark)
def skipif_param(condition, *args, reason: str):
    """Build a `pytest.param` from `args` that is skipped when `condition` holds."""
    conditional_skip = pytest.mark.skipif(condition, reason=reason)
    return pytest.param(*args, marks=conditional_skip)
@pytest.fixture(autouse=True)
def add_np(doctest_namespace):
    """Make numpy available as `np` inside doctests (applied automatically)."""
    doctest_namespace["np"] = np
@pytest.fixture()
def trainer_config(tmp_path_factory):
    """Provide a `TrainerConfig` for a fast dev run in a fresh temporary directory."""
    log_dir = tmp_path_factory.mktemp("log_dir")
    # TODO: What if we don't have a GPU when testing?
    # TODO: Parametrize with the distributed backend, skip param if no GPU?
    options = dict(
        fast_dev_run=True,
        distributed_backend="dp",
        default_root_dir=log_dir,
    )
    return TrainerConfig(**options)
@pytest.fixture()
def config(tmp_path: Path):
    """Provide a debug `Config` (seed 123) that logs under the test's temporary path."""
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    results_dir = tmp_path / "tmp_results"
    results_dir.mkdir()
    debug_config = Config(debug=True, seed=123, log_dir=results_dir)
    return debug_config
@pytest.fixture(scope="session")
def session_config(tmp_path_factory: Path):
    """Provide a session-wide debug `Config` (seed 123) with its own temporary log dir."""
    # TODO: Set the results dir somehow with the value of this `tmp_path` fixture.
    log_dir = tmp_path_factory.mktemp("test_log_dir")
    shared_config = Config(debug=True, seed=123, log_dir=log_dir)
    return shared_config
def id_fn(params: Any) -> str:
    """Creates a 'name' for an execution of a parametrized test.

    Args:
        params: The parameter value to name.

    Returns:
        str: For a dict, its compact JSON form with sorted keys; for anything
        else, `str(params)`.
    """
    if not isinstance(params, dict):
        return str(params)
    return json.dumps(params, separators=(",", ":"), sort_keys=True)
def get_all_dataset_names(method_class: Type[Method] = None) -> List[str]:
    """List, sorted and without duplicates, the datasets of all applicable settings.

    When not given a method class, the `Method` class itself is used, which
    gives ALL the possible datasets.
    """
    target = method_class or Method
    unique_names = set()
    for setting in target.get_applicable_settings():
        unique_names.update(setting.available_datasets)
    return sorted(unique_names)
def get_dataset_params(
    method_type: Type[Method],
    supported_datasets: List[str],
    skip_unsuported: bool = True,
) -> List[str]:
    """Build the parametrization list over every dataset known for `method_type`.

    Supported datasets are kept as plain names. The others are wrapped in a
    skipped parameter when `skip_unsuported` is true, and in an
    expected-failure parameter otherwise.
    """
    wrap_unsupported = skip_param if skip_unsuported else xfail_param

    def as_param(dataset):
        if dataset in supported_datasets:
            return dataset
        return wrap_unsupported(dataset, reason="Not supported yet")

    return [as_param(dataset) for dataset in get_all_dataset_names(method_type)]
# Name of the command-line option used to choose the datasets to test on.
test_datasets_option_name: str = "datasets"


def pytest_addoption(parser):
    """Register the `--slow` flag and the datasets option with pytest."""
    parser.addoption("--slow", action="store_true", default=False)
    # Accepts zero or more dataset names; defaults to an empty list.
    parser.addoption(f"--{test_datasets_option_name}", action="store", nargs="*", default=[])
# Mark for slow tests. The condition is evaluated when this module is
# imported, by looking for the literal "--slow" in `sys.argv`.
slow = pytest.mark.skipif(
    "--slow" not in sys.argv,
    reason="This test is slow so we only run it when necessary.",
)


def slow_param(*args):
    """Mark a parameter as 'slow', so it's only run when using the "--slow" flag."""
    return pytest.param(*args, marks=slow)
def find_class_under_test(
    module, function, name: str = "method", global_var_name: str = None
) -> Optional[Type]:
    """Find the class that a test function is about.

    The type hints of `function` are searched first, for an argument called
    `name`, `{name}_class` or `{name}_type`. If none of them gives a class,
    the module-level variable `global_var_name` of `module` is used (it
    defaults to `name` capitalized).

    Returns the class found, or None when neither lookup succeeds.
    """
    module_name: str = module.__name__
    function_name: str = function.__name__
    hints = get_type_hints(function)
    fallback_name = global_var_name or name.capitalize()

    found: Optional[Type] = None
    for argument in (name, f"{name}_class", f"{name}_type"):
        found = hints.get(argument)
        if found:
            logger.debug(
                f"function {function_name} has annotation of type " f"{found} for argument {argument}."
            )
            break

    if found is not None:
        return found

    # Try to get the class to test from a global variable on the module.
    found = getattr(module, fallback_name, None)
    logger.debug(
        f"Test module {module_name} has a '{fallback_name}' gloval variable of type {found}"
    )
    return found
def parametrize_test_datasets(metafunc):
    """Parametrize the `test_dataset` argument of a test function, if it has one.

    The datasets come from the datasets command-line option when it is given
    ("ALL", "NONE", or explicit names). Otherwise they come from a
    `test_datasets` or `supported_datasets` global of the test module, with
    `["mnist", "cifar10"]` as a last resort.
    """
    # We want to get these from inspecting the test function:
    # The datasets to test on.
    test_datasets: List[str] = []
    default_test_datasets = ["mnist", "cifar10"]
    func_param_name = "test_dataset"
    global_var_names = ["test_datasets", "supported_datasets"]

    # Nothing to do for tests that don't take a `test_dataset` argument.
    if func_param_name not in metafunc.fixturenames:
        return

    module = metafunc.module
    function = metafunc.function
    module_name: str = module.__name__
    function_name: str = function.__name__

    # Get the test datasets from the command-line option.
    datasets_from_command_line = metafunc.config.getoption(test_datasets_option_name)

    if "ALL" in datasets_from_command_line:
        # Use every dataset applicable to the method class under test.
        method_class: Optional[Type[Method]] = find_class_under_test(
            module,
            function,
            name="method",
        )
        test_datasets = get_all_dataset_names(method_class)
    elif "NONE" in datasets_from_command_line:
        # A single, always-skipped placeholder parameter.
        test_datasets = [skip_param("?", reason="Set to skip, with command line arg.")]
    elif datasets_from_command_line:
        assert isinstance(datasets_from_command_line, list) and all(
            isinstance(v, str) for v in datasets_from_command_line
        )
        # If any datasets were set, use them.
        test_datasets = datasets_from_command_line
    else:
        # The default datasets to try are the ones specified at the global
        # variable with name {module_test_datasets_name} in the module.
        for global_var_name in global_var_names:
            test_datasets = getattr(module, global_var_name, None)
            if test_datasets is not None:
                break
        else:
            # None of the global variables exists on the module.
            logger.warning(
                RuntimeWarning(
                    f"Test module {module_name} didn't specify a test_datasets "
                    f"global variable, defaulting to {default_test_datasets}"
                )
            )
            test_datasets = default_test_datasets
        # NOTE(review): the nesting of this statement could not be recovered
        # from the flattened source -- confirm whether the sort applies only
        # to this branch or to every branch.
        test_datasets = sorted(test_datasets)

    logger.info(
        f"Parametrizing the '{func_param_name}' param of test "
        f"{module_name} :: {function_name} with {test_datasets}."
    )
    metafunc.parametrize(func_param_name, test_datasets)
def pytest_generate_tests(metafunc):
    """Automatically Parametrize the tests.

    Currently this only parametrizes the `test_dataset` argument, through
    `parametrize_test_datasets`.

    TODO: Having some fun parametrizing tests automatically, but should check
    that it's worth it, because otherwise it might make things too confusing.
    """
    parametrize_test_datasets(metafunc)
class DummyEnvironment(gym.Env):
    """Dummy environment for testing.

    The state is a counter. The reward is the absolute distance between the
    counter and the target value, and the episode is done when the counter
    equals the target. The actions are:
    0: keep the counter the same.
    1: Increment the counter.
    2: Decrement the counter.

    The counter wraps around modulo `max_value`.
    """

    def __init__(self, start: int = 0, target: int = 5, max_value: int = None):
        """Create the environment.

        Args:
            start: Value of the counter after a reset.
            target: Value of the counter that ends the episode.
            max_value: Modulus of the counter and size of the observation
                space. Defaults to `target * 2`.
        """
        self.i = start
        self.start = start
        max_value = max_value if max_value is not None else target * 2
        assert 0 <= target <= max_value
        self.max_value = max_value
        self.action_space = gym.spaces.Discrete(n=3)
        self.observation_space = gym.spaces.Discrete(n=max_value)
        self.target = target
        # Largest possible distance between the counter and the target.
        self.reward_range = (0, max(target, max_value - target))

        self.done: bool = False
        self._reset: bool = False

    def step(self, action: int):
        """Apply `action` and return `(state, reward, done, info)`.

        Raises:
            RuntimeError: If `reset` has not been called yet.
        """
        # The action modifies the state, producing a new state, and you get the
        # reward associated with that transition.
        if not self._reset:
            raise RuntimeError("Need to reset before you can step.")
        if action == 1:
            self.i += 1
        elif action == 2:
            self.i -= 1
        self.i %= self.max_value
        done = self.i == self.target
        reward = abs(self.i - self.target)
        # print(self.i, reward, done, action)
        return self.i, reward, done, {}

    def reset(self):
        """Put the counter back to its start value and return it."""
        self._reset = True
        self.i = self.start
        return self.i

    def seed(self, seed: Optional[int] = None) -> List[int]:
        """Seed the observation and action spaces and return what they report."""
        seeds = []
        seeds.append(self.observation_space.seed(seed))
        seeds.append(self.action_space.seed(seed))
        return seeds
# Skips a test when monsterkong is not installed.
monsterkong_required = pytest.mark.skipif(
    not MONSTERKONG_INSTALLED,
    reason="monsterkong is required for this test.",
)


def param_requires_monsterkong(*args):
    """Build a test parameter from `args` that is skipped when monsterkong is missing."""
    skip_reason = "monsterkong is required for this parameter."
    return skipif_param(not MONSTERKONG_INSTALLED, *args, reason=skip_reason)
# Skips a test when atari_py is not installed.
atari_py_required = pytest.mark.skipif(
    not ATARI_PY_INSTALLED,
    reason="atari_py is required for this test.",
)


def param_requires_atari_py(*args):
    """Build a test parameter from `args` that is skipped when atari_py is missing."""
    skip_reason = "atari_py is required for this parameter."
    return skipif_param(not ATARI_PY_INSTALLED, *args, reason=skip_reason)
# Skips a test when mtenv is not installed.
mtenv_required = pytest.mark.skipif(
    not MTENV_INSTALLED,
    reason="mtenv is required for this test.",
)


def param_requires_mtenv(*args):
    """Build a test parameter from `args` that is skipped when mtenv is missing."""
    skip_reason = "mtenv is required for this parameter."
    return skipif_param(not MTENV_INSTALLED, *args, reason=skip_reason)
# Metaworld needs mujoco
# Skips a test when metaworld is not installed.
metaworld_required = pytest.mark.skipif(
    not METAWORLD_INSTALLED,
    reason="metaworld is required for this test.",
)


def param_requires_metaworld(*args):
    """Build a test parameter from `args` that is skipped when metaworld is missing."""
    skip_reason = "metaworld is required for this parameter."
    return skipif_param(not METAWORLD_INSTALLED, *args, reason=skip_reason)
# Skips a test when mujoco-py is not installed.
mujoco_required = pytest.mark.skipif(
    not MUJOCO_INSTALLED,
    reason="mujoco-py is required for this test.",
)


def param_requires_mujoco(*args):
    """Build a test parameter from `args` that is skipped when mujoco-py is missing."""
    skip_reason = "mujoco-py is required for this parameter."
    return skipif_param(not MUJOCO_INSTALLED, *args, reason=skip_reason)
# Detect whether pyglet can be imported; the flag stays False if the import fails.
PYGLET_INSTALLED = False
try:
    import pyglet

    PYGLET_INSTALLED = True
except ImportError:
    pass

# Skips a test when pyglet is not installed.
requires_pyglet = pytest.mark.skipif(
    not PYGLET_INSTALLED, reason="pyglet is required to render envs."
)


def param_requires_pyglet(*args):
    """Build a test parameter from `args` that is skipped when pyglet is missing."""
    return skipif_param(
        not PYGLET_INSTALLED,
        *args,
        reason="pyglet is required to render envs.",
    )
================================================
FILE: sequoia/experiments/__init__.py
================================================
""" Package that defines a list of "Experiments".
"""
from .experiment import Experiment
from .hpo_sweep import HPOSweep
================================================
FILE: sequoia/experiments/experiment.py
================================================
""" Module used for launching an Experiment (applying a Method to one or more
Settings).
"""
import os
import shlex
import sys
from dataclasses import dataclass
from inspect import isclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from simple_parsing import ArgumentParser, choice, mutable_field
from sequoia.common.config import Config, WandbConfig
from sequoia.methods import Method, get_all_methods
from sequoia.settings import Results, Setting, all_settings
from sequoia.settings.presets import setting_presets
from sequoia.utils import Parseable, Serializable, get_logger
from sequoia.utils.logging_utils import get_logger
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Directory which contains this source file.
source_dir = Path(os.path.dirname(__file__))
def get_method_names() -> Dict[str, Type[Method]]:
    """Return a dict mapping the full name of each registered Method to its class.

    The methods are those returned by `get_all_methods()`. When two methods share
    the same full name, the one which comes last wins.
    """
    methods_by_full_name: Dict[str, Type[Method]] = {}
    for method_class in get_all_methods():
        methods_by_full_name[method_class.get_full_name()] = method_class
    return methods_by_full_name
@dataclass
class Experiment(Parseable, Serializable):
    """Applies a Method to an experimental Setting to obtain Results.

    When the `setting` is not set, this will apply the chosen method on all of
    its "applicable" settings. (i.e. all subclasses of its target setting).

    When the `method` is not set, this will apply all applicable methods on the
    chosen setting.
    """

    # Which experimental setting to use. When left unset, will evaluate the
    # provided method on all applicable settings.
    setting: Optional[Union[Setting, Type[Setting]]] = choice(
        {setting.get_name(): setting for setting in all_settings},
        default=None,
        type=str,
    )
    # Path to a json/yaml file containing preset options for the chosen setting.
    # Can also be one of the key from the `setting_presets` dictionary,
    # for convenience.
    benchmark: Optional[Union[str, Path]] = None

    # Which experimental method to use. When left unset, will evaluate all
    # compatible methods on the provided setting.
    method: Optional[Union[str, Method, Type[Method]]] = choice(get_method_names(), default=None)

    # All the other configuration options, which are independant of the choice
    # of Setting or of Method, go in this next dataclass here! For example,
    # things like the log directory, wether Cuda is used, etc.
    config: Config = mutable_field(Config)

    wandb: Optional[WandbConfig] = None

    @staticmethod
    def _preset_display_path(preset_file) -> str:
        """Return `preset_file` relative to the current directory when possible.

        `Path.relative_to` raises a ValueError when the file isn't located under
        the current working directory; the path is then shown unchanged, so that
        building an error message can never fail with an unrelated exception.
        """
        try:
            return str(Path(preset_file).relative_to(os.getcwd()))
        except ValueError:
            return str(preset_file)

    def __post_init__(self):
        """Validates the fields and resolves names / presets into actual objects.

        - A `setting` or `method` given as a string is replaced with the class
          that has that name.
        - When `benchmark` is set, it is resolved to a file (either directly or
          through the `setting_presets` dict) and `self.setting` is replaced with
          the Setting instance loaded from that file.

        Raises
        ------
        RuntimeError
            If neither `setting` nor `method` is set, if the benchmark can't be
            found, if command-line arguments for the Setting are passed along
            with a benchmark, or if the method isn't applicable to the setting.
        NotImplementedError
            If a benchmark is used without specifying a method.
        """
        if not (self.setting or self.method):
            raise RuntimeError("One of `setting` or `method` must be set!")

        # All settings have a unique name.
        if isinstance(self.setting, str):
            self.setting = get_class_with_name(self.setting, all_settings)

        # Each Method also has a unique name.
        if isinstance(self.method, str):
            # NOTE: Only `get_all_methods` is available in this module: there is
            # no `all_methods` list to look into.
            self.method = get_class_with_name(self.method, get_all_methods())

        if self.benchmark:
            # If the provided benchmark isn't a path, try to get the value from
            # the `setting_presets` dict. If it isn't in the dict, raise an
            # error.
            if not Path(self.benchmark).is_file():
                if self.benchmark in setting_presets:
                    self.benchmark = setting_presets[self.benchmark]
                else:
                    raise RuntimeError(
                        f"Could not find benchmark '{self.benchmark}': it "
                        f"is neither a path to a file or a key of the "
                        f"`setting_presets` dictionary. \n\n"
                        f"Available presets: \n"
                        + "\n".join(
                            f"- {preset_name}: \t{self._preset_display_path(preset_file)}"
                            for preset_name, preset_file in setting_presets.items()
                        )
                    )
            # Creating an experiment for the given setting, loaded from the
            # config file.
            # TODO: IDEA: Do the same thing for loading the Method?
            logger.info(
                f"Will load the options for the setting from the file " f"at path {self.benchmark}."
            )
            drop_extras = True
            if self.setting is None:
                # `Logger.warn` is a deprecated alias of `Logger.warning`.
                logger.warning(
                    UserWarning(
                        f"You didn't specify which setting to use, so this will "
                        f"try to infer the correct type of setting to use from the "
                        f"contents of the file, which might not work!\n (Consider "
                        f"running this with the `--setting` option instead."
                    )
                )
                # Find the first type of setting that fits the given file.
                drop_extras = False
                self.setting = Setting

            # Raise an error if any of the args in sys.argv would have been used
            # up by the Setting, just to prevent any ambiguities.
            try:
                _, unused_args = self.setting.from_known_args()
            except (ImportError, AssertionError) as exc:
                # NOTE: An ImportError can occur here because of a missing OpenGL
                # dependency, since when no arguments are passed, the default RL setting
                # is created (cartpole with pixel observations), which requires a render
                # wrapper to be added (which itself uses pyglet, which uses OpenGL).
                logger.warning(RuntimeWarning(f"Unable to check for unused args: {exc}"))
                # In this case, we just pretend that no arguments would have been used.
                unused_args = sys.argv[1:]

            ignored_args = list(set(sys.argv[1:]) - set(unused_args))
            if ignored_args:
                # TODO: This could also be trigerred if there were arguments
                # in the method with the same name as some from the Setting.
                raise RuntimeError(
                    f"Cannot pass command-line arguments for the Setting when "
                    f"loading a preset, since these arguments whould have been "
                    f"ignored when creating the setting of type {self.setting} "
                    f"anyway: {ignored_args}"
                )

            assert isclass(self.setting) and issubclass(self.setting, Setting)
            # Actually load the setting from the file.
            # TODO: Why isn't this using `load_benchmark`?
            self.setting = self.setting.load(path=self.benchmark, drop_extra_fields=drop_extras)
            self.setting.wandb = self.wandb

            if self.method is None:
                raise NotImplementedError(
                    f"For now, you need to specify a Method to use using the "
                    f"`--method` argument when loading the setting from a file."
                )

        if self.setting is not None and self.method is not None:
            if not self.method.is_applicable(self.setting):
                raise RuntimeError(
                    f"Method {self.method} isn't applicable to " f"setting {self.setting}!"
                )

        assert (
            self.setting is None
            or isinstance(self.setting, Setting)
            or issubclass(self.setting, Setting)
        )
        assert (
            self.method is None
            or isinstance(self.method, Method)
            or issubclass(self.method, Method)
        )

    @staticmethod
    def run_experiment(
        setting: Union[Setting, Type[Setting]],
        method: Union[Method, Type[Method]],
        config: Config,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """Launches an experiment, applying `method` onto `setting`
        and returning the corresponding results.

        This assumes that both `setting` and `method` are not None.
        This always returns a single `Results` object.

        If `method` is a class, an instance of that class is created from the
        command-line arguments `argv`.
        NOTE: For now, `setting` has to be a Setting instance (see the assertion
        below).

        If `strict_args` is True and there are leftover arguments (not consumed
        by either the Setting or the Method), a RuntimeError is raised.

        This then returns the result of `setting.apply(method, config=config)`.

        Parameters
        ----------
        setting : Union[Setting, Type[Setting]]
            The setting to apply the method on.
        method : Union[Method, Type[Method]]
            The method (or type of method) to apply.
        config : Config
            The configuration options, passed on to `setting.apply`.
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
        """
        assert setting is not None and method is not None
        assert isinstance(
            setting, Setting
        ), "TODO: Fix this, need to pass a wandb config to the Setting from the experiment!"

        if not (isinstance(setting, Setting) and isinstance(method, Method)):
            setting, method = parse_setting_and_method_instances(
                setting=setting, method=method, argv=argv, strict_args=strict_args
            )

        assert isinstance(setting, Setting)
        assert isinstance(method, Method)
        assert isinstance(config, Config)

        return setting.apply(method, config=config)

    def launch(
        self,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Results:
        """Launches the experiment, applying `self.method` onto `self.setting`
        and returning the corresponding results.

        This differs from `main` in that this assumes that both `self.setting`
        and `self.method` are not None, and so this always returns a single
        `Results` object.

        When either `self.setting` or `self.method` is still a class, instances
        are first created from the command-line arguments and stored on `self`.
        The `wandb` and `config` attributes of the setting are then set from
        the fields of this experiment.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            List of command-line args. When not set, uses the contents of
            `sys.argv`. Defaults to `None`.
        strict_args : bool, optional
            Whether to raise an error when encountering command-line arguments
            that are unexpected by both the Setting and the Method. Defaults to
            `False`.

        Returns
        -------
        Results
            An object describing the results of applying Method `self.method` onto
            the Setting `self.setting`.
        """
        assert self.setting is not None
        assert self.method is not None
        assert self.config is not None

        if not (isinstance(self.setting, Setting) and isinstance(self.method, Method)):
            self.setting, self.method = parse_setting_and_method_instances(
                setting=self.setting, method=self.method, argv=argv, strict_args=strict_args
            )

        assert isinstance(self.setting, Setting)
        assert isinstance(self.method, Method)

        self.setting.wandb = self.wandb
        self.setting.config = self.config

        return self.setting.apply(self.method, config=self.config)

    @classmethod
    def main(
        cls,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> Union[Results, Tuple[Dict, Any], List[Tuple[Dict, Results]]]:
        """Launches one or more experiments from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args
        with two other parsers.

        Parameters
        ----------
        - argv : Union[str, List[str]], optional, by default None
            command-line arguments to use. When None (default), uses sys.argv.
        - strict_args : bool, optional, by default False
            Whether leftover command-line arguments should raise an error. Only
            used when both the setting and the method are set.

        Returns
        -------
        Union[Results, List[Tuple[Dict, Results]]]
            Results of the experiment, if only applying a method to a setting.
            Otherwise, if either of `--setting` or `--method` aren't set, this
            will be the value returned by `launch_batch_of_runs`: a list of
            (run arguments, results) tuples.
        """
        # TODO: Clean this up with the new command-line API.
        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)
        # Keep the complete list of arguments: `launch_batch_of_runs` parses an
        # Experiment of its own, and so it needs the `--setting` / `--method`
        # options which are consumed by the parser below.
        full_argv = list(argv)

        experiment: Experiment
        experiment, unused_argv = cls.from_known_args(argv)

        setting: Optional[Type[Setting]] = experiment.setting
        method: Optional[Type[Method]] = experiment.method

        if method is None and setting is None:
            raise RuntimeError(f"One of setting or method must be set.")

        if setting and method:
            # One 'job': Launch it directly.
            results = experiment.launch(unused_argv, strict_args=strict_args)
            print("\n\n EXPERIMENT IS DONE \n\n")
            print(f"Results: {results}")
            return results

        # TODO: Test out this other case. Haven't used it in a while.
        # TODO: Move this to something like a BatchExperiment?
        return launch_batch_of_runs(setting=setting, method=method, argv=full_argv)
def launch_batch_of_runs(
    setting: Optional[Setting],
    method: Optional[Method],
    argv: Union[str, List[str]] = None,
) -> List[Tuple[Dict, Results]]:
    """Runs a batch of experiments, one per applicable (setting, method) pair.

    When a setting is available, all of its applicable methods are evaluated on
    it. Otherwise, the method is applied on all of its applicable settings.

    Parameters
    ----------
    setting : Optional[Setting]
        The setting to use. When `None`, the `--setting` parsed from `argv` is
        used instead.
    method : Optional[Method]
        The method to use. When `None`, the `--method` parsed from `argv` is used
        instead.
    argv : Union[str, List[str]], optional
        Command-line arguments. An `Experiment` is parsed from them in order to
        get the base `Config`, so they need to contain `--setting` and/or
        `--method`. When `None`, uses `sys.argv[1:]`.

    Returns
    -------
    List[Tuple[Dict, Results]]
        For each run, the keyword arguments that were passed to
        `Experiment.run_experiment` and the results of that run.
    """
    if argv is None:
        argv = sys.argv[1:]
    if isinstance(argv, str):
        argv = shlex.split(argv)

    experiment: Experiment
    experiment, argv = Experiment.from_known_args(argv)

    # The explicitly passed values take precedence over the parsed ones.
    if setting is None:
        setting = experiment.setting
    if method is None:
        method = experiment.method
    config = experiment.config

    # TODO: Maybe if everything stays exactly identical, we could 'cache'
    # the results of some experiments, so we don't re-run them all the time?

    # The lists of arguments for each 'job'.
    method_types: List[Type[Method]] = []
    setting_types: List[Type[Setting]] = []

    if setting:
        logger.info(f"Evaluating all applicable methods on Setting {setting}.")
        method_types = setting.get_applicable_methods()
        setting_types = [setting for _ in method_types]
    elif method:
        logger.info(f"Applying Method {method} on all its applicable settings.")
        setting_types = method.get_applicable_settings()
        method_types = [method for _ in setting_types]

    # Create a 'config' for each experiment.
    # Use a log_dir for each run using the 'base' log_dir (passed
    # when creating the Experiment), the name of the Setting, and
    # the name of the Method.
    run_configs: List[Config] = []
    for setting_type, method_type in zip(setting_types, method_types):
        run_config_kwargs = config.to_dict()
        run_config_kwargs["log_dir"] = (
            config.log_dir / setting_type.get_name() / method_type.get_name()
        )
        run_configs.append(Config(**run_config_kwargs))

    # Create one 'job' per setting-method combination:
    # NOTE: Some methods might use all the values in `argv`, and some
    # might not, so we set `strict_args=False`.
    arguments_of_each_run: List[Dict] = [
        dict(
            setting=setting_type,
            method=method_type,
            config=run_config,
            argv=argv,
            strict_args=False,
        )
        for setting_type, method_type, run_config in zip(setting_types, method_types, run_configs)
    ]

    # TODO: Use submitit or somethign like it, to run each of these in parallel:
    # See https://github.com/lebrice/Sequoia/issues/87 for more info.
    results_of_each_run: List[Results] = []
    for run_arguments in arguments_of_each_run:
        result = Experiment.run_experiment(**run_arguments)
        logger.info(f"Results for arguments {run_arguments}: {result}")
        results_of_each_run.append(result)

    all_results = list(zip(arguments_of_each_run, results_of_each_run))
    logger.info(f"All results: ")
    for run_arguments, run_results in all_results:
        print(f"Arguments: {run_arguments}")
        print(f"Results: {run_results}")
    return all_results
def parse_setting_and_method_instances(
    setting: Union[Setting, Type[Setting]],
    method: Union[Method, Type[Method]],
    argv: Union[str, List[str]] = None,
    strict_args: bool = False,
) -> Tuple[Setting, Method]:
    """Creates the Setting and Method instances from the command-line arguments.

    Values which are already instances are returned unchanged. For each value
    which is a class, the arguments of that class are added to a shared parser
    and an instance is created from the parsed arguments.

    Parameters
    ----------
    setting : Union[Setting, Type[Setting]]
        A Setting, or a type of Setting to instantiate.
    method : Union[Method, Type[Method]]
        A Method, or a type of Method to instantiate.
    argv : Union[str, List[str]], optional
        The command-line arguments, either as a list of tokens or as a single
        string (which is split with `shlex.split`). When `None`, the parser
        falls back to `sys.argv`.
    strict_args : bool, optional
        When True, unexpected arguments are an error for the parser. When False
        (default), they are only reported with a warning.

    Returns
    -------
    Tuple[Setting, Method]
        The setting and method instances.
    """
    # NOTE: The parser expects a sequence of tokens: a raw string would be split
    # into its individual characters.
    if isinstance(argv, str):
        argv = shlex.split(argv)

    # TODO: Should we raise an error if an argument appears both in the Setting
    # and the Method?
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    setting_needs_parsing = not isinstance(setting, Setting)
    method_needs_parsing = not isinstance(method, Method)

    if setting_needs_parsing:
        assert issubclass(setting, Setting)
        setting.add_argparse_args(parser)
    if method_needs_parsing:
        assert method is not None
        assert issubclass(method, Method)
        method.add_argparse_args(parser)

    if strict_args:
        args = parser.parse_args(argv)
    else:
        args, unused_args = parser.parse_known_args(argv)
        if unused_args:
            logger.warning(UserWarning(f"Unused command-line args: {unused_args}"))

    if setting_needs_parsing:
        setting = setting.from_argparse_args(args)
    if method_needs_parsing:
        method = method.from_argparse_args(args)

    return setting, method
def get_class_with_name(
    class_name: str,
    all_classes: Union[List[Type[Setting]], List[Type[Method]]],
) -> Union[Type[Method], Type[Setting]]:
    """Return the only class of `all_classes` whose `get_name()` is `class_name`.

    Raises
    ------
    RuntimeError
        If there is no class with that name, or if there is more than one.
    """
    matches = [candidate for candidate in all_classes if candidate.get_name() == class_name]

    if not matches:
        raise RuntimeError(
            f"Couldn't find any classes with name {class_name} in the list of "
            f"available classes {all_classes}!"
        )
    if len(matches) > 1:
        raise RuntimeError(
            f"There are more than one potential methods with name "
            f"{class_name}, which isn't supposed to happen! "
            f"(all_classes: {all_classes})"
        )
    return matches[0]
def check_has_descendants(potential_classes: List[Type[Method]]) -> List[bool]:
    """For each class in the list, tell whether another class of the same list is
    one of its descendants (a strict subclass).

    The result has one boolean per entry of `potential_classes`, in the same order.
    """
    has_descendant_flags: List[bool] = []
    for candidate in potential_classes:
        found_descendant = False
        for other_class in potential_classes:
            if issubclass(other_class, candidate) and other_class is not candidate:
                found_descendant = True
                break
        has_descendant_flags.append(found_descendant)
    return has_descendant_flags
def main():
    """Command-line entry point: logs the registered Settings and Methods (at the
    debug level), runs `Experiment.main()` and then exits with status code 0.
    """
    logger.debug(
        "Registered Settings: \n"
        + "\n".join(
            f"- {setting.get_name()}: {setting} ({setting.get_path_to_source_file()})"
            for setting in all_settings
        )
    )
    logger.debug(
        "Registered Methods: \n"
        + "\n".join(
            f"- {method.get_name()}: {method} ({method.get_path_to_source_file()})"
            for method in get_all_methods()
        )
    )
    Experiment.main()
    # NOTE: The `exit` builtin is added by the `site` module for interactive use,
    # and isn't guaranteed to exist: `sys.exit` is the one to use in programs.
    sys.exit(0)
================================================
FILE: sequoia/experiments/experiment_test.py
================================================
import shlex
import sys
from pathlib import Path
from typing import Optional, Type
import pytest
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods import Method, get_all_methods
from sequoia.methods.method_test import key_fn
from sequoia.settings import Results, Setting, all_settings
from .experiment import Experiment, get_method_names
# Dict mapping the full name of each Method to its class (see `get_method_names`).
method_names = get_method_names()
@pytest.mark.xfail(
    reason="@lebrice: I changed my mind on this. For example, it could make "
    "sense to have multiple methods called 'baseline' when a new Setting needs "
    "to create a new subclass of the BaseMethod or a new Method altogether."
)
def test_no_collisions_in_method_names():
    """Checks that no two registered methods share the same name."""
    methods = get_all_methods()
    unique_names = {method.get_name() for method in methods}
    assert len(unique_names) == len(methods)
def test_no_collisions_in_setting_names():
    """Checks that no two registered settings share the same name."""
    unique_names = {setting.get_name() for setting in all_settings}
    assert len(unique_names) == len(all_settings)
def test_applicable_methods():
    """Checks that the BaseMethod is listed as applicable to TraditionalSLSetting."""
    from sequoia.methods import BaseMethod
    from sequoia.settings import TraditionalSLSetting

    applicable_methods = TraditionalSLSetting.get_applicable_methods()
    assert BaseMethod in applicable_methods
def mock_apply(self: Setting, method: Method, config: Config) -> Results:
    """Stand-in for `Setting.apply` which doesn't configure, train or evaluate
    anything.

    Returns the (type of method, type of setting) tuple, so that tests can check
    which method was applied on which setting.
    """
    applied_method_type = type(method)
    setting_type = type(self)
    return applied_method_type, setting_type
@pytest.fixture()
def set_argv_for_debug(monkeypatch):
    """Replaces `sys.argv` with a debug command-line for the duration of the test."""
    debug_argv = shlex.split("main.py --debug --fast_dev_run")
    monkeypatch.setattr(sys, "argv", debug_argv)
@pytest.fixture(params=sorted(get_all_methods(), key=str))
def method_type(request, monkeypatch, set_argv_for_debug):
    """Fixture parametrized with every registered Method class (sorted by `str`)."""
    return request.param
@pytest.fixture(params=sorted(all_settings, key=key_fn))
def setting_type(request, monkeypatch, set_argv_for_debug):
    """Fixture parametrized with every registered Setting class, whose `apply`
    method is replaced with `mock_apply`.
    """
    chosen_setting: Type[Setting] = request.param
    monkeypatch.setattr(chosen_setting, "apply", mock_apply)
    # NOTE(review): The values aren't used. Presumably this is only here so that
    # the applicable methods get resolved (imported) - TODO confirm.
    for _ in chosen_setting.get_applicable_methods():
        pass
    return chosen_setting
def test_experiment_from_args(
    method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]]
):
    """Test that when parsing the 'Experiment' from the command-line, the
    `setting` and `method` fields get set to the classes corresponding to their
    names.
    """
    # NOTE: The `--method` choices are keyed by the full name of each method,
    # which is why the name is looked up in `method_names`.
    method_name = [k for k, v in method_names.items() if v is method_type][0]
    setting = setting_type.get_name()
    if not method_type.is_applicable(setting_type):
        # NOTE: The reason is passed positionally, since the `msg` keyword of
        # `pytest.skip` is deprecated in favor of `reason`.
        pytest.skip(
            f"Skipping test since Method {method_type} isn't applicable on "
            f"settings of type {setting_type}."
        )

    experiment = Experiment.from_args(f"--setting {setting} --method {method_name}")
    assert experiment.method is method_type
    assert experiment.setting is setting_type
def test_launch_experiment_with_constructor(
    method_type: Optional[Type[Method]], setting_type: Optional[Type[Setting]]
):
    """Test that an Experiment created with the constructor can be launched, and
    that it applies the chosen method on the chosen setting.

    The `setting_type` fixture replaces `apply` with `mock_apply`, which returns
    the (type of method, type of setting) tuple.
    """
    if not method_type.is_applicable(setting_type):
        # NOTE: The reason is passed positionally, since the `msg` keyword of
        # `pytest.skip` is deprecated in favor of `reason`.
        pytest.skip(
            f"Skipping test since Method {method_type} isn't applicable on "
            f"settings of type {setting_type}."
        )

    experiment = Experiment(method=method_type, setting=setting_type)
    all_results = experiment.launch("--debug --fast_dev_run --batch_size 1")
    assert all_results == (method_type, setting_type)
@slow
@pytest.mark.timeout(300)
def test_none_setting(method_type: Optional[Type[Method]], tmp_path: Path, monkeypatch):
    """Test that leaving the Setting unset runs on all applicable setting."""
    # The `--method` choices are keyed by the full name of each method.
    method = method_type.get_full_name()
    applicable_settings = method_type.get_applicable_settings()
    for setting_type in applicable_settings:
        monkeypatch.setattr(setting_type, "apply", mock_apply)

    all_results = Experiment.main(
        f"--method {method} --debug --fast_dev_run " f"--log_dir {tmp_path}"
    )
    # A batch of runs gives a list of (run arguments, results) tuples, where the
    # run arguments contain the setting and the method that were used.
    results_by_run = {
        (run_arguments["setting"], run_arguments["method"]): result
        for run_arguments, result in all_results
    }

    for setting_type in applicable_settings:
        result = results_by_run[(setting_type, method_type)]
        assert result == (method_type, setting_type)
@slow
@pytest.mark.timeout(300)
def test_none_method(setting_type: Optional[Type[Setting]]):
    """Test that leaving the method unset runs all applicable methods on the
    setting.
    """
    setting = setting_type.get_name()
    all_results = Experiment.main(f"--setting {setting} --debug --fast_dev_run --batch-size 1")
    # A batch of runs gives a list of (run arguments, results) tuples, where the
    # run arguments contain the setting and the method that were used.
    results_by_run = {
        (run_arguments["setting"], run_arguments["method"]): result
        for run_arguments, result in all_results
    }

    for method_type in setting_type.get_applicable_methods():
        result = results_by_run[(setting_type, method_type)]
        assert result == (method_type, setting_type)
================================================
FILE: sequoia/experiments/hpo_sweep.py
================================================
import json
import shlex
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union
from simple_parsing.helpers import choice
from sequoia.settings import Method, Results, Setting
from .experiment import Experiment, parse_setting_and_method_instances
@dataclass
class HPOSweep(Experiment):
    """Experiment which launches an HPO Sweep using Orion.

    TODO: Maybe use this somewhere in main.py once we redesign the command-line API.
    """

    # Path to a json file containing the orion-formatted search space dictionary.
    # When `None` (by default), the result of `get_search_space` will be used instead.
    search_space_path: Optional[Path] = None

    # Path indicating where the pickle database will be loaded or be created.
    database_path: Path = Path("orion_db.pkl")

    # manual, unique identifier for this experiment. This should only really be used
    # when launching multiple different experiments that involve the same method and
    # the same exact setting configurations, but where some other aspect of the
    # experiment is changed.
    experiment_id: Optional[str] = None

    # Maximum number of runs to perform.
    max_runs: Optional[int] = 10

    hpo_algorithm: str = choice(
        {
            "random": "random",
            "bayesian": "BayesianOptimizer",
        },
        default="bayesian",
    )  # TODO: BayesianOptimizer does not support num > 1

    def __post_init__(self):
        """Runs the validation of `Experiment`, then loads the search space from
        the json file at `search_space_path`, if there is one.

        The search space is an empty dict when no path is given.
        """
        super().__post_init__()
        self.search_space: Dict = {}
        if not self.search_space_path:
            return
        with open(self.search_space_path, "r") as search_space_file:
            self.search_space = json.load(search_space_file)

    def launch(self, argv: Union[str, List[str]] = None, strict_args: bool = False):
        """Launch the experiment, using its attributes and possibly also using the
        provided command-line arguments.

        This differs from `Experiment.launch` in that this will actually launch a
        sequence of runs, by calling the `hparam_sweep` method of `self.method`.

        Parameters
        ----------
        argv : Union[str, List[str]], optional
            Command-line arguments, used to create the Setting and the Method when
            they are not instances already. By default None.
        strict_args : bool, optional
            Passed on to `parse_setting_and_method_instances`, by default False.

        Returns
        -------
        Tuple
            The (best_params, best_objective) tuple returned by `hparam_sweep`.
        """
        already_instantiated = isinstance(self.setting, Setting) and isinstance(
            self.method, Method
        )
        if not already_instantiated:
            parsed_setting, parsed_method = parse_setting_and_method_instances(
                setting=self.setting,
                method=self.method,
                argv=argv,
                strict_args=strict_args,
            )
            self.setting, self.method = parsed_setting, parsed_method

        assert isinstance(self.setting, Setting)
        assert isinstance(self.method, Method)

        self.setting.wandb = self.wandb

        # TODO: IDEA: It could actually be really cool if we created a list of
        # Experiment objects here, and just call their 'launch' methods in parallel,
        # rather than do the sweep logic in the Method class!
        sweep_options = dict(
            search_space=self.search_space,
            database_path=self.database_path,
            experiment_id=self.experiment_id,
            max_runs=self.max_runs,
            hpo_algorithm=self.hpo_algorithm,
        )
        best_params, best_objective = self.method.hparam_sweep(self.setting, **sweep_options)

        params_summary = "\n".join(f"\t{key}: {value}" for key, value in best_params.items())
        print("Best params:\n" + params_summary)
        print(f"Best objective: {best_objective}")
        return (best_params, best_objective)

    @classmethod
    def main(
        cls,
        argv: Union[str, List[str]] = None,
        strict_args: bool = False,
    ) -> List[Tuple[Dict, Results]]:
        """Launches this experiment from the command-line.

        First, we get the choice of method and setting using a first parser.
        Then, we parse the Setting and Method objects using the remaining args.

        Parameters
        ----------
        - argv : Union[str, List[str]], optional, by default None
            command-line arguments to use. When None (default), uses sys.argv.
        - strict_args : bool, optional, by default False
            Passed on to `launch`.

        Returns
        -------
        The value returned by `launch`: the best trial parameters and the best
        objective found during the sweep.

        Raises
        ------
        RuntimeError
            If either the setting or the method isn't set.
        """
        if argv is None:
            argv = sys.argv[1:]
        if isinstance(argv, str):
            argv = shlex.split(argv)

        experiment: HPOSweep
        experiment, remaining_argv = cls.from_known_args(argv)

        if experiment.method is None or experiment.setting is None:
            raise RuntimeError("Both `--setting` and `--method` must be set to run a sweep.")

        return experiment.launch(remaining_argv, strict_args=strict_args)
def main():
    """Entry point of the script: runs `HPOSweep.main()` without arguments, in
    which case the command-line arguments are taken from `sys.argv`.
    """
    HPOSweep.main()


if __name__ == "__main__":
    main()
================================================
FILE: sequoia/experiments/hpo_sweep_test.py
================================================
import random
import shlex
import sys
from pathlib import Path
from typing import Optional, Type
import pytest
from sequoia.common.config import Config
from sequoia.methods import Method, get_all_methods
from sequoia.methods.method_test import key_fn
from sequoia.methods.random_baseline import RandomBaselineMethod
from sequoia.settings import Results, Setting, all_settings
from sequoia.utils.serialization import Serializable
from .hpo_sweep import HPOSweep
class MockResults(Results):
    """Fake results used to test the sweeps: they hold the hyper-parameters of the
    method, and their objective is a random number drawn when they are created.
    """

    def __init__(self, hparams):
        # NOTE: This attribute is read back in `to_log_dict`, so it has to be
        # called `hparams`.
        self.hparams = hparams
        self._objective = random.random()

    @property
    def objective(self) -> float:
        """The (random) objective of these results, fixed at creation."""
        return self._objective

    def make_plots(self):
        """There is nothing to plot: returns an empty dict."""
        return {}

    def to_log_dict(self, verbose: bool = False):
        """Returns a dict with the hyper-parameters and the objective.

        The hyper-parameters are converted with `to_dict()` when they are a
        `Serializable` object, and are used as-is otherwise.
        """
        return {
            "hparams": self.hparams.to_dict()
            if isinstance(self.hparams, Serializable)
            else self.hparams,
            "objective": self.objective,
        }

    def summary(self):
        """Returns the string version of `to_log_dict()`."""
        return str(self.to_log_dict())
def mock_apply(self: Setting, method: Method, config: Config = None) -> Results:
    """Stand-in for `Setting.apply` which doesn't configure, train or evaluate
    anything.

    Returns `MockResults` created from the `hparams` attribute of the method (or
    from an empty dict, when the method doesn't have that attribute).
    """
    method_hparams = getattr(method, "hparams", {})
    return MockResults(method_hparams)
@pytest.fixture()
def set_argv_for_debug(monkeypatch):
    """Replaces `sys.argv` with a debug command-line for the duration of the test."""
    debug_argv = shlex.split("main.py --debug --fast_dev_run")
    monkeypatch.setattr(sys, "argv", debug_argv)
@pytest.fixture(params=sorted(get_all_methods(), key=str))
def method_type(request, monkeypatch, set_argv_for_debug):
    """Fixture parametrized with every registered Method class (sorted by `str`)."""
    return request.param
@pytest.fixture(params=sorted(all_settings, key=key_fn))
def setting_type(request, monkeypatch, set_argv_for_debug):
    """Fixture parametrized with every registered Setting class, whose `apply`
    method is replaced with `mock_apply`.
    """
    chosen_setting: Type[Setting] = request.param
    monkeypatch.setattr(chosen_setting, "apply", mock_apply)
    # TODO: Not sure what this was doing, but I think it was important that all methods
    # get imported here.
    for _ in chosen_setting.get_applicable_methods():
        pass
    return chosen_setting
@pytest.mark.skip(reason="BUG: seems to make other tests hang, because of Orion's bug.")
def test_launch_sweep_with_constructor(
    method_type: Optional[Type[Method]],
    setting_type: Optional[Type[Setting]],
    tmp_path: Path,
):
    """Test that a sweep created with the constructor can be launched, and that it
    gives back the best hyper-parameters and the best performance.
    """
    if not method_type.is_applicable(setting_type):
        # NOTE: The reason is passed positionally, since the `msg` keyword of
        # `pytest.skip` is deprecated in favor of `reason`.
        pytest.skip(
            f"Skipping test since Method {method_type} isn't applicable on settings of type {setting_type}."
        )

    if issubclass(method_type, RandomBaselineMethod):
        # NOTE: `pytest.skip` raises an exception, so nothing runs after it.
        pytest.skip(
            "BUG: RandomBaselineMethod has a hparam space that causes the HPO algo to go into an infinite loop."
        )

    experiment = HPOSweep(
        method=method_type,
        setting=setting_type,
        database_path=tmp_path / "debug.pkl",
        config=Config(debug=True),
        max_runs=3,
    )
    best_hparams, best_performance = experiment.launch(["--debug"])
    assert best_hparams
    assert best_performance
================================================
FILE: sequoia/main.py
================================================
"""Sequoia - The Research Tree
Used to run experiments, which consist in applying a Method to a Setting.
"""
from argparse import _SubParsersAction
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Type, Union
from simple_parsing import ArgumentParser
from simple_parsing.help_formatter import SimpleHelpFormatter
from simple_parsing.helpers import choice
import sequoia
from sequoia.common.config import Config
from sequoia.common.config.wandb_config import WandbConfig
from sequoia.methods import get_all_methods
from sequoia.settings import all_settings
from sequoia.settings.base import Method, Results, Setting
from sequoia.utils import get_logger
# TODO: Fix all the `get_logger` to use __name__ instead of __file__.
logger = get_logger(__name__)
def main():
    """Adds all command-line arguments, parses the args, and runs the selected action."""
    parser = ArgumentParser(prog="sequoia", description=__doc__, add_dest_to_option_strings=False)
    parser.add_argument(
        "--version",
        action="version",
        version=sequoia.__version__,
        help="Displays the installed version of Sequoia and exits.",
    )

    command_subparsers = parser.add_subparsers(
        title="command",
        dest="command",
        description="Command to execute",
        parser_class=ArgumentParser,
        required=False,
    )
    add_run_command(command_subparsers)
    add_sweep_command(command_subparsers)
    add_info_command(command_subparsers)

    args = parser.parse_args()

    command: str = getattr(args, "command", None)

    if command is None:
        parser.print_help()

    elif command in ("run", "sweep"):
        # Both commands first create the Method and the Setting from the args.
        method_type: Type[Method] = args.method_type
        setting_type: Type[Setting] = args.setting_type

        method: Method = method_type.from_argparse_args(args)
        setting: Setting = setting_type.from_argparse_args(args)
        config: Config = args.config

        # TODO: Make this a bit cleaner, current need to set this `wandb` config as a property on
        # the setting. Could either subclass Config and add an Optional[WandbConfig] field, or just
        # add it directly to the existing Config class.
        # NOTE: Only the `run` command adds the WandbConfig arguments to its
        # parser, so the attribute doesn't exist with the `sweep` command.
        wandb_config: Optional[WandbConfig] = getattr(args, "wandb", None)
        setting.wandb = wandb_config

        if command == "run":
            run(setting=setting, method=method, config=config)
        else:
            # NOTE: Pass the Setting that was created and configured above.
            sweep(setting=setting, method=method, config=config)

    elif command == "info":
        info(component=args.component)
def add_run_command(command_subparsers: _SubParsersAction) -> None:
    """Adds the `run` command, with the arguments of `Config` and `WandbConfig`
    (stored at `config` and `wandb`), and those of the settings and methods.
    """
    parser_options = dict(
        description="Run an experiment on a given setting.",
        help="Run an experiment on a given setting.",
        add_dest_to_option_strings=False,
        formatter_class=SimpleHelpFormatter,
    )
    run_parser = command_subparsers.add_parser("run", **parser_options)
    for dataclass_type, destination in ((Config, "config"), (WandbConfig, "wandb")):
        run_parser.add_arguments(dataclass_type, dest=destination)
    add_args_for_settings_and_methods(run_parser)
def run(setting: Setting, method: Method, config: Config) -> Results:
    """Performs a single run, applying a method to a setting, and returns the results.

    The setting, config, method and results are logged at the debug level.
    """
    logger.debug("Setting:")
    try:
        setting_description = setting.dumps_yaml()
    except TypeError:
        # BUG: TypeError: __reduce_ex__() takes exactly one argument (0 given)
        # Fall back to logging the setting object itself.
        setting_description = setting
    logger.debug(setting_description)

    logger.debug("Config:")
    logger.debug(config.dumps_yaml())

    logger.debug("Method")
    logger.debug(str(method))

    run_results = setting.apply(method, config=config)

    logger.debug("Results:")
    logger.debug(run_results.summary())
    return run_results
# Extends `Config` with the options of the `sweep` command. The values of these
# fields are passed to `method.hparam_sweep` by the `sweep` function.
# For `hpo_algorithm`, the keys of the dict given to `choice` are the available
# choices, and the values are the corresponding algorithm names.
@dataclass
class SweepConfig(Config):
    """Configuration options for a HPO sweep."""

    # Path indicating where the pickle database will be loaded or be created.
    database_path: Path = Path("orion_db.pkl")

    # manual, unique identifier for this experiment. This should only really be used
    # when launching multiple different experiments that involve the same method and
    # the same exact setting configurations, but where some other aspect of the
    # experiment is changed.
    experiment_id: Optional[str] = None

    # Maximum number of runs to perform.
    max_runs: Optional[int] = 10

    # Which hyper-parameter optimization algorithm to use.
    hpo_algorithm: str = choice(
        {
            "random": "random",
            "bayesian": "BayesianOptimizer",
        },
        default="bayesian",
    )  # TODO: BayesianOptimizer does not support num > 1
def sweep(setting: Setting, method: Method, config: SweepConfig) -> Setting.Results:
    """Run a Hyper-Parameter Optimization sweep of `method` on `setting`.

    The search itself is delegated to `method.hparam_sweep`, which receives the
    setting along with the `database_path`, `experiment_id`, `max_runs` and
    `hpo_algorithm` fields of `config`.

    Returns the `(best_params, best_objective)` tuple produced by that call.
    NOTE(review): the `Setting.Results` return annotation does not describe this tuple.
    """
    print("Sweep!")

    logger.debug("Setting:")
    # BUG: TypeError: __reduce_ex__() takes exactly one argument (0 given)
    try:
        logger.debug(setting.dumps_yaml())
    except TypeError:
        # Fall back to logging the setting object when it can't be dumped to yaml.
        logger.debug(setting)

    logger.debug("Config:")
    config_yaml = config.dumps_yaml()
    logger.debug(config_yaml)
    logger.debug(f"Method: {method}")

    # TODO: IDEA: It could actually be really cool if we created a list of
    # Experiment objects here, and just call their 'launch' methods in parallel,
    # rather than do the sweep logic in the Method class!

    # TODO: Need to add these arguments again to the parser?
    sweep_options = dict(
        database_path=config.database_path,
        experiment_id=config.experiment_id,
        max_runs=config.max_runs,
        hpo_algorithm=config.hpo_algorithm,
    )
    best_params, best_objective = method.hparam_sweep(setting, **sweep_options)

    param_lines = [f"\t{key}: {value}" for key, value in best_params.items()]
    logger.info("Best params:\n" + "\n".join(param_lines))
    logger.info(f"Best objective: {best_objective}")
    return (best_params, best_objective)
def add_sweep_command(command_subparsers: _SubParsersAction) -> None:
    """Register the `sweep` sub-command on the given sub-parsers action.

    The created parser has `sweep` as its default `action`, receives the fields of
    `SweepConfig` (stored at `config`), and is then handed to
    `add_args_for_settings_and_methods` to get its setting / method sub-parsers.
    """
    summary = "Run a hyper-parameter optimization sweep."
    parser = command_subparsers.add_parser(
        "sweep",
        description=summary,
        help=summary,
        add_dest_to_option_strings=False,
    )
    parser.set_defaults(action=sweep)
    parser.add_arguments(SweepConfig, dest="config")
    add_args_for_settings_and_methods(parser)
def add_info_command(command_subparsers: _SubParsersAction) -> None:
    """Add the `info` command, which displays information about settings or methods.

    The `info` parser gets one optional sub-parser per Setting (named after
    `get_name()`) and per Method (named after `get_full_name()`). Each of these
    sub-parsers sets the `component` default to the corresponding class, while the
    `info` parser itself defaults `component` to None.

    Parameters
    ----------
    command_subparsers:
        The sub-parsers action of the top-level parser, to which `info` is added.
    """
    info_parser = command_subparsers.add_parser(
        "info",
        # NOTE: Not 100% sure what the difference is between help and description.
        description="Displays some information about a Setting or Method.",
        help="Displays some information about a Setting or Method.",
        add_dest_to_option_strings=False,
    )
    info_parser.set_defaults(component=None)
    info_parser.set_defaults(action=lambda namespace: info(namespace.component))

    component_subparser = info_parser.add_subparsers(
        title="component",
        dest="component",
        description="Setting or Method to display more information about.",
        # Was a leftover placeholder ("heyo"), which ended up in the help output.
        help="Name of the Setting or Method to describe.",
        required=False,
    )

    for setting in all_settings:
        setting_name = setting.get_name()
        setting_info_parser: ArgumentParser = component_subparser.add_parser(
            name=setting_name,
            description=f"Show more info about the {setting_name} setting.",
            help=get_help(setting),
            add_dest_to_option_strings=False,
        )
        setting_info_parser.set_defaults(component=setting)

    for method in get_all_methods():
        method_name = method.get_full_name()
        method_info_parser: ArgumentParser = component_subparser.add_parser(
            name=method_name,
            description=f"Show more info about the {method_name} method.",
            help=get_help(method),
            add_dest_to_option_strings=False,
        )
        method_info_parser.set_defaults(component=method)
def info(component: Union[Type[Setting], Type[Method]] = None) -> None:
    """Print information about a component, or an overview of what is available.

    When `component` (a Setting class or a Method class) is given, the builtin `help`
    is invoked on it. Otherwise, the tree string from `sequoia.utils.readme` is
    printed, followed by every registered method and the path to its source file.
    """
    if component is not None:
        # IDEA: Could colorize the tree with red or green depending on if the method is
        # applicable to the setting or not!
        help(component)
        return

    from sequoia.utils.readme import get_tree_string

    print(get_tree_string())
    # print("Registered Settings:")
    # for setting in all_settings:
    #     print(f"- {setting.get_name()}: {setting.get_path_to_source_file()}")
    print()
    print("Registered Methods:")
    print()
    for method_class in get_all_methods():
        source_path = method_class.get_path_to_source_file()
        print(f"- {method_class.get_full_name()}: {source_path}")
def get_help(component: "Union[Type[Setting], Type[Method]]") -> str:
    """Return the string to be passed as the 'help' argument to the parser.

    The text is made of (at most) the first two sentences of the component's
    docstring, where sentences are naively delimited by "." characters (so an
    abbreviation such as "e.g." counts as a sentence boundary). Components without a
    usable docstring get a placeholder that mentions the class name.

    Parameters
    ----------
    component:
        The Setting or Method class to summarize. Only its `__doc__` and `__name__`
        attributes are read.

    Returns
    -------
    str
        The summary, terminated by a single period.
    """
    docstring = component.__doc__ or ""
    # Strip each fragment and drop the empty ones (e.g. what follows the final period),
    # so that a one-sentence docstring gives "Sentence." rather than "Sentence. .",
    # and the indentation of multi-line docstrings doesn't leak between sentences.
    fragments = (fragment.strip() for fragment in docstring.split("."))
    sentences = [fragment for fragment in fragments if fragment][:2]
    if not sentences:
        sentences = [f"Help for class {component.__name__} (missing docstring)"]
    # NOTE: Seems to be nicer in general to have two whole sentences, even if they are
    # a bit longer, than a version truncated to a fixed width.
    return ". ".join(sentences) + "."
# def get_description(command: str, setting: Type[Setting], method: Type[Method] = None) -> str:
# """ Returns the text to be displayed right under the "usage" line in the command-line
# when either
# `sequoia run --help`
# or
# `sequoia run --help` is invoked.
# """
# if command == "run":
# if method is not None:
# return f"Run an experiment consisting of applying method {method.get_full_name()} on the {setting.get_name()} setting. (desc.)"
# else:
# return f"Run an experiment in the {setting.get_name()} setting. (desc.)"
def add_args_for_settings_and_methods(command_subparser: ArgumentParser):
    """Populate a command's parser with nested `<setting> <method>` sub-parsers.

    One sub-parser is created per Setting class and, inside each of those, one
    sub-parser per Method returned by that setting's `get_applicable_methods()`.

    NOTE: Setting classes whose 'available_datasets' attribute is missing or empty are
    skipped, so that choosing `Setting`, `SLSetting` or `RLSetting` isn't an option.

    This is used by the `sequoia run` and `sequoia sweep` commands.
    """
    setting_subparsers = command_subparser.add_subparsers(
        title="setting_choice",
        description="choice of experimental setting",
        dest="setting_type",
        metavar="",
        required=True,
    )

    def add_method_parsers(setting: Type[Setting], method_subparsers) -> None:
        # Adds one sub-parser per method that is applicable to the given setting.
        for method in setting.get_applicable_methods():
            method_name = method.get_full_name()
            method_description = (
                f"Run an experiment where the {method_name} method is "
                f"applied to the {setting.get_name()} setting."
            )
            method_parser: ArgumentParser = method_subparsers.add_parser(
                method_name,
                help=get_help(method),
                description=method_description,
                formatter_class=SimpleHelpFormatter,
            )
            method_parser.set_defaults(method_type=method)
            # TODO: Could also pass the setting to the method's `add_argparse_args` so
            # that it gets to change its default values!
            # method.add_argparse_args_for_setting(
            #     parser=method_parser, setting=setting,
            # )
            method.add_argparse_args(parser=method_parser)

    # Sort the settings (by number of parents, then by class name) so the actions come
    # up in a nice order.
    ordered_settings = sorted(
        all_settings,
        key=lambda setting_class: (len(setting_class.parents()), setting_class.__name__),
    )
    for setting in ordered_settings:
        setting_name = setting.get_name()
        has_datasets = bool(getattr(setting, "available_datasets", {}))
        if not has_datasets:
            # Don't add a parser for this setting, since it has no available datasets.
            # e.g.: Setting, SL, RL
            continue

        setting_parser: ArgumentParser = setting_subparsers.add_parser(
            setting_name,
            help=get_help(setting),
            description=f"Run an experiment in the {setting.get_name()} setting.",
            add_dest_to_option_strings=False,
            formatter_class=SimpleHelpFormatter,
        )
        setting_parser.set_defaults(setting_type=setting)
        # NOTE: By removing the `dest` argument to `add_argparse_args`, we're moving the
        # place where the setting's values are stored from 'setting' to
        # `camel_case(setting_class.__name__)`. Alternative would be to just assume that
        # the settings are dataclasses and add arguments for the setting at destination
        # 'setting' as before.
        setting.add_argparse_args(parser=setting_parser)
        # setting_parser.add_arguments(setting, dest="setting")

        add_method_parsers(
            setting,
            setting_parser.add_subparsers(
                title="method",
                dest="method_name",
                metavar="",
                description=f"which method to apply to the {setting_name} Setting.",
                required=True,
            ),
        )
# Only invoke `main()` when this module is executed as a script, not when imported.
if __name__ == "__main__":
    main()
================================================
FILE: sequoia/methods/README.md
================================================
# Sequoia - Methods
### Adding a new Method:
#### Prerequisites:
**- First, please take a look at the [examples](examples/)**
#### Steps:
1. Choose a target setting from the tree (See the "Available Settings" section below).
2. Create a new subclass of [`Method`](settings/base/bases.py), with the chosen target setting.
Your class should implement the following methods:
- `fit(train_env, valid_env)`
- `get_actions(observations, action_space) -> Actions`
The following methods are optional, but can be very useful to help customize how your method is used at train/test time:
- `configure(setting: Setting)`
- `on_task_switch(task_id: Optional[int])`
- `test(test_env)`
```python
class MyNewMethod(Method, target_setting=ClassIncrementalSetting):
... # Your code here.
def fit(self, train_env: DataLoader, valid_env: DataLoader):
# Train your model however you want here.
self.trainer.fit(
self.model,
train_dataloader=train_env,
val_dataloaders=valid_env,
)
def get_actions(self,
observations: Observations,
action_space: gym.Space) -> Actions:
# Return an "Action" (prediction) for the given observations.
# Each Setting has its own Observations, Actions and Rewards types,
# which are based on those of their parents.
return self.model.predict(observations.x)
def on_task_switch(self, task_id: Optional[int]):
# This method gets called if task boundaries are known in the current
# setting. Furthermore, if task labels are available, task_id will be
# the index of the new task. If not, task_id will be None.
# For example, you could do something like this:
self.model.current_output_head = self.model.output_heads[task_id]
```
3. Running / Debugging your method:
(at the bottom of your script, for example)
```python
if __name__ == "__main__":
## 1. Create the setting you want to apply your method on.
# First option: Create the Setting directly in code:
setting = ClassIncrementalSetting(dataset="cifar10", nb_tasks=5)
# Second option: Create the Setting from the command-line:
setting = ClassIncrementalSetting.from_args()
## 2. Create your Method, however you want.
my_method = MyNewMethod()
## 3. Apply your method on the setting to obtain results.
results = setting.apply(my_method)
# Optionally, display the results.
print(results.summary())
results.make_plots()
```
4. (WIP): Adding your new method to the tree:
- Place the script/package that defines your Method inside of the `methods` folder.
- Add the `@register_method` decorator to your Method definition, for example:
```python
from sequoia.methods import register_method
@register_method
class MyNewMethod(Method, target_setting=ClassIncrementalSetting):
name: ClassVar[str] = "my_new_method"
...
```
- To launch an experiment using your method, run the following command:
```console
python main.py --setting <setting_name> --method my_new_method
```
To customize how your method gets created from the command-line, override the two following class methods:
- `add_argparse_args(cls, parser: ArgumentParser)`
- `from_argparse_args(cls, args: Namespace) -> Method`
- Create a `<my_new_method>_test.py` file next to your method script. In it, write unit tests for every module/component used in your Method. Keep them easy to read, so that people can ideally understand how the components of your Method work by simply reading the tests.
- (WIP) To run the unit tests locally, use the following command: `pytest methods/my_new_method_test.py`
- Then, write a functional test that demonstrates how your new method should behave, and what kind of results it expects to produce. The easiest way to do this is to implement a `validate_results(setting: Setting, results: Results)` method.
- (WIP) To debug/run the "integration tests" locally, use the following command: `pytest -x methods/my_new_method_test.py --slow`
- Create a Pull Request, and you're good to go!
## Registered Methods (so far):
- ## [BaseMethod](sequoia/methods/base_method.py)
- Target setting: [Setting](sequoia/settings/base/setting.py)
Versatile Baseline method which targets all settings.
Uses pytorch-lightning's Trainer for training and a LightningModule as a model.
Uses a [BaseModel](methods/models/base_model/base_model.py), which
can be used for:
- Self-Supervised training with modular auxiliary tasks;
- Semi-Supervised training on partially labeled batches;
- Multi-Head prediction (e.g. in task-incremental scenario);
- ## [RandomBaselineMethod](sequoia/methods/random_baseline.py)
- Target setting: [Setting](sequoia/settings/base/setting.py)
Baseline method that gives random predictions for any given setting.
This method doesn't have a model or any parameters. It just returns a random
action for every observation.
- ## [pnn.PnnMethod](sequoia/methods/pnn/pnn_method.py)
- Target setting: [IncrementalAssumption](sequoia/settings/assumptions/incremental.py)
PNN Method.
Applicable to both RL and SL Settings, as long as there are clear task boundaries
during training (IncrementalAssumption).
- ## [avalanche.AGEMMethod](sequoia/methods/avalanche/agem.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
Average Gradient Episodic Memory (AGEM) strategy from Avalanche.
See AGEM plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.AR1Method](sequoia/methods/avalanche/ar1.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
AR1 strategy from Avalanche.
See AR1 plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.CWRStarMethod](sequoia/methods/avalanche/cwr_star.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
CWRStar strategy from Avalanche.
See CWRStar plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.EWCMethod](sequoia/methods/avalanche/ewc.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
Elastic Weight Consolidation (EWC) strategy from Avalanche.
See EWC plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.GEMMethod](sequoia/methods/avalanche/gem.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
Gradient Episodic Memory (GEM) strategy from Avalanche.
See GEM plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.GDumbMethod](sequoia/methods/avalanche/gdumb.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
GDumb strategy from Avalanche.
See GDumbPlugin for more details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.LwFMethod](sequoia/methods/avalanche/lwf.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
Learning without Forgetting strategy from Avalanche.
See LwF plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.ReplayMethod](sequoia/methods/avalanche/replay.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
Replay strategy from Avalanche.
See Replay plugin for details.
This strategy does not use task identities.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [avalanche.SynapticIntelligenceMethod](sequoia/methods/avalanche/synaptic_intelligence.py)
- Target setting: [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
The Synaptic Intelligence strategy from Avalanche.
This is the Synaptic Intelligence PyTorch implementation of the
algorithm described in the paper
"Continuous Learning in Single-Incremental-Task Scenarios"
(https://arxiv.org/abs/1806.08568)
The original implementation has been proposed in the paper
"Continual Learning Through Synaptic Intelligence"
(https://arxiv.org/abs/1703.04200).
The Synaptic Intelligence regularization can also be used in a different
strategy by applying the :class:`SynapticIntelligencePlugin` plugin.
See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
- ## [sb3.A2CMethod](sequoia/methods/stable_baselines3_methods/a2c.py)
- Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Method that uses the A2C model from stable-baselines3.
- ## [sb3.DQNMethod](sequoia/methods/stable_baselines3_methods/dqn.py)
- Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Method that uses a DQN model from the stable-baselines3 package.
- ## [sb3.DDPGMethod](sequoia/methods/stable_baselines3_methods/ddpg.py)
- Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Method that uses the DDPG model from stable-baselines3.
- ## [sb3.TD3Method](sequoia/methods/stable_baselines3_methods/td3.py)
- Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Method that uses the TD3 model from stable-baselines3.
- ## [sb3.SACMethod](sequoia/methods/stable_baselines3_methods/sac.py)
- Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Method that uses the SAC model from stable-baselines3.
- ## [sb3.PPOMethod](sequoia/methods/stable_baselines3_methods/ppo.py)
- Target setting: [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Method that uses the PPO model from stable-baselines3.
- ## [EwcMethod](sequoia/methods/ewc_method.py)
- Target setting: [IncrementalAssumption](sequoia/settings/assumptions/incremental.py)
Subclass of the BaseMethod, which adds the EWCTask to the `BaseModel`.
This Method is applicable to any CL setting (RL or SL) where there are clear task
boundaries, regardless of if the task labels are given or not.
- ## [ExperienceReplayMethod](sequoia/methods/experience_replay.py)
- Target setting: [IncrementalSLSetting](sequoia/settings/sl/incremental/setting.py)
Simple method that uses a replay buffer to reduce forgetting.
- ## [HatMethod](sequoia/methods/hat.py)
- Target setting: [TaskIncrementalSLSetting](sequoia/settings/sl/task_incremental/setting.py)
Hard Attention to the Task
```
@inproceedings{serra2018overcoming,
title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
booktitle={International Conference on Machine Learning},
pages={4548--4557},
year={2018}
}
```
================================================
FILE: sequoia/methods/__init__.py
================================================
""" Methods: solutions to research problems (Settings).
Methods contain the logic related to the training of the algorithm. Methods are
encouraged to use a model to keep the networks / architecture / engineering code
separate from the training loop.
Sequoia includes a `BaseMethod`, along with an accompanying `Model`, which can be
used as a jumping-off point for new users.
You're obviously also free to write your own method/model from scratch if you want!
The recommended way to start is by creating a new subclass of the `BaseMethod`.
The best way to do so is to create your new model as a subclass of the `Model`,
which already has some neat capabilities, and can easily be extended/customized.
This `Model` is an instance of Pytorch-Lightning's `LightningModule` class, and can be
trained on the environments/dataloaders of Sequoia with a `pl.Trainer`, enabling all the
goodies associated with Pytorch-Lightning.
You can also easily add callbacks to measure your own metrics and such as you would in
Pytorch-Lightning.
"""
import glob
import inspect
import os
import warnings
from functools import lru_cache
from importlib import import_module
from os.path import abspath, basename, dirname, isfile, join
from pathlib import Path
from typing import Dict, List, Type
import pkg_resources
from pkg_resources import EntryPoint
from setuptools import find_packages
from sequoia.settings.base import Method
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
AbstractMethod = Method
_registered_methods: List[Type[Method]] = []
"""
TODO: IDEA: Add arguments to register_method that help configure the tests we
add the that method! E.g.:
```
@register_method(slow=True, requires_cuda=True, required_memory_gb=4)
class MyMethod(Method, target_setting=ContinualRLSetting):
...
```
"""
def register_method(
    method_class: Type[Method] = None, *, name: str = None, family: str = None
) -> Type[Method]:
    """Decorator around a method class, which is used to register the method.

    Can be used either bare (`@register_method`) or with keyword arguments
    (`@register_method(name=..., family=...)`). In the second form, this call returns
    the actual decorator rather than a class.

    Parameters
    ----------
    method_class:
        The class to register. Left as None when the decorator is used with parens.
    name:
        When given, assigned to the `name` attribute of the class.
    family:
        When given, assigned to the `family` attribute of the class.

    Returns
    -------
    The same class, after adding it to the list of registered methods (if it wasn't
    already there).

    Raises
    ------
    TypeError
        If the decorated object is not a subclass of `Method`. In that case, the
        object is left unmodified.
    """

    # This is based on `dataclasses.dataclass`:
    def wrap(method_class: Type[Method]) -> Type[Method]:
        # Validate *before* touching the class, so that a rejected object doesn't end
        # up with its `name` / `family` attributes overwritten.
        if not (isinstance(method_class, type) and issubclass(method_class, Method)):
            raise TypeError(
                "The `register_method` decorator should only be used on subclasses of "
                "`Method`."
            )
        if name is not None:
            method_class.name = name
        if family is not None:
            method_class.family = family
        if method_class not in _registered_methods:
            _registered_methods.append(method_class)
        return method_class

    # See if we're being called as @register_method or @register_method().
    if method_class is None:
        # We're called with parens.
        return wrap
    # We're called as @register_method without parens.
    return wrap(method_class)
from .base_method import BaseMethod, BaseModel
from .ewc_method import EwcMethod
from .experience_replay import ExperienceReplayMethod
from .hat import HatMethod
from .pnn import PnnMethod
from .random_baseline import RandomBaselineMethod
@lru_cache(1)
def get_external_methods() -> Dict[str, Type[Method]]:
    """Returns a dictionary of the Methods defined outside of Sequoia.

    The keys are the names of the entry-points found in the "Method" group, and the
    values are the objects obtained by loading them. Entry-points that fail to load
    are reported with `logger.error` and left out of the result.

    Packages outside of Sequoia can register methods by putting a `Method` entry-point
    in their setup.py, like so:

    ```python
    # (inside /setup.py)
    setup(
        name="my_package",
        packages=setuptools.find_packages(include=["my_package*"]),
        ...
        entry_points={
            "Method": [
                "foo_method = my_package.my_methods.foo_method:FooMethod",
                "bar_method = my_package.my_methods.bar_method:BarMethod",
            ],
        },
    )
    ```

    Compared with using the `@register_method` decorator, this has the benefit that the
    module containing the Method does not need to be imported/"live" for the method to
    be available. This is very relevant when using Sequoia through the command-line, for
    instance, since Sequoia would have no way of knowing what other methods are
    available:

    ```console
    sequoia setting foo_setting method foo_method
    ```
    """
    external_methods: Dict[str, Type[Method]] = {}
    # Each item is expected to be a `pkg_resources.EntryPoint`.
    for entry_point in pkg_resources.iter_entry_points("Method"):
        try:
            loaded_class = entry_point.load()
        except Exception as exc:
            # Best-effort: a broken external package shouldn't prevent the others
            # (or Sequoia's own methods) from being usable.
            logger.error(
                f"Unable to load external Method: '{entry_point.name}', from package "
                f"{entry_point.dist.project_name}, version={entry_point.dist.version}: "
                f"{exc}"
            )
            continue
        logger.debug(
            f"Imported an external Method: '{entry_point.name}', from package "
            f"{entry_point.dist.project_name}, (version = {entry_point.dist.version})."
        )
        external_methods[entry_point.name] = loaded_class
    return external_methods
# Keeping a pointer to the old name, just to help with backward-compatibility a bit.
BaselineMethod = BaseMethod

# TODO: Eventually these could become external repos, with their own tests / etc, based
# on a 'cookiecutter' repo of some sort. This would make it easier to maintain and to
# delegate work!
# IDEA: Could also do the same for the datasets somehow? Like have an extendable
# `sequoia.datasets` cookiecutter repo? How would that work with Settings?
# Assumption + Assumption -> Assumption (combined)
# Setting := fn(dataset, **kwargs) -> Callable[[Method], Results]

# Optional dependency: avalanche. `BaseStrategy` is only imported as a probe to check
# that the package is importable.
AVALANCHE_INSTALLED = False
try:
    from avalanche.training.strategies import BaseStrategy  # type: ignore

    AVALANCHE_INSTALLED = True
except ImportError:
    pass

# The star-import happens outside of the `try` block above, so an ImportError raised
# from within `sequoia.methods.avalanche_methods` itself is NOT silenced.
if AVALANCHE_INSTALLED:
    from sequoia.methods.avalanche_methods import *

# Optional dependency: stable-baselines3 (same probe-then-import pattern as above).
SB3_INSTALLED = False
try:
    import stable_baselines3

    SB3_INSTALLED = True
except ImportError:
    pass

if SB3_INSTALLED:
    from sequoia.methods.stable_baselines3_methods import *

# NOTE: Unlike the two packages above, here the star-import itself sits inside the
# `try` block, so any ImportError raised while importing
# `sequoia.methods.pl_bolts_methods` is silently ignored.
try:
    from sequoia.methods.pl_bolts_methods import *
except ImportError:
    pass
def add_external_methods(all_methods: List[Type[Method]]) -> List[Type[Method]]:
    """Append to `all_methods` the external Methods that it doesn't already contain.

    NOTE: The given list is modified in-place, and that same list object is returned.
    """
    for entry_point_name, external_class in get_external_methods().items():
        if external_class in all_methods:
            continue
        logger.debug(f"Adding method {entry_point_name} from external package.")
        all_methods.append(external_class)
    return all_methods
def get_all_methods() -> List[Type[Method]]:
    """Return all the available Method classes, sorted by their full name.

    This combines the methods registered with `register_method` and the external
    methods added by `add_external_methods`, without duplicates.

    Returns
    -------
    List[Type[Method]]
        A new list, which callers are free to modify.
    """
    # This may change over time, and includes ALL subclasses of 'Method'.
    # methods = Method.__subclasses__()

    # This includes all registered methods, e.g. not any base classes.
    # NOTE: Work on a copy: `add_external_methods` appends to the list it is given, and
    # the module-level registry shouldn't be extended as a side-effect of this query.
    methods = add_external_methods(list(_registered_methods))
    return sorted(set(methods), key=lambda method: method.get_full_name())
================================================
FILE: sequoia/methods/aux_tasks/__init__.py
================================================
from .auxiliary_task import AuxiliaryTask
from .ewc import EWCTask
from .reconstruction import AEReconstructionTask, VAEReconstructionTask
from .transformation_based import RotationTask
# Module-level shorthands for the `name` attribute of each auxiliary task class.
VAE: str = VAEReconstructionTask.name
AE: str = AEReconstructionTask.name
EWC: str = EWCTask.name
================================================
FILE: sequoia/methods/aux_tasks/auxiliary_task.py
================================================
import typing
from abc import abstractmethod
from dataclasses import dataclass
from typing import Callable, ClassVar, Dict, Optional, Tuple
import torch
from pytorch_lightning import LightningModule
from torch import Tensor, nn
from sequoia.common.hparams import HyperParameters, uniform
from sequoia.common.loss import Loss
if typing.TYPE_CHECKING:
from sequoia.methods.models.base_model import Model
class AuxiliaryTask(nn.Module):
    """Represents an additional loss to apply to a `Classifier`.

    The main logic should be implemented in the `get_loss` method.

    In general, it should apply some deterministic transformation to its input,
    and treat that same transformation as a label to predict.
    That loss should be backpropagatable through the feature extractor (the
    `encoder` attribute).

    NOTE: The `encoder`, `output_head`, `preprocessing` and `_model` attributes are
    declared on the class (not on instances), and the `encode`, `logits` and
    `set_model` methods access them through `AuxiliaryTask` directly.
    """

    name: ClassVar[str] = ""
    input_shape: ClassVar[Tuple[int, ...]] = ()
    hidden_size: ClassVar[int] = -1

    # The model, as stored by `set_model` and returned by the `model` property.
    _model: ClassVar["Model"]
    # Class variables for holding the Modules shared with the classifier.
    encoder: ClassVar[nn.Module]
    output_head: ClassVar[nn.Module]  # type: ignore
    preprocessing: ClassVar[Callable[[Tensor, Optional[Tensor]], Tuple[Tensor, Optional[Tensor]]]]

    @dataclass
    class Options(HyperParameters):
        """Settings for this Auxiliary Task."""

        # Coefficient used to scale the task loss before adding it to the total.
        coefficient: float = uniform(0.0, 1.0, default=1.0)

    def __init__(self, *args, options: Options = None, name: str = None, **kwargs):
        """Creates a new Auxiliary Task to further train the encoder.

        Can use the `encoder` and `classifier` components of the parent
        `Classifier` instance.

        NOTE: Since this object will be stored inside the `tasks` dict in the
        model, we can't pass a reference to the parent here, otherwise the
        parent would hold a reference to itself inside its `.modules()`, so
        there would be an infinite recursion problem.

        Parameters
        ----------
        - options : AuxiliaryTask.Options, optional, by default None
            The `Options` related to this task, containing the loss
            coefficient used to scale this task, as well as any other additional
            hyperparameters specific to this `AuxiliaryTask`.
        - name: str, optional, by default None
            The name of this auxiliary task. When not given, the `name` attribute
            of the class is used.
        - *args, **kwargs:
            Only used when `options` is not given (or is falsy), in which case they
            are passed to the constructor of this class' `Options`.
        """
        super().__init__()
        # If we are given the coefficient as a constructor argument, for
        # instance, then we create the Options for this auxiliary task.
        self.name = name or type(self).name
        self.options = options or type(self).Options(*args, **kwargs)
        # Use the GPU when one is available, otherwise fall back to the CPU.
        self.device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._disabled = False

    def encode(self, x: Tensor) -> Tensor:
        """Returns the result of calling the class-level `encoder` on `x`."""
        # x, _ = AuxiliaryTask.preprocessing(x, None)
        return AuxiliaryTask.encoder(x)

    def logits(self, h_x: Tensor) -> Tensor:
        """Returns the result of calling the class-level `output_head` on `h_x`."""
        return AuxiliaryTask.output_head(h_x)

    @abstractmethod
    def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
        """Calculates the Auxiliary loss for the input `x`.

        The parameters `h_x`, `y_pred` are given for convenience, so we don't
        re-calculate the forward pass multiple times on the same input.

        Parameters
        ----------
        - forward_pass: Dict[str, Tensor] containing:
            - 'x' : Tensor
                The input samples.
            - 'h_x' : Tensor
                The hidden vector, or hidden features, which corresponds to the
                output of the feature extractor (should be equivalent to
                `self.encoder(x)`). Given for convenience, when available.
            - 'y_pred' : Tensor
                The predicted labels.
        - y : Tensor, optional, by default None
            The true labels for each sample. Note that this is the label of the
            output head's task, not of an auxiliary task.

        Returns
        -------
        Loss
            The loss, not scaled.
        """

    @property
    def coefficient(self) -> float:
        """The coefficient of this task, as stored in `self.options`."""
        return self.options.coefficient

    @coefficient.setter
    def coefficient(self, value: float) -> None:
        """Sets the coefficient, calling `disable()` / `enable()` when appropriate.

        `disable()` is called when the task is currently enabled and the new value
        is 0, and `enable()` is called when the task is currently disabled and the
        new value is not 0. The new value is stored in `self.options` afterwards.
        """
        if self.enabled and value == 0:
            self.disable()
        elif self.disabled and value != 0:
            self.enable()
        self.options.coefficient = value

    def enable(self) -> None:
        """Enable this auxiliary task.
        This could be used to create/allocate resources to this task.

        NOTE: The task will not work, even after being enabled, if its
        coefficient is set to 0!
        """
        self._disabled = False

    def disable(self) -> None:
        """Disable this auxiliary task.
        This could be used to delete/deallocate resources used by this task.

        NOTE: This base implementation only sets the internal 'disabled' flag; it
        does not modify the coefficient.
        """
        self._disabled = True

    @property
    def enabled(self) -> bool:
        """True when the task has not been explicitly disabled.

        NOTE: This only looks at the internal flag and not at the coefficient, so it
        is not always the negation of `disabled`.
        """
        return not self._disabled

    @property
    def disabled(self) -> bool:
        """True when the task was explicitly disabled, or when its coefficient is 0."""
        return self._disabled or self.coefficient == 0.0

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Executed when the task switches (to either a new or known task).

        Does nothing in this base class.
        """

    @property
    def model(self) -> LightningModule:
        """The `_model` class attribute, looked up on the type of `self`."""
        return type(self)._model

    @staticmethod
    def set_model(model: "Model") -> None:
        """Stores `model` on the `AuxiliaryTask` class itself."""
        AuxiliaryTask._model = model

    def shared_modules(self) -> Dict[str, nn.Module]:
        """Returns any trainable modules of `self` that are shared across tasks.

        By giving this information, these weights can then be used in
        regularization-based auxiliary tasks like EWC, for example.

        By default, for auxiliary tasks, this returns nothing, for instance.
        For the base model, this returns a dictionary with the encoder, for example.
        When using only one output head (i.e. when `self.hp.multihead` is `False`), then
        this dict also includes the output head.

        Returns
        -------
        Dict[str, nn.Module]:
            Dictionary mapping from name to the shared modules, if any.
        """
        return {}
================================================
FILE: sequoia/methods/aux_tasks/ewc.py
================================================
"""Elastic Weight Consolidation as an Auxiliary Task.
This is a simplified version of EWC, that only currently uses the L2 norm, rather
than the Fisher Information Matrix.
TODO: If it's worth it, we could re-add the 'real' EWC using the nngeometry
package, (which I don't think we need to have as a submodule).
"""
from collections import deque
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
from typing import Deque, List, Optional, Type
from gym.spaces.utils import flatdim
from nngeometry.metrics import FIM
from nngeometry.object.pspace import PMatAbstract, PMatDiag, PMatKFAC, PVector
from simple_parsing import choice
from torch import Tensor
from torch.utils.data import DataLoader
from sequoia.common.hparams import categorical, uniform
from sequoia.common.loss import Loss
from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.methods.models.output_heads import ClassificationHead, RegressionHead
from sequoia.settings.base.objects import Observations
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import dict_intersection
# Module-level logger, created with this module's `__name__`.
logger = get_logger(__name__)
class EWCTask(AuxiliaryTask):
    """Elastic Weight Consolidation, implemented as a 'self-supervision-style'
    Auxiliary Task.

    While training, the observations of the most recent batches are kept in a bounded
    buffer. When a switch to a new training task is detected, the current shared weights
    become the 'anchor' weights, a Fisher Information Matrix (FIM) is estimated from the
    buffered observations, and it is consolidated with the previous FIMs. The loss is
    then the FIM-weighted squared distance between the current and the anchor weights.

    ```bibtex
    @article{kirkpatrick2017overcoming,
    title={Overcoming catastrophic forgetting in neural networks},
    author={Kirkpatrick, James and Pascanu, Razvan and Rabinowitz, Neil and Veness,
    Joel and Desjardins, Guillaume and Rusu, Andrei A and Milan, Kieran and Quan,
    John and Ramalho, Tiago and Grabska-Barwinska, Agnieszka and others},
    journal={Proceedings of the national academy of sciences},
    volume={114},
    number={13},
    pages={3521--3526},
    year={2017},
    publisher={National Acad Sciences}
    }
    ```
    """

    name: str = "ewc"

    @dataclass
    class Options(AuxiliaryTask.Options):
        """Options of the EWC auxiliary task."""

        # Coefficient of the EWC auxiliary task.
        # NOTE: It seems to be the case that, at least just for EWC, the coefficient
        # can often be much greater than 1, hence why we overwrite the prior over
        # that hyper-parameter here.
        coefficient: float = uniform(0.0, 100.0, default=1.0)
        # Batchsize to be used when computing FIM (unused atm)
        batch_size_fim: int = 32
        # Number of observations to use for FIM calculation
        sample_size_fim: int = categorical(2, 4, 8, 16, 32, 64, 128, 256, 512, default=8)
        # Fisher information representation type (diagonal or block diagonal).
        fim_representation: Type[PMatAbstract] = choice(
            {"diagonal": PMatDiag, "block_diagonal": PMatKFAC},
            default=PMatDiag,
        )

    def __init__(self, *args, name: str = None, options: "EWCTask.Options" = None, **kwargs):
        """Create the EWC task.

        All arguments are forwarded to `AuxiliaryTask.__init__`. The task disables
        itself when the model reports no shared modules.
        """
        super().__init__(*args, options=options, name=name, **kwargs)
        self.options: EWCTask.Options
        # The id of the current/most recent task the model has been trained on.
        self.current_training_task: Optional[int] = None
        # The id of the previous task the model was trained on.
        self.previous_training_task: Optional[int] = None
        # The ids of all the tasks trained on so far, not including the current task.
        self.previous_training_tasks: List[Optional[int]] = []

        # The 'anchor' weights: a snapshot of the shared weights at the last EWC update.
        self.previous_model_weights: Optional[PVector] = None
        # Bounded buffer holding the most recent training observations (batches).
        self.observation_collector: Deque[Observations] = deque(maxlen=self.options.sample_size_fim)
        self.fisher_information_matrices: List[PMatAbstract] = []

        # When True, ignore task boundaries (no EWC update).
        # This is used mainly because of the need for executing forward passes when
        # calculating the new FIMs, and the MultiheadModel class might then call
        # `on_task_switch`, so we don't want to recurse.
        self._ignore_task_boundaries: bool = False

        if not self.model.shared_modules():
            # TODO: This might cause a bug, if some auxiliary task were to replace the
            # encoder and also be 'activated' after this task. This is a really obscure
            # edge case though.
            logger.warning(
                RuntimeWarning(
                    "Disabling the EWC auxiliary task, since there appears to be no "
                    "shared weights between tasks!"
                )
            )
            self.disable()

    def get_loss(self, forward_pass: ForwardPass, y: Tensor = None) -> Loss:
        """Gets the EWC loss.

        In training mode, the observations of the forward pass are also stored, so
        they can later be used to estimate the FIM.

        Returns an empty `Loss` when the task is disabled or when no anchor weights
        have been set yet (i.e. during the first task).
        """
        if self.training:
            self.observation_collector.append(forward_pass.observations)

        if not self.enabled or self.previous_model_weights is None:
            # We're in the first task: do nothing.
            return Loss(name=self.name)

        # The distance to the anchor weights doesn't depend on the FIM, so it is
        # computed only once rather than once per matrix.
        diff = self.get_current_model_weights() - self.previous_model_weights
        loss = 0.0
        for fim in self.fisher_information_matrices:
            loss += fim.vTMv(diff)
        return Loss(name=self.name, loss=loss)

    def on_task_switch(self, task_id: Optional[int]):
        """Executed when the task switches (to either a known or unknown task).

        Updates the EWC parameters when switching to a task that hasn't been seen
        before during training. Nothing happens when the task is disabled, when task
        boundaries are being ignored, or when not in training mode.
        """
        if not self.enabled:
            return

        logger.debug(f"On task switch called: task_id={task_id}")

        if self._ignore_task_boundaries:
            logger.info("Ignoring task boundary (probably from recursive call)")
            return

        if not self.training:
            logger.debug("Task boundary at test time, no EWC update.")
            return

        # Two cases:
        # - Setting without task IDs --> still calculate the FIMs at each task boundary.
        # - Setting with IDs --> calculate the FIMs before training on new tasks.

        # Setting without task labels. Task ids: None -> None -> None (always None)
        if task_id is None:
            # Here we use the number of task boundaries as a 'fake' task id, meaning we
            # treat each task as if it has never been encountered before.
            if self.current_training_task is None:
                # Start of first task, no EWC update.
                self.current_training_task = 0
            else:
                self.previous_training_task = self.current_training_task
                self.current_training_task += 1
                self.update_anchor_weights(new_task_id=self.current_training_task)
        # Setting with task labels. Task ids: 0 -> 1 -> 2 -> 1 -> 3 -> 5 -> 11 -> 5 etc.
        else:
            if self.current_training_task is None:
                logger.info("Starting the first task, no EWC update.")
                self.current_training_task = task_id
            elif task_id == self.current_training_task:
                logger.info("Switching to same task, no EWC update.")
            elif task_id in self.previous_training_tasks:
                logger.info(f"Switching to known task {task_id}, no EWC update.")
            else:
                logger.info(f"Switching to new task {task_id}, updating EWC params.")
                self.previous_training_task = self.current_training_task
                self.previous_training_tasks.append(self.current_training_task)
                self.current_training_task = task_id
                self.update_anchor_weights(new_task_id=self.current_training_task)

    def update_anchor_weights(self, new_task_id: int) -> None:
        """Update the FIMs and other EWC params before starting training on a new task.

        Parameters
        ----------
        new_task_id : int
            The ID of the new task.

        Raises
        ------
        NotImplementedError
            When the output head of the model is neither a `ClassificationHead` nor a
            `RegressionHead`.
        """
        # we dont want to go here at test time.
        # NOTE: We also switch between unknown tasks.
        if not self.observation_collector:
            # No training batch was seen since the last update, so there is nothing to
            # estimate a FIM from. Indexing the empty buffer below would raise an
            # IndexError, so the update is skipped and the EWC state is left untouched.
            logger.warning(
                f"No observations were collected before the switch to task "
                f"{new_task_id}, skipping the EWC update."
            )
            return

        logger.info(
            f"Updating the EWC 'anchor' weights before starting training on " f"task {new_task_id}"
        )
        self.previous_model_weights = self.get_current_model_weights().clone().detach()

        # Create a Dataloader from the stored observations.
        obs_type: Type[Observations] = type(self.observation_collector[0])
        dataset = [obs.as_namedtuple() for obs in self.observation_collector]
        # Or, alternatively (see the note below on why we don't use this):
        # stacked_observations: Observations = obs_type.stack(self.observation_collector)
        # dataset = TensorDataset(*stacked_observations.as_namedtuple())

        # NOTE: This is equivalent to just using the same batch size as during
        # training, as each Observations in the list is already a batch.
        # NOTE: We keep the same batch size here as during training because for
        # instance in RL, it would be weird to suddenly give some new batch size,
        # since the buffers would get cleared and re-created just for these forward
        # passes
        dataloader = DataLoader(dataset, batch_size=None, collate_fn=None)
        # TODO: Would be nice to have a progress bar here.

        # Create the parameters to be passed to the FIM function. These may vary a
        # bit, depending on if we're being applied in a classification setting or in
        # a regression setting (not done yet)
        variant: str
        # Name of the attribute of the predicted actions that is given to the FIM.
        output_attribute: str
        # TODO: Change this conditional to be based on the type of action space, rather
        # than of output head.
        output_head = self.model.output_head
        if isinstance(output_head, ClassificationHead):
            variant = "classif_logits"
            n_output = self.model.action_space.n
            output_attribute = "logits"
        elif isinstance(output_head, RegressionHead):
            # NOTE: This hasn't been tested yet.
            variant = "regression"
            n_output = flatdim(self.model.action_space)
            output_attribute = "y_pred"
        else:
            raise NotImplementedError("TODO")

        def fim_function(*inputs) -> Tensor:
            """Run a forward pass on a stored batch and return the model outputs."""
            observations = obs_type(*inputs).to(self.model.device)
            forward_pass: ForwardPass = self.model(observations)
            return getattr(forward_pass.actions, output_attribute)

        with self._ignoring_task_boundaries():
            # Prevent recursive calls to `on_task_switch` from affecting us (can be
            # called from MultiheadModel). (TODO: MultiheadModel will be fixed soon.)
            # layer_collection = LayerCollection.from_model(self.model.shared_modules())
            # nngeometry BUG: this doesn't work when passing the layer
            # collection instead of the model
            new_fim = FIM(
                model=self.model.shared_modules(),
                loader=dataloader,
                representation=self.options.fim_representation,
                n_output=n_output,
                variant=variant,
                function=fim_function,
                device=self.model.device,
                layer_collection=None,
            )

        # TODO: There was maybe an idea to use another fisher information matrix for
        # the critic in A2C, but not doing that atm.
        new_fims = [new_fim]
        self.consolidate(new_fims, task=new_task_id)
        self.observation_collector.clear()

    @contextmanager
    def _ignoring_task_boundaries(self):
        """Contextmanager used to temporarily ignore task boundaries (no EWC update).

        The previous value of the flag is restored on exit, even when the body raises,
        so that a failed FIM computation doesn't leave the task permanently ignoring
        task boundaries.
        """
        previous_value = self._ignore_task_boundaries
        self._ignore_task_boundaries = True
        try:
            yield
        finally:
            self._ignore_task_boundaries = previous_value

    def consolidate(self, new_fims: List[PMatAbstract], task: Optional[int]) -> None:
        """Consolidates the new and current fisher information matrices.

        The first time this is called, the new matrices are stored as-is. Afterwards,
        each stored matrix is replaced with a weighted average of itself (weight
        `task`) and the new matrix (weight 1).

        Parameters
        ----------
        new_fims : List[PMatAbstract]
            The list of new fisher information matrices.
        task : Optional[int]
            The id of the previous task, when task labels are available, or the number
            of task switches encountered so far when task labels are not available.
        """
        if not self.fisher_information_matrices:
            self.fisher_information_matrices = new_fims
            return

        assert task is not None, "Should have been given an int task id (even if fake)."

        for i, (fim_previous, fim_new) in enumerate(
            zip(self.fisher_information_matrices, new_fims)
        ):
            # consolidate the FIMs
            if fim_previous is None:
                self.fisher_information_matrices[i] = fim_new
                continue

            # consolidate the fim_new into fim_previous in place
            if isinstance(fim_new, PMatDiag):
                # TODO: This is some kind of weird online-EWC related magic:
                fim_previous.data = (deepcopy(fim_new.data) + fim_previous.data * task) / (
                    task + 1
                )
            elif isinstance(fim_new.data, dict):
                # TODO: This is some kind of weird online-EWC related magic:
                for _, (prev_param, new_param) in dict_intersection(
                    fim_previous.data, fim_new.data
                ):
                    for prev_item, new_item in zip(prev_param, new_param):
                        prev_item.data = (prev_item.data * task + deepcopy(new_item.data)) / (
                            task + 1
                        )
            self.fisher_information_matrices[i] = fim_previous

    def get_current_model_weights(self) -> PVector:
        """Return the current weights of the model's shared modules, as a `PVector`."""
        return PVector.from_model(self.model.shared_modules())
================================================
FILE: sequoia/methods/aux_tasks/reconstruction/__init__.py
================================================
""" Auxiliary tasks based on reconstructing an input given a hidden vector.
TODO: Add some denoising autoencoders maybe as a reconstruction task?
"""
from .ae import AEReconstructionTask
from .decoder_for_dataset import get_decoder_class_for_dataset
from .decoders import CifarDecoder, MnistDecoder
from .vae import VAEReconstructionTask
================================================
FILE: sequoia/methods/aux_tasks/reconstruction/ae.py
================================================
""" Defines an Auto-Encoder-based Auxiliary task.
"""
from typing import ClassVar, Dict, Optional, Tuple, Union
import torch
from torch import Tensor, nn
from sequoia.common.loss import Loss
from ..auxiliary_task import AuxiliaryTask
from .decoder_for_dataset import get_decoder_class_for_dataset
class AEReconstructionTask(AuxiliaryTask):
    """Task that adds the AE loss (reconstruction loss).

    Uses the feature extractor (`encoder`) of the parent model as the encoder of
    an AE. Contains trainable `decoder` module, which is
    used to get the AE loss to train the feature extractor with.

    The decoder is created lazily, the first time `get_loss` is called, based on the
    shape of the inputs.
    """

    name: ClassVar[str] = "ae"

    def __init__(self, coefficient: float = None, options: AuxiliaryTask.Options = None):
        super().__init__(coefficient=coefficient, options=options)
        # Summed (not averaged) squared error between the reconstruction and the input.
        self.loss = nn.MSELoss(reduction="sum")

        # BUG: The decoder for mnist has output shape of [1, 28, 28], but the
        # transforms 'fix' that shape to be [3, 28, 28].
        # Therefore: TODO: Should we adapt the output shape of the decoder
        # depending on the shape of the input?
        self.decoder: Optional[nn.Module] = None

    def create_decoder(self, input_shape: Union[torch.Size, Tuple[int, ...]]) -> nn.Module:
        """Creates a decoder to reconstruct the input from the hidden vectors.

        `input_shape` may include a leading batch dimension (4 dimensions), in which
        case that dimension is discarded.
        """
        if len(input_shape) == 4:
            # discard the batch dimension.
            input_shape = input_shape[1:]
        # At the moment we have a 'fixed' set of image sizes (28, 32, 224, iirc)
        # and we just use the decoder type for the given dataset.
        # TODO: Create the decoder dynamically, depending on the required shape.
        decoder_class = get_decoder_class_for_dataset(input_shape)
        decoder: nn.Module = decoder_class(
            code_size=AuxiliaryTask.hidden_size,
        )
        decoder = decoder.to(self.device)
        return decoder

    def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
        """Get the reconstruction loss for the `x` and `h_x` entries of `forward_pass`."""
        x = forward_pass["x"]
        h_x = forward_pass["h_x"]
        # y_pred = forward_pass["y_pred"]
        z = h_x.view([h_x.shape[0], -1])

        # The `output_shape` of a decoder doesn't include the batch dimension, so it has
        # to be compared with the shape of a single sample. Comparing it with the full
        # (batched) shape of `x` is always unequal, which would replace the decoder with
        # a freshly-initialized one on every call.
        sample_shape = tuple(x.shape[1:]) if len(x.shape) == 4 else tuple(x.shape)
        if self.decoder is None or tuple(self.decoder.output_shape) != sample_shape:
            # NOTE(review): a decoder created here is presumably not known to an
            # optimizer that was created earlier -- confirm.
            self.decoder = self.create_decoder(x.shape)

        x_hat = self.decoder(z)
        assert x_hat.shape == x.shape, (
            f"reconstructed x should have same shape as original x! "
            f"({x_hat.shape} != {x.shape})"
        )
        recon_loss = self.reconstruction_loss(x_hat, x)
        loss_info = Loss(name=self.name, loss=recon_loss)
        return loss_info

    def forward(self, h_x: Tensor) -> Tensor:  # type: ignore
        """Decode the (flattened) hidden vectors `h_x` into reconstructions.

        NOTE: `self.decoder` is only created in `get_loss`; it is still None before that.
        """
        z = h_x.view([h_x.shape[0], -1])
        x_hat = self.decoder(z)
        return x_hat

    def reconstruct(self, x: Tensor) -> Tensor:
        """Encode `x`, decode the result, and return it with the same shape as `x`."""
        h_x = self.encode(x)
        x_hat = self.forward(h_x)
        return x_hat.view(x.shape)

    def reconstruction_loss(self, recon_x: Tensor, x: Tensor) -> Tensor:
        """Return the summed squared error between `recon_x` and `x`."""
        return self.loss(recon_x, x)
================================================
FILE: sequoia/methods/aux_tasks/reconstruction/decoder_for_dataset.py
================================================
from typing import Dict, Tuple, Type, Union
from torch import nn
from .decoders import CifarDecoder, ImageNetDecoder, MnistDecoder
# Dict mapping from image (height, width) to the type of decoder to use.
# Only the spatial size is used as the key; the number of channels is not part of it.
# TODO: Add some more decoders for other image datasets/shapes.
registered_decoders: Dict[Tuple[int, int], Type[nn.Module]] = {
    (28, 28): MnistDecoder,
    (32, 32): CifarDecoder,
    (224, 224): ImageNetDecoder,
}
def get_decoder_class_for_dataset(input_shape: Union[Tuple[int, int, int]]) -> Type[nn.Module]:
    """Return the decoder class registered for images of the given shape.

    The channel dimension is assumed to be the smallest one, and has to be either the
    first (C, H, W) or the last (H, W, C) dimension.

    Parameters
    ----------
    input_shape :
        Shape of a single image (without a batch dimension).

    Raises
    ------
    RuntimeError
        If the layout of the image can't be determined, or if there is no decoder
        registered for its (height, width).
    """
    assert len(input_shape) == 3, input_shape
    smallest_dim = min(input_shape)
    if input_shape[0] == smallest_dim:
        # Image is in C, H, W format
        _, height, width = input_shape
    elif input_shape[-1] == smallest_dim:
        # Image is in H, W, C format
        height, width, _ = input_shape
    else:
        # The smallest dimension is in the middle: neither layout matches. Without this
        # branch, `height` and `width` would be unbound below (UnboundLocalError).
        raise RuntimeError(f"No decoder available for input shape {input_shape}")

    decoder_class = registered_decoders.get((height, width))
    if decoder_class is None:
        raise RuntimeError(f"No decoder available for input shape {input_shape}")
    return decoder_class
================================================
FILE: sequoia/methods/aux_tasks/reconstruction/decoders.py
================================================
from abc import ABC
from typing import Tuple
from torch import nn
from sequoia.common.layers import DeConvBlock, Reshape
class Decoder(nn.Sequential, ABC):
    """A base class for the decoders (mostly for typing purposes).

    Only declares the attributes which the concrete decoders set in their constructors.
    """

    # Size of the code given to the decoder (the `code_size` constructor argument of
    # the concrete decoders).
    code_size: int
    # Shape of the generated images, without the batch dimension.
    output_shape: Tuple[int, int, int]
class MnistDecoder(Decoder):
    """Decoder that generates images of shape [`out_channels`, 28, 28]"""

    def __init__(self, code_size: int, out_channels: int = 3):
        """Build the decoder for codes of size `code_size`.

        The code is first reshaped into a `[code_size, 1, 1]` feature map, which is then
        upsampled by four transposed convolutions.
        """
        self.code_size = code_size
        self.output_shape: Tuple[int, int, int] = (out_channels, 28, 28)

        to_feature_map = Reshape([code_size, 1, 1])
        upsampling_layers = [
            nn.ConvTranspose2d(code_size, 32, kernel_size=4, stride=1),
            nn.BatchNorm2d(32),
            nn.ELU(alpha=1.0, inplace=True),
            nn.ConvTranspose2d(32, 16, kernel_size=5, stride=2),
            nn.BatchNorm2d(16),
            nn.ELU(alpha=1.0, inplace=True),
            nn.ConvTranspose2d(16, 16, kernel_size=5, stride=2),
            nn.BatchNorm2d(16),
            nn.ELU(alpha=1.0, inplace=True),
            nn.ConvTranspose2d(16, out_channels, kernel_size=4, stride=1),
        ]
        # The sigmoid squashes the outputs into the (0, 1) range.
        super().__init__(to_feature_map, *upsampling_layers, nn.Sigmoid())
class CifarDecoder(Decoder):
    """Decoder that generates images of shape [3, 32, 32]"""

    def __init__(self, code_size: int):
        """Build the decoder for codes of size `code_size`.

        The code is reshaped into a `[code_size, 1, 1]` feature map and passed through a
        stack of `DeConvBlock`s, followed by a sigmoid.
        """
        self.code_size = code_size
        self.output_shape: Tuple[int, int, int] = (3, 32, 32)

        # (in_channels, out_channels) of each block, except for the last one.
        hidden_channels = [(code_size, 16), (16, 32), (32, 64), (64, 64)]
        blocks = [DeConvBlock(n_in, n_out) for n_in, n_out in hidden_channels]
        # The last block has no ReLU, since it is followed by the sigmoid.
        blocks.append(DeConvBlock(64, 3, last_relu=False))
        super().__init__(Reshape([code_size, 1, 1]), *blocks, nn.Sigmoid())
class ImageNetDecoder(Decoder):
    """Decoder that generates images of shape [3, 224, 224]"""

    def __init__(self, code_size: int):
        """Build the decoder for codes of size `code_size`.

        The code is reshaped into a `[code_size, 1, 1]` feature map and passed through a
        stack of `DeConvBlock`s, followed by a sigmoid.
        """
        self.code_size = code_size
        self.output_shape: Tuple[int, int, int] = (3, 224, 224)

        # (in_channels, out_channels) of each block, except for the last one.
        hidden_channels = [(code_size, 16), (16, 32), (32, 64), (64, 128), (128, 224)]
        blocks = [DeConvBlock(n_in, n_out) for n_in, n_out in hidden_channels]
        # The last block has no ReLU, since it is followed by the sigmoid.
        blocks.append(DeConvBlock(224, 3, last_relu=False))
        super().__init__(Reshape([code_size, 1, 1]), *blocks, nn.Sigmoid())
================================================
FILE: sequoia/methods/aux_tasks/reconstruction/vae.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Dict
import torch
from torch import Tensor, nn
from sequoia.common.loss import Loss
from ..auxiliary_task import AuxiliaryTask
from .ae import AEReconstructionTask
from .decoder_for_dataset import get_decoder_class_for_dataset
class VAEReconstructionTask(AEReconstructionTask):
    """Task that adds the VAE loss (reconstruction + KL divergence).

    Uses the feature extractor (`encoder`) of the parent model as the encoder of
    a VAE. Contains trainable `mu`, `logvar`, and `decoder` modules, which are
    used to get the VAE loss to train the feature extractor with.
    """

    name: ClassVar[str] = "vae"

    @dataclass
    class Options(AEReconstructionTask.Options):
        """Settings & Hyper-parameters related to the VAEReconstructionTask."""

        code_size: int = 50  # dimensions of the VAE code-space.
        beta: float = 1.0  # Beta term, multiplies the KL divergence term.

    def __init__(self, coefficient: float = None, options: "VAEReconstructionTask.Options" = None):
        super().__init__(coefficient=coefficient, options=options)
        self.options: VAEReconstructionTask.Options
        self.code_size = self.options.code_size
        # add the rest of the VAE layers: (Mu, Sigma, and the decoder)
        self.mu = nn.Linear(AuxiliaryTask.hidden_size, self.code_size)
        self.logvar = nn.Linear(AuxiliaryTask.hidden_size, self.code_size)
        # Unlike in the parent class, the decoder is created right away here, and takes
        # codes of size `code_size` as an input.
        decoder_class = get_decoder_class_for_dataset(AuxiliaryTask.input_shape)
        self.decoder: nn.Module = decoder_class(
            code_size=self.code_size,
        )

    def _sample_latent(self, h_x: Tensor) -> tuple:
        """Return `(mu, logvar, z)` for the hidden vectors `h_x`.

        `z` is sampled from the gaussian described by `mu` and `logvar`.
        """
        h_x = h_x.view([h_x.shape[0], -1])
        mu, logvar = self.mu(h_x), self.logvar(h_x)
        z = self.reparameterize(mu, logvar)
        return mu, logvar, z

    def forward(self, h_x: Tensor) -> Tensor:  # type: ignore
        """Sample a code for each hidden vector in `h_x` and decode it."""
        _, _, z = self._sample_latent(h_x)
        x_hat = self.decoder(z)
        return x_hat

    def reparameterize(self, mu: Tensor, logvar: Tensor) -> Tensor:
        """Sample `z = mu + eps * std`, with `eps` drawn from a standard gaussian."""
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        z = mu + eps * std
        return z

    def get_loss(self, forward_pass: Dict[str, Tensor], y: Tensor = None) -> Loss:
        """Get the VAE loss: the reconstruction loss plus `beta` times the KL divergence."""
        x = forward_pass["x"]
        h_x = forward_pass["h_x"]
        mu, logvar, z = self._sample_latent(h_x)
        x_hat = self.decoder(z)

        recon_loss = self.reconstruction_loss(x_hat, x)
        kl_loss = self.options.beta * self.kl_divergence_loss(mu, logvar)
        loss = Loss(self.name, tensors=dict(mu=mu, logvar=logvar, z=z, x_hat=x_hat))
        loss += Loss("recon", loss=recon_loss)
        loss += Loss("kl", loss=kl_loss)
        return loss

    def generate(self, z: Tensor) -> Tensor:
        """Generate samples by decoding the given codes `z`.

        `z` is a code (of size `code_size`), so it is given directly to the decoder.
        Passing it through `forward` would instead feed it to the `mu` and `logvar`
        layers, which expect hidden vectors of size `hidden_size`.
        """
        z = z.to(self.device)
        return self.decoder(z)

    @staticmethod
    def kl_divergence_loss(mu: Tensor, logvar: Tensor) -> Tensor:
        """Return the KL divergence term of the VAE loss, summed over all elements."""
        # see Appendix B from VAE paper:
        # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
        # https://arxiv.org/abs/1312.6114
        # -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
        return -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
================================================
FILE: sequoia/methods/aux_tasks/transformation_based/__init__.py
================================================
from .bases import ClassifyTransformationTask, RegressTransformationTask, TransformationBasedTask
from .rotation import RotationTask
================================================
FILE: sequoia/methods/aux_tasks/transformation_based/bases.py
================================================
from dataclasses import dataclass
from functools import wraps
from typing import Any, Callable, List, Tuple
import torch
from torch import Tensor, nn
from torchvision.transforms import functional as TF
from sequoia.common.loss import Loss
from sequoia.common.metrics import Metrics, get_metrics
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import fix_channels
from ..auxiliary_task import AuxiliaryTask
# Module-level logger, created with this module's `__name__`.
logger = get_logger(__name__)
def wrap_pil_transform(function: Callable):
    """Wrap a function that operates on PIL images so it can be applied to a batch of tensors.

    The returned function takes a batch `x` and an argument `arg`. Each element of the
    batch is converted to a PIL image (on the CPU), given to `function` along with
    `arg`, and converted back to a tensor like the original element. The results are
    then put back together into a tensor with the same shape as `x`.
    """

    def _apply_to_image(image, arg):
        pil_image = TF.to_pil_image(image.cpu())
        transformed = function(pil_image, arg)
        as_tensor = TF.to_tensor(transformed)
        # Restore the shape, as well as the dtype and device of the original image.
        return as_tensor.view(image.shape).to(image)

    @wraps(function)
    def _pil_transform(x: Tensor, arg: Any):
        transformed_images = [_apply_to_image(image, arg) for image in x]
        return torch.cat(transformed_images).view(x.shape)

    return _pil_transform
class TransformationBasedTask(AuxiliaryTask):
    """
    Generates an AuxiliaryTask for an arbitrary transformation function.

    Tries to classify or regress which argument was passed to the function,
    given only the transformed code, if `compare_with_original` is False, else
    given the original and transformed codes.

    NOTE: For now, the same function is applied to all the images within the
    batch. Therefore, the function_args is one value per batch of transformed
    images, and not one value per image.
    """

    @dataclass
    class Options(AuxiliaryTask.Options):
        """Command-line options for the Transformation-based auxiliary task."""

        # Whether or not both the original and transformed codes should be passed
        # to the auxiliary layer in order to detect the transformation.
        compare_with_original: bool = True

    def __init__(
        self,
        function: Callable[[Tensor, Any], Tensor],
        function_args: List[Any],
        loss: Callable,
        name: str = None,
        auxiliary_layer: nn.Module = None,
        options: Options = None,
    ):
        """Creates a transformation-based task to predict alpha given the codes.

        Args:
            function (Callable[[Tensor, Any], Tensor]): A function to apply to x
                before it is passed to the encoder.
            function_args (List[Any]): The arguments to be passed to the
                `function`.
            loss (Callable): A loss function, which will be called with
                `alpha_pred` and `alpha` to get a loss for each argument in
                `function_args`.
            name (str, optional): Name of the task. Defaults to the name of
                `function`.
            auxiliary_layer (nn.Module, optional): Layer that predicts the argument
                from the codes. When not given, a `Flatten` + `Linear` layer with one
                output per function argument is created.
            options (Options, optional): The options of the task. Defaults to
                `self.Options()`.
        """
        super().__init__(options=options)
        self.function = function
        self.name = name or self.function.__name__
        self.function_args = function_args
        self.alphas: Tensor = torch.Tensor(self.function_args)
        self.options: TransformationBasedTask.Options = options or self.Options()
        self.nargs = len(self.function_args)
        # which loss to use. CrossEntropy when classifying, or MSE when regressing.
        self.loss = loss

        if auxiliary_layer is None:
            # The layer receives both codes when comparing with the original.
            n_codes = 2 if self.options.compare_with_original else 1
            auxiliary_layer = nn.Sequential(
                nn.Flatten(),
                nn.Linear(AuxiliaryTask.hidden_size * n_codes, self.nargs),
            )
        self.auxiliary_layer = auxiliary_layer

    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
        """Get the total loss, over all the function arguments.

        The metrics for each of the arguments are fused into a single entry, stored
        under the name of the task.
        """
        assert self.alphas is not None, "set the `self.alphas` attribute in the base class."
        assert (
            self.function_args is not None
        ), "set the `self.function_args` attribute in the base class."

        total_loss = Loss(self.name)
        # Get the loss for each transformation argument.
        for fn_arg, alpha in zip(self.function_args, self.alphas):
            total_loss += self.get_loss_for_arg(x=x, h_x=h_x, fn_arg=fn_arg, alpha=alpha)

        # Fuse all the sub-metrics into a total metric.
        # For instance, all the "rotate_0", "rotate_90", "rotate_180", etc.
        metrics = total_loss.metrics
        # we actually add up all the metrics to get the "overall" metric.
        fused_metrics = sum(metrics.values(), Metrics())
        metrics.clear()
        metrics[self.name] = fused_metrics
        return total_loss

    def get_loss_for_arg(self, x: Tensor, h_x: Tensor, fn_arg: Any, alpha: Tensor) -> Loss:
        """Get the loss for a single function argument `fn_arg`, with target `alpha`."""
        alpha = alpha.to(x.device)
        # TODO: Transform before or after the `preprocess_inputs` function?
        x = fix_channels(x)

        # Transform X using the function, and get the code for the transformed x.
        x_t = self.function(x, fn_arg)
        h_x_t = self.encode(x_t)

        if self.options.compare_with_original:
            aux_layer_input = torch.cat([h_x, h_x_t], dim=-1)
        else:
            aux_layer_input = h_x_t
        # Get the predicted argument of the transformation.
        alpha_t = self.auxiliary_layer(aux_layer_input)

        # get the metrics for this particular argument (accuracy, mse, etc.)
        name = f"{fn_arg}" if isinstance(fn_arg, int) else f"{fn_arg:.3f}"
        loss = Loss(name)
        loss.loss = self.loss(alpha_t, alpha)
        loss.metrics[name] = get_metrics(x=x_t, h_x=h_x_t, y_pred=alpha_t, y=alpha)

        # Save some tensors for debugging purposes:
        loss.tensors.update(x_t=x_t, h_x_t=h_x_t, alpha_t=alpha_t)
        return loss
class ClassifyTransformationTask(TransformationBasedTask):
    """
    Generates an AuxiliaryTask for an arbitrary transformation function.

    Tries to classify which argument was passed to the function.

    `self.alphas` is the classification target. It indicates which
    transformation argument was used.
    I.e. a vector of 0's for function_args[0], 1's for function_args[1], etc.
    """

    def __init__(
        self,
        function: Callable[[Tensor, Any], Tensor],
        function_args: List[Any],
        name: str = None,
        options: TransformationBasedTask.Options = None,
    ):
        """Create the task, using a cross-entropy loss over the function arguments."""
        classification_loss = nn.CrossEntropyLoss()
        super().__init__(
            function=function,
            function_args=function_args,
            name=name,
            loss=classification_loss,
            options=options,
        )
        # One class index per function argument.
        n_classes = len(function_args)
        self.labels = torch.arange(n_classes, dtype=torch.long)

    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
        """Get the classification loss, over all the function arguments."""
        n_samples = x.shape[0]
        # Row `i` of the targets holds the label `i`, repeated once for each sample.
        labels_column = self.labels.view(-1, 1)
        self.alphas = labels_column.repeat(1, n_samples)
        return super().get_loss(x=x, h_x=h_x, y_pred=y_pred, y=y)
class RegressTransformationTask(TransformationBasedTask):
    """
    Generates an AuxiliaryTask for an arbitrary transformation function.

    Tries to Regress which argument value was passed to the function.

    x -----------------------encoder(x)-> h_x -----|
    x --f(x, alpha)--> x_t --encoder(x)-> h_x_t ---|----A(h_x, h_x_t) --> alpha_pred <-MSE-> alpha

    Can either use a list of function arguments, or a range from which to sample
    the argument values uniformly.
    """

    def __init__(
        self,
        function: Callable[[Tensor, Any], Tensor],
        function_args: List[Any] = None,
        name: str = None,
        function_arg_range: Tuple[float, float] = None,
        n_calls: int = 2,
        options: TransformationBasedTask.Options = None,
    ):
        """Create the task, using a mean-squared-error loss.

        When `function_arg_range` is given, `n_calls` arguments are sampled from that
        range at each step. Otherwise, the range goes from the smallest to the largest
        of the `function_args`, and there are as many samples as there are arguments.

        Raises a RuntimeError when neither `function_args` nor `function_arg_range`
        is set.
        """
        super().__init__(
            function=function,
            function_args=[],
            name=name,
            loss=nn.MSELoss(),
            options=options,
        )
        if not function_arg_range and not function_args:
            raise RuntimeError("`function_args` or `function_arg_range` must be set.")

        if function_arg_range:
            self.function_arg_range = function_arg_range
            self.n_calls = n_calls
        else:
            self.function_arg_range = (min(function_args), max(function_args))
            self.n_calls = len(function_args)

        # Bounds, middle and amplitude of the range of arguments.
        self.arg_min = self.function_arg_range[0]
        self.arg_max = self.function_arg_range[1]
        self.arg_med = (self.arg_min + self.arg_max) / 2
        self.arg_amp = self.arg_max - self.arg_min

        # The layer receives both codes when comparing with the original.
        n_codes = 2 if self.options.compare_with_original else 1
        # Replaces the auxiliary layer that was created by the parent class.
        self.auxiliary_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(AuxiliaryTask.hidden_size * n_codes, 1),
            nn.Sigmoid(),
            ScaleToRange(arg_min=self.arg_min, arg_amp=self.arg_amp),
        )

    def get_function_args(self) -> Tensor:
        """Sample `n_calls` random arguments, uniformly in [`arg_min`, `arg_max`)."""
        unit_samples = torch.rand(self.n_calls)
        return unit_samples * self.arg_amp + self.arg_min

    def get_loss(self, x: Tensor, h_x: Tensor, y_pred: Tensor = None, y: Tensor = None) -> Loss:
        """Get the regression loss, for freshly sampled function arguments."""
        n_samples = x.shape[0]
        sampled_args = self.get_function_args()
        self.function_args = sampled_args.tolist()
        # One target of shape [n_samples, 1] for each of the sampled arguments.
        self.alphas = sampled_args.view(-1, 1, 1).repeat(1, n_samples, 1)
        return super().get_loss(x=x, h_x=h_x, y_pred=y_pred, y=y)
class ScaleToRange(nn.Module):
    """Maps inputs from the [0, 1] range to [`arg_min`, `arg_min + arg_amp`].

    Computes `arg_min + arg_amp * x`, element-wise.
    """

    def __init__(self, arg_min: float, arg_amp: float):
        """
        Args:
            arg_min (float): Lower bound of the output range (the offset).
            arg_amp (float): Amplitude (width) of the output range (the scale).
        """
        super().__init__()
        self.arg_min = arg_min
        # NOTE: The amplitude has to be stored as `arg_amp`, since this is the attribute
        # which `forward` reads. Storing it under another name makes every forward pass
        # fail with an AttributeError.
        self.arg_amp = arg_amp

    def forward(self, x: Tensor) -> Tensor:
        """Return `x`, scaled by the amplitude and shifted by the minimum."""
        return self.arg_min + self.arg_amp * x
================================================
FILE: sequoia/methods/aux_tasks/transformation_based/rotation.py
================================================
from dataclasses import dataclass
from torch import Tensor
from .bases import ClassifyTransformationTask
def rotate(x: Tensor, angle: int) -> Tensor:
    """Rotates the given tensor `x` by an angle `angle`.

    The rotation happens in the plane of the last two dimensions of `x`.
    Currently only supports multiples of 90 degrees.

    Args:
        x (Tensor): An image or a batch of images, with shape [(b), C, H, W]
        angle (int): An angle, in degrees. Has to be a multiple of 90.

    Returns:
        Tensor: The tensor x, rotated by `angle` degrees counter-clockwise.

    Example:
    >>> import torch
    >>> x = torch.Tensor([
    ...    [1, 2, 3],
    ...    [4, 5, 6],
    ...    [7, 8, 9],
    ... ])
    >>> print(x)
    tensor([[1., 2., 3.],
            [4., 5., 6.],
            [7., 8., 9.]])
    >>> x = x.view(1, 3, 3)
    >>> x_rot = rotate(x, 90)
    >>> print(x_rot.shape)
    torch.Size([1, 3, 3])
    >>> print(x_rot)
    tensor([[[3., 6., 9.],
             [2., 5., 8.],
             [1., 4., 7.]]])
    """
    # TODO: Test that this works.
    quarter_turns, leftover = divmod(angle, 90)
    assert leftover == 0, "can only rotate 0, 90, 180, or 270 degrees for now."
    # BUG: Very rarely, this condition won't work! (More specifically, only on the last batch of data!)
    # assert min(x.shape) == x.shape[-3], f"Image should be in [(b) C H W] format. (image shape: {x.shape}"
    height_and_width_dims = (-2, -1)
    return x.rot90(quarter_turns, dims=height_and_width_dims)
# Run the doctests of this module (e.g. the example in the docstring of `rotate`) when
# this file is executed as a script.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
class RotationTask(ClassifyTransformationTask):
    """Auxiliary task which classifies by which angle (0, 90, 180 or 270 degrees) the
    inputs were rotated.
    """

    @dataclass
    class Options(ClassifyTransformationTask.Options):
        """Command-line options for the Transformation-based auxiliary task."""

        # Whether or not both the original and transformed codes should be passed
        # to the auxiliary layer in order to detect the transformation.
        # TODO: Maybe try with this set to False, to learn "innate" orientation rather than relative orientation.
        compare_with_original: bool = True

    def __init__(self, name="rotation", options: "RotationTask.Options" = None):
        """Create the task, using the `rotate` function and the four supported angles."""
        rotation_angles = [0, 90, 180, 270]
        task_options = options or RotationTask.Options()
        super().__init__(
            function=rotate,
            function_args=rotation_angles,
            name=name,
            options=task_options,
        )
================================================
FILE: sequoia/methods/avalanche_methods/__init__.py
================================================
""" Adapters for Avalanche Strategies, so they can be used as Methods in Sequoia.
See the Avalanche repo for more info: https://github.com/ContinualAI/avalanche
"""
# from .agem import AGEMMethod
# from .ar1 import AR1Method
# from .base import AvalancheMethod
# from .cwr_star import CWRStarMethod
# from .ewc import EWCMethod
# # Still quite buggy, needs to be fixed on the avalanche side.
# from .gdumb import GDumbMethod
# from .gem import GEMMethod
# from .lwf import LwFMethod
# from .naive import NaiveMethod
# from .replay import ReplayMethod
# from .synaptic_intelligence import SynapticIntelligenceMethod
================================================
FILE: sequoia/methods/avalanche_methods/agem.py
================================================
""" Method based on AGEM from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.agem.AGEMPlugin` or
`avalanche.training.strategies.strategy_wrappers.AGEM` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Type
import pytest
from avalanche.training.strategies import AGEM, BaseStrategy
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform
from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting
from .base import AvalancheMethod
@register_method
@dataclass
class AGEMMethod(AvalancheMethod[AGEM]):
    """Average Gradient Episodic Memory (AGEM) strategy from Avalanche.
    See AGEM plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # number of patterns per experience in the memory
    patterns_per_exp: int = uniform(10, 1000, default=100)
    # number of patterns in memory sample when computing reference gradient.
    sample_size: int = uniform(16, 256, default=64)

    # The Avalanche strategy class associated with this method.
    # NOTE(review): presumably instantiated by `AvalancheMethod` -- confirm in `base.py`.
    strategy_class: ClassVar[Type[BaseStrategy]] = AGEM
if __name__ == "__main__":
    # Small demo: apply AGEM to task-incremental MNIST with 5 tasks.
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    # NOTE: The first positional argument of `ArgumentParser` is `prog`, so the module
    # docstring has to be passed by keyword to be shown as the `--help` description.
    parser = ArgumentParser(description=__doc__)
    parser.add_arguments(AGEMMethod, "method")
    args = parser.parse_args()
    method: AGEMMethod = args.method
    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/agem_test.py
================================================
""" WIP: Tests for the AGEM Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .agem import AGEMMethod
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
class TestAGEMMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` with `AGEMMethod`."""

    # Method class under test, read by the inherited tests through `self.Method`.
    Method: ClassVar[Type[AvalancheMethod]] = AGEMMethod
================================================
FILE: sequoia/methods/avalanche_methods/ar1.py
================================================
""" Method based on AR1 from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.strategies.ar1.AR1` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Type
from avalanche.training.strategies import AR1, BaseStrategy
from simple_parsing.helpers.hparams import log_uniform, uniform
from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting
from .base import AvalancheMethod
@register_method
@dataclass
class AR1Method(AvalancheMethod[AR1]):
    """AR1 strategy from Avalanche.

    See AR1 plugin for details.
    This strategy does not use task identities.

    Some fields below (`train_epochs`, `train_mb_size`, `eval_mb_size`) re-declare
    fields of the parent class with different search ranges and defaults.
    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # The learning rate (SGD optimizer).
    lr: float = log_uniform(1e-6, 1e-2, default=0.001)
    # The momentum (SGD optimizer).
    momentum: float = uniform(0.9, 0.999, default=0.9)
    # The L2 penalty used for weight decay.
    l2: float = uniform(1e-6, 1e-3, default=0.0005)
    # The number of training epochs. Defaults to 4.
    train_epochs: int = uniform(1, 50, default=4)
    # The initial update rate of BatchReNorm layers.
    init_update_rate: float = 0.01
    # The incremental update rate of BatchReNorm layers.
    inc_update_rate: float = 0.00005
    # The maximum r value of BatchReNorm layers.
    max_r_max: float = 1.25
    # The maximum d value of BatchReNorm layers.
    max_d_max: float = 0.5
    # The incremental step of r and d values of BatchReNorm layers.
    inc_step: float = 4.1e-05
    # The size of the replay buffer. The replay buffer is shared across classes.
    rm_sz: int = uniform(500, 2000, default=1500)
    # A string describing the name of the layer to use while freezing the lower
    # (nearest to the input) part of the model. The given layer is not frozen
    # (exclusive).
    freeze_below_layer: str = "lat_features.19.bn.beta"
    # The number of the layer to use as the Latent Replay Layer. Usually this is the
    # same as `freeze_below_layer`.
    latent_layer_num: int = 19
    # The Synaptic Intelligence lambda term. Defaults to 0, which means that the
    # Synaptic Intelligence regularization will not be applied.
    ewc_lambda: float = uniform(0, 1, default=0)
    # The train minibatch size. Defaults to 128.
    train_mb_size: int = uniform(1, 512, default=128)
    # The eval minibatch size. Defaults to 128.
    eval_mb_size: int = uniform(1, 512, default=128)

    # Avalanche strategy class wrapped by this Method.
    strategy_class: ClassVar[Type[BaseStrategy]] = AR1
if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    # Small demo: apply AR1 to task-incremental MNIST with 5 tasks.
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    # NOTE: The first positional argument of `ArgumentParser` is `prog`, so the module
    # docstring has to be passed by keyword to be shown as the `--help` description.
    parser = ArgumentParser(description=__doc__)
    parser.add_arguments(AR1Method, "method")
    args = parser.parse_args()
    method: AR1Method = args.method
    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/ar1_test.py
================================================
""" WIP: Tests for the AR1 Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
import pytest
from avalanche.models import SimpleCNN, SimpleMLP
from torch.nn import Module
from sequoia.common.config import Config
from sequoia.conftest import xfail_param
from sequoia.settings.sl import TaskIncrementalSLSetting
from .ar1 import AR1Method
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .patched_models import MTSimpleCNN, MTSimpleMLP
@pytest.mark.xfail(reason="AR1 isn't super well supported yet.")
class TestAR1Method(_TestAvalancheMethod):
    """Tests for `AR1Method`; the whole class is marked as an expected failure."""

    # Method class under test, read by the tests through `self.Method`.
    Method: ClassVar[Type[AvalancheMethod]] = AR1Method

    @pytest.mark.timeout(60)
    @pytest.mark.parametrize(
        "model_type",
        [
            xfail_param(
                SimpleCNN,
                reason="seems like the model in AR1 is supposed to be larger?",
            ),
            SimpleMLP,
            xfail_param(
                MTSimpleCNN,
                reason="IndexError Bug inside `avalanche/models/dynamic_modules.py",
            ),
            xfail_param(
                MTSimpleMLP,
                reason="IndexError Bug inside `avalanche/models/dynamic_modules.py",
            ),
        ],
    )
    def test_short_task_incremental_setting(
        self,
        model_type: Type[Module],
        short_task_incremental_setting: TaskIncrementalSLSetting,
        config: Config,
    ):
        """Apply the Method to a short task-incremental setting with each model type.

        Only `SimpleMLP` is parametrized without an `xfail_param` wrapper.
        """
        method = self.Method(model=model_type)
        results = short_task_incremental_setting.apply(method, config)
        # Very loose lower bound on the final performance.
        assert 0.05 < results.average_final_performance.objective
================================================
FILE: sequoia/methods/avalanche_methods/base.py
================================================
""" Adapter for the `BaseStrategy` from Avalanche, wrapping it up into a Sequoia Method.
See the Avalanche repo for more info: https://github.com/ContinualAI/avalanche
"""
import inspect
import warnings
from dataclasses import dataclass, fields
from typing import ClassVar, Dict, Generic, List, Optional, Type, TypeVar, Union
import gym
import torch
import tqdm
from avalanche.benchmarks.scenarios import Experience
from avalanche.evaluation.metrics import accuracy_metrics, forgetting_metrics, loss_metrics
from avalanche.logging import InteractiveLogger
from avalanche.logging.wandb_logger import WandBLogger as _WandBLogger
from avalanche.models import SimpleCNN, SimpleMLP
from avalanche.models.utils import avalanche_forward
from avalanche.training.plugins import EvaluationPlugin, StrategyPlugin
from avalanche.training.strategies import BaseStrategy
from gym import spaces
from gym.spaces.utils import flatdim
from gym.utils import colorize
from simple_parsing.helpers import choice, field, list_field
from simple_parsing.helpers.hparams import HyperParameters, log_uniform, uniform
from torch import nn, optim
from torch.nn import Module
from torch.optim import SGD
from torch.optim.optimizer import Optimizer
from sequoia.common.spaces import Image
from sequoia.methods import Method
from sequoia.settings.sl import (
ClassIncrementalSetting,
ContinualSLSetting,
PassiveEnvironment,
SLSetting,
)
from sequoia.settings.sl.continual import Actions, ContinualSLTestEnvironment, Observations, Rewards
from sequoia.settings.sl.continual.setting import smart_class_prediction
from sequoia.utils import get_logger
from .experience import SequoiaExperience
from .patched_models import MTSimpleCNN, MTSimpleMLP
logger = get_logger(__name__)
StrategyType = TypeVar("StrategyType", bound=BaseStrategy)
# "Patch" for the WandbLogger of Avalanche
class WandBLogger(_WandBLogger):
    """Patched variant of Avalanche's `WandBLogger`.

    `before_run` only calls `wandb.init` when `wandb.run` is falsy, so a run that
    already exists is left untouched.
    """

    def import_wandb(self):
        """Import `wandb` on demand and store the module on `self.wandb`."""
        try:
            import wandb as wandb_module
        except ImportError:
            raise ImportError('Please run "pip install wandb" to install wandb')
        self.wandb = wandb_module

    def args_parse(self):
        """Build `self.init_kwargs`, the keyword arguments used for `wandb.init`."""
        self.init_kwargs = dict(project=self.project_name, name=self.run_name)
        extra_params = self.params
        if extra_params:
            self.init_kwargs.update(extra_params)

    def before_run(self):
        """Make sure `wandb` is imported, then start a run unless one already exists."""
        if self.wandb is None:
            self.import_wandb()
        # An empty / missing set of kwargs is the same as calling `init()` bare.
        init_kwargs = self.init_kwargs or {}
        if not self.wandb.run:
            self.wandb.init(**init_kwargs)
@dataclass
class AvalancheMethod(
    Method,
    HyperParameters,
    Generic[StrategyType],
    target_setting=ContinualSLSetting,
):
    """Base class for all the Methods adapted from Avalanche.

    The dataclass fields are the hyper-parameters of the Method. When `configure` is
    called, the fields whose name matches a parameter of `strategy_class.__init__` are
    forwarded to the Avalanche strategy (see `create_cl_strategy`).
    """

    # Name for the 'family' of methods, use to differentiate methods with the same name.
    family: ClassVar[str] = "avalanche"

    # The Strategy class to use for this Method. Subclasses have to add this property.
    strategy_class: ClassVar[Type[StrategyType]] = BaseStrategy

    # TODO: Maybe use a 'PluginClass', so that we can avoid subclassing both the
    # plugin and the strategy when we need to patch something in the plugin.
    plugin_class: ClassVar[Optional[Type[StrategyPlugin]]]

    # Class Variable to hold the types of models available as options for the `model`
    # field below.
    available_models: ClassVar[Dict[str, Type[nn.Module]]] = {
        "simple_cnn": SimpleCNN,
        "simple_mlp": SimpleMLP,
        "mt_simple_cnn": MTSimpleCNN,
        "mt_simple_mlp": MTSimpleMLP,
    }

    # Class Variable to hold the types of optimizers available for the `optimizer` field
    # below.
    available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
        "sgd": SGD,
        "adam": optim.Adam,
        "rmsprop": optim.RMSprop,
    }

    # Class variable to hold the types of loss functions available for the `criterion`
    # field below.
    available_criterions: ClassVar[Dict[str, Type[nn.Module]]] = {
        "cross_entropy_loss": nn.CrossEntropyLoss,
    }

    # The model.
    model: Union[Module, Type[Module]] = choice(available_models, default=SimpleCNN)
    # The optimizer to use.
    optimizer: Union[Optimizer, Type[Optimizer]] = choice(available_optimizers, default=optim.Adam)
    # The loss criterion to use.
    criterion: Union[Module, Type[Module]] = choice(
        available_criterions, default=nn.CrossEntropyLoss
    )
    # The train minibatch size.
    train_mb_size: int = uniform(1, 2048, default=64)
    # The number of training epochs.
    train_epochs: int = uniform(1, 100, default=5)
    # The eval minibatch size.
    eval_mb_size: int = 1
    # The device to use. Defaults to cuda when available, cpu otherwise.
    device: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Plugins to be added. Defaults to None.
    plugins: Optional[List[StrategyPlugin]] = list_field(default=None, cmd=False, to_dict=False)
    # (optional) instance of EvaluationPlugin for logging and metric computations.
    evaluator: Optional[EvaluationPlugin] = field(None, cmd=False, to_dict=False)
    # The frequency of the calls to `eval` inside the training loop.
    # if -1: no evaluation during training.
    # if 0: calls `eval` after the final epoch of each training
    # experience.
    # if >0: calls `eval` every `eval_every` epochs and at the end
    # of all the epochs for a single experience.
    eval_every: int = -1
    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
    # L2 regularization term for the model weights.
    weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
    # Hidden size of the model, when applicable.
    hidden_size: int = uniform(128, 1024, default=512)
    # Number of workers of the dataloader. Defaults to 4.
    num_workers: int = 4

    def __post_init__(self):
        super().__post_init__()
        # Count the number of calls to `configure`. (useful when running sweeps, as we
        # reuse the Method instance.)
        self._n_configures: int = 0
        # Bare annotations only: both attributes are assigned in `configure`.
        self.setting: ClassIncrementalSetting
        self.cl_strategy: StrategyType

    def configure(self, setting: ClassIncrementalSetting) -> None:
        """Create the model, criterion, evaluator, optimizer and strategy for `setting`.

        Also warns the user when the setting monitors training performance while
        `environment_to_experience` is not overridden by the subclass.
        """
        self.setting = setting
        self.model = self.create_model(setting).to(self.device)
        # Count this call only once the model exists, so that `create_model` sees 0 on
        # the first run and a positive value on every following run. (The counter used
        # to be incremented only inside a `> 0` branch, and so never left 0.)
        self._n_configures += 1

        # Select the loss function to use.
        if not isinstance(self.criterion, nn.Module):
            self.criterion = self.criterion()

        metrics = [
            accuracy_metrics(epoch=True, experience=True, stream=True),
            forgetting_metrics(experience=True, stream=True),
            loss_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        ]
        loggers = [
            # BUG: evaluation.py:94, _update_metrics:
            # before_training() takes 2 positional arguments but 3 were given
            # default_logger,
            InteractiveLogger(),
        ]
        if setting.wandb and setting.wandb.project:
            wandb_logger = WandBLogger(
                project_name=setting.wandb.project,
                run_name=setting.wandb.run_name,
                params=setting.wandb.wandb_init_kwargs(),
            )
            loggers.append(wandb_logger)
        self.evaluator = EvaluationPlugin(
            *metrics,
            loggers=loggers,
        )
        self.optimizer = self.make_optimizer()
        # Actually initialize the strategy using the fields on `self`.
        self.cl_strategy: StrategyType = self.create_cl_strategy(setting)

        if setting.monitor_training_performance and (
            type(self).environment_to_experience is AvalancheMethod.environment_to_experience
        ):
            warnings.warn(
                UserWarning(
                    colorize(
                        "This Setting would like to monitor the online training "
                        "performance, which means that the rewards/labels (`y`) are "
                        "returned after sending an action (prediction) to the training "
                        "environment."
                        "\n"
                        "However, Avalanche does not currently support training on "
                        "'active' dataloaders or gym environments, and needs access to "
                        "the 'x' and 'y' at the same time, as is usually the case in "
                        "Supervised CL."
                        "\n"
                        "Therefore, the current solution I've found for this issue is "
                        "to iterate once over the training environment, sending it "
                        "(by default random) actions, in order to create an "
                        "'Experience' object expected by the Avalanche Strategies."
                        "\n"
                        "Concretely, this means that, unless you overwrite the "
                        "`environment_to_experience` method, **your online performance "
                        "score will be limited to chance accuracy!**",
                        "yellow",
                    )
                )
            )

    def create_cl_strategy(self, setting: ClassIncrementalSetting) -> StrategyType:
        """Instantiate `strategy_class` from the fields of this Method.

        Only the dataclass fields whose name is also a parameter of the strategy's
        `__init__` are passed along.
        """
        strategy_constructor_params: List[str] = list(
            inspect.signature(self.strategy_class.__init__).parameters.keys()
        )
        cl_strategy_kwargs = {
            f.name: getattr(self, f.name)
            for f in fields(self)
            if f.name in strategy_constructor_params
        }
        return self.strategy_class(**cl_strategy_kwargs)

    def create_model(self, setting: ClassIncrementalSetting) -> Module:
        """Create the Model for the setting.

        A model *instance* stored on `self.model` is used as-is during the first call to
        `configure`. On later calls it is replaced by a new instance of the same type,
        so that re-using the Method (e.g. in a sweep) starts from a fresh model.

        Parameters
        ----------
        setting : ClassIncrementalSetting
            The Setting on which this Method will be applied.

        Returns
        -------
        Module
            The Model to be used, which will be passed to the Strategy constructor.
        """
        image_space: Image = setting.observation_space.x
        input_dims = flatdim(image_space)
        assert isinstance(
            setting.action_space, spaces.Discrete
        ), "assume a classification problem for now."
        num_classes = setting.action_space.n

        if setting.task_labels_at_train_time:
            if setting.task_labels_at_test_time:
                if self.model is SimpleCNN and MTSimpleCNN in self.available_models.values():
                    self.model = MTSimpleCNN
                    logger.info(
                        f"Upgrading the model to a {MTSimpleCNN}, since task-labels "
                        f"are available at train and test time."
                    )
                if self.model is SimpleMLP and MTSimpleMLP in self.available_models.values():
                    self.model = MTSimpleMLP
                    logger.info(
                        f"Upgrading the model to a {MTSimpleMLP}, since task-labels "
                        f"are available at train and test time."
                    )

        if isinstance(self.model, nn.Module):
            if self._n_configures == 0:
                logger.info(f"Using model {self.model}.")
                return self.model
            logger.info("Resetting the model, since this isn't the first run.")
            # Fall through with the *type* of the model, so it gets re-created below.
            self.model = type(self.model)

        if self.model is SimpleMLP:
            return self.model(
                input_size=input_dims,
                hidden_size=self.hidden_size,
                num_classes=num_classes,
            )
        if self.model is MTSimpleMLP:
            return self.model(input_size=input_dims, hidden_size=self.hidden_size)
        if self.model is SimpleCNN:
            return self.model(num_classes=num_classes)
        # self.model is most probably a type of nn.Module, so we instantiate it.
        # These other models (MTSimpleCNN) don't seem to take any kwargs.
        return self.model()

    def make_optimizer(self) -> Optimizer:
        """Creates the Optimizer.

        When `self.optimizer` is already an instance, a new optimizer of the same type is
        created for the parameters of the current model.
        """
        optimizer_class = self.optimizer
        if isinstance(self.optimizer, Optimizer):
            optimizer_class = type(self.optimizer)
        return optimizer_class(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )

    def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Convert both environments into experiences and train the strategy on them."""
        train_exp = self.environment_to_experience(train_env, setting=self.setting)
        valid_exp = self.environment_to_experience(valid_env, setting=self.setting)
        self.cl_strategy.train(train_exp, eval_streams=[valid_exp], num_workers=self.num_workers)

    def get_actions(
        self,
        observations: ClassIncrementalSetting.Observations,
        action_space: gym.Space,
    ) -> ClassIncrementalSetting.Actions:
        """Predict a class for each observation in the batch, without tracking gradients."""
        observations = observations.to(self.device)
        with torch.no_grad():
            x = observations.x
            task_labels = observations.task_labels
            logits = avalanche_forward(self.model, x=x, task_labels=task_labels)
            if task_labels is not None:
                # If task labels are available, figure out the possible classes for
                # each task, and 'mask out' those so they aren't predicted.
                y_pred = smart_class_prediction(
                    logits, task_labels, setting=self.setting, train=False
                )
            else:
                y_pred = logits.argmax(-1)
            return self.target_setting.Actions(y_pred=y_pred)

    def set_testing(self):
        """Clear the model's `current_task_id` before switching to testing mode."""
        self.model.current_task_id = None
        return super().set_testing()

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Record the new task id on the model while training; do nothing otherwise."""
        if self.training:
            # No need to tell the cl_strategy, because we call `.train` which calls
            # `before_training_exp` with the current exp (the current task).
            self.model.current_task_id = task_id
        else:
            # TODO: In Sequoia, the test 'epoch' goes through the sequence of tasks, not
            # necessarily in the same order as during training, while in Avalanche the
            # 'eval' occurs on a per-task basis.
            # TODO: There is a bug with task-incremental setting, where during testing
            # the algo might be tested on tasks it hasn't built an output layer for yet,
            # but building this layer requires calling `adaptation(dataset)` and this
            # dataset will be iterated on, which isn't great in the case of the test
            # env...
            # During test-time, there might be a task boundary, and we need to let the
            # cl_strategy and the plugins know.
            # TODO: Get this working, figure out what the plugins expect to retrieve
            # from the cl_strategy in this callback.
            pass

    def get_search_space(self, setting: ClassIncrementalSetting):
        """Return the hyper-parameter search space given by `get_orion_space`."""
        return self.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict):
        """Set each of the given hyper-parameters as an attribute on `self`.

        Raises
        ------
        NotImplementedError
            If one of the values is a dict (nested hyper-parameters aren't supported).
        """
        for k, v in new_hparams.items():
            if isinstance(v, dict):
                raise NotImplementedError(f"todo: set hparam {k} to value {v}")
            setattr(self, k, v)

    def environment_to_experience(self, env: PassiveEnvironment, setting: SLSetting) -> Experience:
        """
        "Converts" the PassiveEnvironments (dataloaders) from Sequoia
        into an Experience object usable by the Avalanche Strategies. By default, this
        just iterates through the environment once, sending it randomly sampled actions
        whenever the rewards aren't given along with the observations.

        NOTE: You could instead train an online model here, in order to get better
        online performance!
        """
        all_observations: List[Observations] = []
        all_rewards: List[Rewards] = []

        for batch in tqdm.tqdm(env, desc="Converting environment into TensorDataset"):
            observations: Observations
            rewards: Optional[Rewards]
            if isinstance(batch, Observations):
                observations = batch
                rewards = None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            if rewards is None:
                # Need to send actions to the env before we can actually get the
                # associated Reward. Here there are (at least) three options to choose
                # from:

                # Option 1: Select action at random:
                action = env.action_space.sample()
                if observations.batch_size != action.shape[0]:
                    # Trim the sampled actions down to the size of this batch.
                    action = action[: observations.batch_size]
                rewards = env.send(action)

                # Option 2: Use the current model, in 'inference' mode:
                # action = self.get_actions(observations, action_space=env.action_space)
                # rewards: Rewards = env.send(action)

                # Option 3: Train an online model:
                # # NOTE: You might have to change this for your strategy. For instance,
                # # currently does not take any plugins into consideration.
                # self.cl_strategy.optimizer.zero_grad()
                # x = observations.x.to(self.cl_strategy.device)
                # task_labels = observations.task_labels
                # logits = avalanche_forward(self.model, x=x, task_labels=task_labels)
                # y_pred = logits.argmax(-1)
                # action = self.target_setting.Actions(y_pred=y_pred)
                # rewards: Rewards = env.send(action)
                # y = rewards.y.to(self.cl_strategy.device)
                # # Train the model:
                # loss = self.cl_strategy.criterion(logits, y)
                # loss.backward()
                # self.cl_strategy.optimizer.step()

            all_observations.append(observations)
            all_rewards.append(rewards)

        # Stack all the observations into a single `Observations` object:
        stacked_observations: Observations = Observations.concatenate(all_observations)
        stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
        # BUG: Cuda errors, probably due to indexing into a tensor on different device
        # /numpy/etc.
        stacked_observations = stacked_observations.cpu()
        stacked_rewards = stacked_rewards.cpu()

        x = stacked_observations.x
        task_labels = stacked_observations.task_labels
        y = stacked_rewards.y
        return SequoiaExperience(env=env, setting=setting, x=x, y=y, task_labels=task_labels)
def test_epoch(strategy, test_env: ContinualSLTestEnvironment, **kwargs):
    """Run one evaluation pass of `strategy` on the given test environment.

    The strategy is flagged as not training, its model is switched to eval mode and
    moved to the strategy's device, and the actual loop is delegated to
    `test_epoch_gym_env`. The extra keyword arguments are accepted but not used.

    NOTE: The usual Avalanche eval hooks (`before_eval`, the eval dataset adaptation
    hooks, `make_eval_dataloader`, `before_eval_exp`, `eval_epoch`, `after_eval_exp`)
    are intentionally not invoked here.
    """
    strategy.is_training = False
    model = strategy.model
    model.eval()
    model.to(strategy.device)
    test_epoch_gym_env(strategy, test_env)
def test_epoch_gym_env(strategy: BaseStrategy, test_env: ContinualSLTestEnvironment, **kwargs):
    """Evaluate `strategy` by stepping through `test_env` like a gym environment.

    Runs a single episode: the env is reset, then stepped with the argmax of the model's
    logits until it reports `done`. Along the way, the mini-batch attributes of the
    strategy (`mb_it`, `mb_x`, `mb_task_id`, `logits`, `mb_y`, `loss`) are updated. The
    extra keyword arguments are accepted but not used.
    """
    strategy.mb_it = 0
    episode = 0
    strategy.experience = test_env
    total_steps = 0
    max_episodes = 1  # Only one 'episode' / 'epoch'.

    while not test_env.is_closed() and episode < max_episodes:
        observations: Observations = test_env.reset()
        done = False
        step = 0
        with tqdm.tqdm(desc="Eval epoch") as pbar:
            while not done:
                # strategy.before_eval_iteration(**kwargs)
                strategy.mb_x = observations.x
                strategy.mb_task_id = observations.task_labels
                strategy.mb_x = strategy.mb_x.to(strategy.device)
                # IDEA: Should probably return a random action whenever we have task
                # labels in the test loop the task id isn't a known one in the model:
                # strategy.before_eval_forward(**kwargs)
                strategy.logits = avalanche_forward(
                    model=strategy.model,
                    x=strategy.mb_x,
                    task_labels=strategy.mb_task_id,
                )
                y_pred = strategy.logits.argmax(-1)
                actions = Actions(y_pred=y_pred)
                observations, rewards, done, info = test_env.step(actions)
                step += 1
                pbar.update()
                total_steps += 1
                # The loop condition relies on `done` being a plain bool.
                if not isinstance(done, bool):
                    assert False, done
                # NOTE(review): when `rewards` is None, `mb_y` is None and is still
                # passed to the criterion below -- confirm whether that can happen.
                strategy.mb_y = rewards.y.to(strategy.device) if rewards is not None else None
                # strategy.after_eval_forward(**kwargs)
                strategy.mb_it += 1
                strategy.loss = strategy.criterion(strategy.logits, strategy.mb_y)
                # strategy.after_eval_iteration(**kwargs)
                pbar.set_postfix(
                    {
                        "Episode": f"{episode}/{max_episodes}",
                        "step": f"{step}",
                        "total_steps": f"{total_steps}",
                        "loss": f"{strategy.loss.item()}",
                    }
                )
        episode += 1
================================================
FILE: sequoia/methods/avalanche_methods/base_test.py
================================================
import inspect
from inspect import Signature, _empty, getsourcefile
from typing import ClassVar, List, Optional, Type
import pytest
import tqdm
from avalanche.models import SimpleCNN, SimpleMLP
from avalanche.models.utils import avalanche_forward
from avalanche.training.strategies import BaseStrategy
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.method_test import MethodTests
from sequoia.settings.sl import ClassIncrementalSetting, SLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from .base import AvalancheMethod
from .experience import SequoiaExperience
from .patched_models import MTSimpleCNN, MTSimpleMLP
class _TestAvalancheMethod(MethodTests):
    """Tests shared by all the Methods adapted from Avalanche.

    Subclasses set the `Method` class attribute to the Method class under test.
    """

    Method: ClassVar[Type[AvalancheMethod]] = AvalancheMethod

    # Names of (hyper-)parameters which are allowed to have a different default value in
    # Sequoia compared to their implementations in Avalanche.
    ignored_parameter_differences: ClassVar[List[str]] = [
        "plugins",
        "device",
        "eval_mb_size",
        "criterion",
        "train_mb_size",
        "train_epochs",
        "evaluator",
    ]

    @classmethod
    @pytest.fixture(params=[SimpleCNN, SimpleMLP, MTSimpleCNN, MTSimpleMLP])
    def method(cls, config: Config, request) -> AvalancheMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        model_type = request.param
        return cls.Method(model=model_type, train_mb_size=10, train_epochs=1)

    def test_hparams_have_same_defaults_as_in_avalanche(self):
        """Each strategy constructor default must match the default of the Method.

        Parameters without a default are skipped, and the names listed in
        `ignored_parameter_differences` only need to exist on the Method.
        """
        strategy_type: Type[BaseStrategy] = self.Method.strategy_class
        method = self.Method()
        strategy_constructor: Signature = inspect.signature(strategy_type.__init__)
        strategy_init_params = strategy_constructor.parameters

        # TODO: Use the plugin constructor as the reference, rather than the Strategy
        # constructor.
        for parameter_name, parameter in strategy_init_params.items():
            if parameter.default is _empty:
                continue
            assert hasattr(method, parameter_name)
            method_value = getattr(method, parameter_name)
            # Ignore mismatches in some parameters, like `device`.
            if parameter_name in self.ignored_parameter_differences:
                continue

            assert method_value == parameter.default, (
                f"{self.Method.__name__} in Sequoia has different default value for "
                f"hyper-parameter '{parameter_name}' than in Avalanche: \n"
                f"\t{method_value} != {parameter.default}\n"
                f"Path to sequoia implementation: {getsourcefile(self.Method)}\n"
                f"Path to Avalanche implementation: {getsourcefile(strategy_type)}\n"
            )

    def validate_results(
        self,
        setting: SLSetting,
        method: AvalancheMethod,
        results: SLSetting.Results,
    ) -> None:
        """Check that there are results, and that their objective is truthy."""
        assert results
        assert results.objective
        # TODO: Set some 'reasonable' bounds on the performance here, depending on the
        # setting/dataset.

    @slow
    @pytest.mark.timeout(60)
    def test_short_sl_track(
        self,
        method: AvalancheMethod,
        short_sl_track_setting: ClassIncrementalSetting,
        config: Config,
    ):
        """Apply the Method to the short SL track, expecting non-zero performance."""
        # Use the same batch size as the setting, since it's shorter than usual.
        method.train_mb_size = short_sl_track_setting.batch_size
        results = short_sl_track_setting.apply(method, config=config)
        # TODO: Set up a more reasonable bound on the expected performance. For now this
        # is fine as we're just debugging: the test passes as long as there is a results
        # object that contains a non-zero online performance (meaning that the setting
        # was monitoring training performance correctly).
        assert 0 < results.average_online_performance.objective
        assert 0 < results.average_final_performance.objective
def test_warning_if_environment_to_experience_isnt_overwritten(short_sl_track_setting):
    """When the Method doesn't overwrite the `environment_to_experience` method and the
    setting monitors the training performance, `configure` should raise a UserWarning
    that mentions "chance accuracy".
    """
    method = AvalancheMethod()
    assert short_sl_track_setting.monitor_training_performance
    with pytest.warns(UserWarning, match="chance accuracy"):
        method.configure(short_sl_track_setting)
class MyDummyMethod(AvalancheMethod):
    """Method that overrides `environment_to_experience` to train an online model."""

    def environment_to_experience(self, env, setting):
        """Iterate over `env` once, training the model online on batches without rewards.

        For such batches, the model's predictions are sent to the env as actions, and
        one optimization step is taken on the rewards that come back. All observations
        and rewards are then stacked into a single `SequoiaExperience`.
        """
        all_observations: List[Observations] = []
        all_rewards: List[Rewards] = []

        for batch in tqdm.tqdm(env, desc="Converting environment into TensorDataset"):
            observations: Observations
            rewards: Optional[Rewards]
            if isinstance(batch, Observations):
                observations = batch
                rewards = None
            else:
                assert isinstance(batch, tuple) and len(batch) == 2
                observations, rewards = batch

            if rewards is None:
                # Need to send actions to the env before we can actually get the
                # associated Reward. Here there are (at least) three options to choose
                # from:

                # Option 1: Select action at random:
                # action = env.action_space.sample()
                # if observations.batch_size != action.shape[0]:
                # action = action[: observations.batch_size]
                # rewards: Rewards = env.send(action)

                # Option 2: Use the current model, in 'inference' mode:
                # action = self.get_actions(observations, action_space=env.action_space)
                # rewards: Rewards = env.send(action)

                # Option 3: Train an online model:
                # NOTE: You might have to change this for your strategy. For instance,
                # currently does not take any plugins into consideration.
                self.cl_strategy.optimizer.zero_grad()

                x = observations.x.to(self.cl_strategy.device)
                task_labels = observations.task_labels
                logits = avalanche_forward(self.model, x=x, task_labels=task_labels)
                y_pred = logits.argmax(-1)
                action = self.target_setting.Actions(y_pred=y_pred)

                # The labels only become available once the prediction has been sent.
                rewards: Rewards = env.send(action)

                y = rewards.y.to(self.cl_strategy.device)
                # Train the model:
                loss = self.cl_strategy.criterion(logits, y)
                loss.backward()
                self.cl_strategy.optimizer.step()

            all_observations.append(observations)
            all_rewards.append(rewards)

        # Stack all the observations into a single `Observations` object:
        stacked_observations: Observations = Observations.concatenate(all_observations)
        x = stacked_observations.x
        task_labels = stacked_observations.task_labels
        stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
        y = stacked_rewards.y
        return SequoiaExperience(env=env, setting=setting, x=x, y=y, task_labels=task_labels)
def test_no_warning_if_environment_to_experience_is_overwritten(short_sl_track_setting):
    """When the Method overwrites the `environment_to_experience` method, `configure`
    shouldn't raise the "chance accuracy" warning (or any other warning).
    """
    # NOTE: `pytest.warns(None)` is deprecated (and rejected by recent pytest versions),
    # so the warnings are recorded with the standard library instead.
    import warnings

    method = MyDummyMethod()
    assert short_sl_track_setting.monitor_training_performance
    with warnings.catch_warnings(record=True) as record:
        # Make sure that no warning is hidden by an existing filter.
        warnings.simplefilter("always")
        method.configure(short_sl_track_setting)
    assert len(record) == 0, [str(warning.message) for warning in record]
================================================
FILE: sequoia/methods/avalanche_methods/conftest.py
================================================
from pathlib import Path
import pytest
import torch
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
from sequoia.common.config import Config
# Paths / glob patterns that pytest should skip during collection.
collect_ignore = []
collect_ignore_glob = []
try:
    from avalanche.training.strategies import BaseStrategy  # type: ignore
except ImportError:
    # Avalanche isn't installed: skip every python file of this directory.
    # NOTE: pytest resolves these patterns relative to the directory of this conftest.py,
    # so the pattern must not repeat the path from the root of the repository.
    # pytest.skip(reason="Needs avalanche", allow_module_level=True)
    collect_ignore_glob.append("*.py")
# FIXME: Overwriting the 'config' fixture from before so it's 'session' scoped instead.
@pytest.fixture(scope="session")
def config(tmp_path_factory):
    """Session-scoped debug `Config` (seed 123) that logs into a temporary directory."""
    return Config(
        debug=True,
        seed=123,
        log_dir=tmp_path_factory.mktemp("test_log_dir"),
    )
@pytest.fixture(scope="session")
def fast_scenario(use_task_labels=False, shuffle=True):
    """Small synthetic class-incremental benchmark with 5 experiences.

    Copied directly from Avalanche in "tests/unit_tests_utils.py".
    Not used anywhere atm, but could be used as inspiration for writing quicker tests
    in Sequoia.
    """
    n_samples_per_class = 100
    # 10 classes, each sample having 6 (all informative) features.
    features, labels = make_classification(
        n_samples=10 * n_samples_per_class,
        n_classes=10,
        n_features=6,
        n_informative=6,
        n_redundant=0,
    )[:2]
    X = torch.from_numpy(features).float()
    y = torch.from_numpy(labels).long()

    # Stratified 60% / 40% split between the training and testing data.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, train_size=0.6, shuffle=True, stratify=y
    )

    from avalanche.benchmarks import nc_benchmark  # type: ignore

    return nc_benchmark(
        TensorDataset(train_X, train_y),
        TensorDataset(test_X, test_y),
        5,
        task_labels=use_task_labels,
        shuffle=shuffle,
    )
================================================
FILE: sequoia/methods/avalanche_methods/cwr_star.py
================================================
""" Method based on CWRStar from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.cwr_star.CWRStarPlugin` or
`avalanche.training.strategies.strategy_wrappers.CWRStar` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Optional, Type
from avalanche.training.strategies import BaseStrategy, CWRStar
from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting
from .base import AvalancheMethod
@register_method
@dataclass
class CWRStarMethod(AvalancheMethod[CWRStar]):
    """CWRStar strategy from Avalanche.

    See CWRStar plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Name of the CWR layer. Defaults to None, which means that the last fully connected
    # layer will be used.
    cwr_layer_name: Optional[str] = None

    # Avalanche strategy class associated with this Method. Annotated as a `ClassVar`,
    # so it is a class-level constant rather than a dataclass field.
    strategy_class: ClassVar[Type[BaseStrategy]] = CWRStar
if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    # Demo run: MNIST split into 5 tasks, with training performance monitoring on.
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )

    # Build the Method from the command-line arguments.
    cli = ArgumentParser(__doc__)
    cli.add_arguments(CWRStarMethod, "method")
    method: CWRStarMethod = cli.parse_args().method

    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/cwr_star_test.py
================================================
""" WIP: Tests for the CWRStar Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .cwr_star import CWRStarMethod
class TestCWRStarMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` with `CWRStarMethod`."""

    # The Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = CWRStarMethod
================================================
FILE: sequoia/methods/avalanche_methods/ewc.py
================================================
""" Method based on EWC from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.ewc.EWCPlugin` or
`avalanche.training.strategies.strategy_wrappers.EWC` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Optional, Type, Union
from avalanche.models import SimpleCNN, SimpleMLP
from avalanche.training.strategies import EWC, BaseStrategy
from simple_parsing import ArgumentParser
from simple_parsing.helpers import choice
from simple_parsing.helpers.hparams import categorical, uniform
from torch import nn
from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting
from .base import AvalancheMethod
@register_method
@dataclass
class EWCMethod(AvalancheMethod[EWC]):
    """Elastic Weight Consolidation (EWC) strategy from Avalanche.

    See EWC plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Avalanche strategy class associated with this Method (class-level constant, not a
    # dataclass field).
    strategy_class: ClassVar[Type[BaseStrategy]] = EWC

    # Class Variable to hold the types of models available as options for the `model`
    # field below.
    available_models: ClassVar[Dict[str, Type[nn.Module]]] = {
        "simple_cnn": SimpleCNN,
        "simple_mlp": SimpleMLP,
        # "mt_simple_cnn": MTSimpleCNN,  # These two still have some bugs in their loss
        # "mt_simple_mlp": MTSimpleMLP,  # These two still have some bugs in their loss
    }

    # The model: either an already-created module or a module class. The choices
    # are the entries of `available_models`; `SimpleCNN` is the default.
    model: Union[nn.Module, Type[nn.Module]] = choice(available_models, default=SimpleCNN)

    # Hyperparameter to weigh the penalty inside the total loss. The larger the lambda,
    # the larger the regularization.
    ewc_lambda: float = uniform(1e-3, 1.0, default=0.1)  # todo: set the right value to use here.

    # `separate` to keep a separate penalty for each previous experience. `online` to
    # keep a single penalty summed with a decay factor over all previous tasks.
    mode: str = categorical("separate", "online", default="separate")

    # Used only if `mode` is 'online'. It specifies the decay term of the
    # importance matrix.
    decay_factor: Optional[float] = uniform(0.0, 1.0, default=0.9)

    # If True, keep in memory both parameter values and importances for all previous
    # tasks, for all modes. If False, keep only the last parameter values and
    # importances. If mode is `separate`, the value of `keep_importance_data` is set to
    # be True.
    keep_importance_data: bool = categorical(True, False, default=False)
if __name__ == "__main__":
    # Demo run: MNIST split into 5 tasks, with training performance monitoring on.
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )

    # Build the Method from the command-line arguments.
    cli = ArgumentParser(__doc__)
    cli.add_arguments(EWCMethod, "method")
    method: EWCMethod = cli.parse_args().method

    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/ewc_test.py
================================================
""" WIP: Tests for the EWC Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, List, Type
import pytest
from avalanche.models import SimpleCNN, SimpleMLP
from torch.nn import Module
from sequoia.common import Config
from sequoia.conftest import xfail_param
from sequoia.settings.sl import IncrementalSLSetting, TaskIncrementalSLSetting
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .ewc import EWCMethod
from .patched_models import MTSimpleCNN, MTSimpleMLP
# Reason attached to the expected failures of the multi-task models below.
_EWC_XFAIL_REASON = (
    "Shape Mismatch between the saved parameter importance and the "
    "current weight tensor in EWC plugin."
)


def _ewc_model_type_params() -> list:
    """Model types to test: the two simple models, then the two multi-task ones (xfail)."""
    return [
        SimpleCNN,
        SimpleMLP,
        xfail_param(MTSimpleCNN, reason=_EWC_XFAIL_REASON),
        xfail_param(MTSimpleMLP, reason=_EWC_XFAIL_REASON),
    ]


class TestEWCMethod(_TestAvalancheMethod):
    """Tests for `EWCMethod`, parametrized over the different model types."""

    Method: ClassVar[Type[AvalancheMethod]] = EWCMethod
    ignored_parameter_differences: ClassVar[List[str]] = (
        _TestAvalancheMethod.ignored_parameter_differences + ["decay_factor"]
    )

    @classmethod
    @pytest.fixture(params=_ewc_model_type_params())
    def method(cls, config: Config, request) -> AvalancheMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        return cls.Method(model=request.param, train_mb_size=10, train_epochs=1)

    @pytest.mark.timeout(60)
    @pytest.mark.parametrize("model_type", _ewc_model_type_params())
    def test_short_task_incremental_setting(
        self,
        model_type: Type[Module],
        short_task_incremental_setting: TaskIncrementalSLSetting,
        config: Config,
    ):
        """Apply the method to the short task-incremental setting and check the objective."""
        ewc_method = self.Method(model=model_type, train_mb_size=10, train_epochs=1)
        outcome = short_task_incremental_setting.apply(ewc_method, config)
        assert 0.05 < outcome.average_final_performance.objective

    @pytest.mark.timeout(60)
    @pytest.mark.parametrize("model_type", _ewc_model_type_params())
    def test_short_class_incremental_setting(
        self,
        model_type: Type[Module],
        short_class_incremental_setting: IncrementalSLSetting,
        config: Config,
    ):
        """Apply the method to the short class-incremental setting and check the objective."""
        ewc_method = self.Method(model=model_type, train_mb_size=10, train_epochs=1)
        outcome = short_class_incremental_setting.apply(ewc_method, config)
        assert 0.05 < outcome.average_final_performance.objective
# @pytest.mark.timeout(60)
# @pytest.mark.parametrize(
# "model_type",
# [
# SimpleCNN,
# SimpleMLP,
# xfail_param(
# MTSimpleCNN,
# reason=(
# "Shape Mismatch between the saved parameter importance and the "
# "current weight tensor in EWC plugin."
# ),
# ),
# # MTSimpleMLP,
# xfail_param(
# MTSimpleMLP,
# reason=(
# "Shape Mismatch between the saved parameter importance and the "
# "current weight tensor in EWC plugin."
# ),
# ),
# ],
# )
# def test_short_continual_sl_setting(
# self,
# model_type: Type[Module],
# short_continual_sl_setting: ContinualSLSetting,
# config: Config,
# ):
# super().test_short_continual_sl_setting(
# model_type=model_type,
# short_continual_sl_setting=short_continual_sl_setting,
# config=config,
# )
# @pytest.mark.timeout(60)
# @pytest.mark.parametrize(
# "model_type",
# [
# SimpleCNN,
# SimpleMLP,
# xfail_param(
# MTSimpleCNN,
# reason=(
# "Shape Mismatch between the saved parameter importance and the "
# "current weight tensor in EWC plugin."
# ),
# ),
# # MTSimpleMLP,
# xfail_param(
# MTSimpleMLP,
# reason=(
# "Shape Mismatch between the saved parameter importance and the "
# "current weight tensor in EWC plugin."
# ),
# ),
# ],
# )
# def test_short_discrete_task_agnostic_sl_setting(
# self,
# model_type: Type[Module],
# short_discrete_task_agnostic_sl_setting: DiscreteTaskAgnosticSLSetting,
# config: Config,
# ):
# super().test_short_discrete_task_agnostic_sl_setting(
# model_type=model_type,
# short_discrete_task_agnostic_sl_setting=short_discrete_task_agnostic_sl_setting,
# config=config,
# )
================================================
FILE: sequoia/methods/avalanche_methods/experience.py
================================================
""" 'Wrapper' around a PassiveEnvironment from Sequoia, disguising it as an 'Experience'
from Avalanche.
"""
from typing import List, Optional
import tqdm
from avalanche.benchmarks.scenarios import Experience
from avalanche.benchmarks.utils.avalanche_dataset import AvalancheDataset, AvalancheDatasetType
from torch import Tensor
from torch.utils.data import TensorDataset
from sequoia.common.gym_wrappers.utils import IterableWrapper
from sequoia.settings.sl import IncrementalSLSetting, PassiveEnvironment, SLSetting
from sequoia.settings.sl.incremental.objects import Observations, Rewards
class SequoiaExperience(IterableWrapper, Experience):
    """Wraps a Sequoia `PassiveEnvironment` so it can be handed to Avalanche as an
    `Experience`.

    The content of the environment is gathered into a `TensorDataset` (unless `x` / `y`
    / `task_labels` are given), which is then wrapped in an `AvalancheDataset`.
    """

    def __init__(
        self,
        env: PassiveEnvironment,
        setting: IncrementalSLSetting,
        x: Tensor = None,
        y: Tensor = None,
        task_labels: Tensor = None,
    ):
        """Create the experience for `env`, which belongs to `setting`.

        :param env: the environment to wrap. Expected to be the train, valid or test
            environment of `setting` (anything else fails the assert below).
        :param setting: the setting the environment comes from.
        :param x: optional pre-collected inputs.
        :param y: optional pre-collected targets.
        :param task_labels: optional pre-collected task labels.

        When `x`, `y` and `task_labels` are all None, the environment is iterated
        entirely in order to collect them.
        """
        super().__init__(env=env)
        self.setting = setting
        self.type: str
        if isinstance(setting, IncrementalSLSetting):
            self.task_id = setting.current_task_id
        else:
            # No known task, or we don't have access to the task ID, so just consider
            # this to come from the first task.
            self.task_id = 0

        # Work out which of the setting's environments was passed (by identity), and
        # pick the matching transforms.
        if env is setting.train_env:
            self.type = "Train"
            self.transforms = setting.train_transforms
        elif env is setting.val_env:
            self.type = "Valid"
            self.transforms = setting.val_transforms
        else:
            self.type = "Test"
            assert env is setting.test_env
            self.transforms = setting.test_transforms
        self.name = f"{self.type}_{self.task_id}"
        if x is None and y is None and task_labels is None:
            # Collect the x, y, and perhaps t if they aren't provided.
            all_observations: List[Observations] = []
            all_rewards: List[Rewards] = []
            for batch in tqdm.tqdm(self, desc="Converting environment into TensorDataset"):
                observations: Observations
                rewards: Optional[Rewards]
                # A batch is either just the observations, or an
                # (observations, rewards) pair.
                if isinstance(batch, Observations):
                    observations = batch
                    rewards = None
                else:
                    assert isinstance(batch, tuple) and len(batch) == 2
                    observations, rewards = batch
                if rewards is None:
                    # Need to send actions to the env before we can actually get the
                    # associated Reward.
                    # Here we sample a random action (no other choice really..) and so we
                    # are going to get bad results in case the online performance is being
                    # evaluated.
                    action = self.env.action_space.sample()
                    # Trim the sampled action when this batch holds fewer items than
                    # the sampled action does.
                    if observations.batch_size != action.shape[0]:
                        action = action[: observations.batch_size]
                    rewards = self.env.send(action)
                all_observations.append(observations)
                all_rewards.append(rewards)
            # TODO: This will be absolutely unfeasable for larger dataset like ImageNet.
            stacked_observations: Observations = Observations.concatenate(all_observations)
            x = stacked_observations.x
            task_labels = stacked_observations.task_labels
            assert all(
                y_i is not None for y in all_rewards for y_i in y
            ), "Need fully labeled train dataset for now."
            stacked_rewards: Rewards = Rewards.concatenate(all_rewards)
            y = stacked_rewards.y

        if task_labels is not None and all(t is None for t in task_labels):
            # The task labels are None, even at training time, which indicates this
            # is probably a `ContinualSLSetting`
            task_labels = None
        elif isinstance(task_labels, Tensor):
            # Convert the tensor of task labels into a plain python list.
            task_labels = task_labels.cpu().numpy().tolist()
        # NOTE: Only `x` and `y` are stored in the TensorDataset; the task labels are
        # only passed to the AvalancheDataset below.
        dataset = TensorDataset(x, y)
        self._tensor_dataset = dataset
        self._dataset = AvalancheDataset(
            dataset=dataset,
            task_labels=task_labels,
            targets=y.tolist(),
            dataset_type=AvalancheDatasetType.CLASSIFICATION,
        )
        # self.task_pattern_indices = {}
        # self.task_set = ...
        # class DummyDataset(AvalancheDataset):
        #     pass
        #     def train(self):
        #         return self
        # self._dataset = self
        # self.tasks_pattern_indices = {} #dict({0: np.arange(len(self._dataset))})
        # self.task_set = ... #_TaskSubsetDict(self._dataset)
        # self._dataset = env
        # from avalanche.benchmarks import GenericScenarioStream
        # class FakeStream(GenericScenarioStream):
        #     pass
        # self.origin_stream = FakeStream("train", scenario="whatever")
        # self.origin_stream.name = "train"

    @property
    def dataset(self) -> AvalancheDataset:
        """The `AvalancheDataset` holding the data of this experience."""
        return self._dataset

    @dataset.setter
    def dataset(self, value: AvalancheDataset) -> None:
        self._dataset = value

    @property
    def task_label(self):
        """
        The task label. This value will never have value "None". However,
        for scenarios that don't produce task labels a placeholder value like 0
        is usually set. Beware that this field is meant as a shortcut to obtain
        a unique task label: it assumes that only patterns labeled with a
        single task label are present. If this experience contains patterns from
        multiple tasks, accessing this property will result in an exception.
        """
        if not self.setting.task_labels_at_test_time:
            return 0
        # NOTE: `task_labels_at_test_time` is necessarily truthy at this point, so only
        # the check on `self.type` matters in the condition below.
        if self.type == "Test" and self.setting.task_labels_at_test_time:
            raise RuntimeError("More than one tasks present, can't use this property.")
        return self.task_id

    @property
    def task_labels(self):
        # NOTE(review): `self._tensor_dataset` is created as `TensorDataset(x, y)` in
        # `__init__`, so its last tensor is `y` (the targets), not the task labels.
        # Confirm whether returning the targets here is intended.
        return self._tensor_dataset.tensors[-1]

    @property
    def current_experience(self):
        # Return the index of the current experience, which here is the task id.
        return self.task_id

    @property
    def origin_stream(self) -> SLSetting:
        # NOTE: This returns a new, empty list subclass whose only extra attribute is
        # `name` (set to the name of this experience).
        # NOTE(review): the return annotation says `SLSetting`, but the returned
        # object is a `DummyStream` (a `list`); confirm which one callers expect.
        class DummyStream(list):
            name = self.name

        # raise NotImplementedError
        return DummyStream()

    # def train(self):
    #     return self
================================================
FILE: sequoia/methods/avalanche_methods/gdumb.py
================================================
""" Method based on GDumb from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.gdumb.GDumbPlugin` or
`avalanche.training.strategies.strategy_wrappers.GDumb` for more info.
BUG: There appears to be a bug in the GDumb plugin, caused by a mismatch in the tensor
shapes when concatenating them into a TensorDataset, when batch size > 1.
"""
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type
import torch
import tqdm
from avalanche.benchmarks.utils import AvalancheConcatDataset
from avalanche.training.plugins.gdumb import GDumbPlugin as _GDumbPlugin
from avalanche.training.strategies import BaseStrategy, GDumb
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform
from torch import Tensor
from torch.utils.data import TensorDataset
from sequoia.methods import register_method
from sequoia.settings.sl import ClassIncrementalSetting, TaskIncrementalSLSetting
from sequoia.utils.logging_utils import get_logger
from .base import AvalancheMethod
logger = get_logger(__name__)
class GDumbPlugin(_GDumbPlugin):
    """Patched version of the GDumbPlugin from Avalanche.

    The base implementation is quite inefficient: for each new item, it does an entire
    concatenation with the current dataset.
    This uses lists instead, and only concatenates once.

    It also uses the task labels from each sample in the dataset, rather than from the
    current experience, as there might be more than one task in the dataset.
    """

    def __init__(self, mem_size: int = 200):
        super().__init__(mem_size=mem_size)
        # task id -> (list of stored patterns, list of stored targets)
        self.ext_mem: Dict[Any, Tuple[List[Tensor], List[Tensor]]] = {}
        # task id -> {class: number of stored samples of that class}
        self.counter: Dict[Any, Dict[Any, int]] = {}

    def after_train_dataset_adaptation(self, strategy: BaseStrategy, **kwargs):
        """Update the per-task memory with the samples of the current experience, and
        replace the strategy's adapted dataset with the content of the memory.
        """
        samples = tqdm.tqdm(
            strategy.experience.dataset, desc="Exhausting dataset to create GDumb buffer"
        )
        for pattern, target, task_id in samples:
            target = torch.as_tensor(target)
            label = target.item()
            if pattern.dim() == 1:
                pattern = pattern.unsqueeze(0)

            class_counts = self.counter.setdefault(task_id, defaultdict(int))
            stored_patterns, stored_targets = self.ext_mem.setdefault(task_id, ([], []))

            # Per-class quota. Any positive number is fine while nothing is stored yet.
            quota = int(self.mem_size / len(class_counts)) if class_counts else 1

            # Skip the sample when its class already reached its quota.
            if label in class_counts and class_counts[label] >= quota:
                continue

            if sum(class_counts.values()) < self.mem_size:
                # Memory not full: simply store the new sample.
                stored_patterns.append(pattern)
                stored_targets.append(target)
            else:
                # Memory full: overwrite the first stored sample of the most
                # represented class with the new sample.
                evicted_class = max(class_counts, key=class_counts.get)
                for slot, stored_target in enumerate(stored_targets):
                    if stored_target.item() == evicted_class:
                        stored_patterns[slot] = pattern
                        stored_targets[slot] = target
                        break
                class_counts[evicted_class] -= 1

            # One more stored sample for the class of the new sample.
            class_counts[label] += 1

        # Stack the memory of each task into its own TensorDataset (done only once).
        per_task_datasets: Dict[Any, TensorDataset] = {}
        for memory_task_id, (patterns, targets) in self.ext_mem.items():
            task_dataset = TensorDataset(
                torch.stack(patterns, dim=0), torch.stack(targets, dim=0)
            )
            per_task_datasets[memory_task_id] = task_dataset
            logger.debug(
                f"There are {len(task_dataset)} entries from task {memory_task_id} in the new dataset."
            )

        strategy.adapted_dataset = AvalancheConcatDataset(per_task_datasets.values())
@register_method
@dataclass
class GDumbMethod(AvalancheMethod[GDumb]):
    """GDumb strategy from Avalanche.

    See GDumbPlugin for more details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    name: ClassVar[str] = "gdumb"

    # replay buffer size.
    mem_size: int = uniform(100, 1_000, default=200)

    # The number of training epochs.
    train_epochs: int = uniform(1, 100, default=20)

    strategy_class: ClassVar[Type[BaseStrategy]] = GDumb

    def create_cl_strategy(self, setting: ClassIncrementalSetting) -> GDumb:
        """Create the strategy, then swap its GDumb plugin for the patched `GDumbPlugin`.

        Raises a RuntimeError if the strategy has no plugin of the Avalanche GDumb type.
        """
        strategy = super().create_cl_strategy(setting)

        # Position of the first plugin that is an instance of Avalanche's GDumbPlugin.
        position: Optional[int] = next(
            (
                index
                for index, candidate in enumerate(strategy.plugins)
                if isinstance(candidate, _GDumbPlugin)
            ),
            None,
        )
        if position is None:
            raise RuntimeError("Couldn't find the Strategy's GDumb plugin!")

        original_plugin: _GDumbPlugin = strategy.plugins.pop(position)
        logger.info("Replacing the GDumbPlugin with our 'patched' version.")

        patched_plugin = GDumbPlugin(mem_size=original_plugin.mem_size)
        # Carry over the state of the original plugin (should be empty at this point).
        patched_plugin.ext_mem = original_plugin.ext_mem
        patched_plugin.counter = original_plugin.counter

        # Put the patched plugin back at the same position in the list.
        strategy.plugins.insert(position, patched_plugin)
        return strategy
if __name__ == "__main__":
    # Demo run: MNIST split into 5 tasks, with training performance monitoring on.
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )

    # Build the Method from the command-line arguments.
    cli = ArgumentParser(__doc__)
    cli.add_arguments(GDumbMethod, "method")
    method: GDumbMethod = cli.parse_args().method

    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/gdumb_test.py
================================================
""" WIP: Tests for the GDumb Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .gdumb import GDumbMethod
class TestGDumbMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` with `GDumbMethod`."""

    # The Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = GDumbMethod
================================================
FILE: sequoia/methods/avalanche_methods/gem.py
================================================
""" Method based on GEM from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.gem.GEMPlugin` or
`avalanche.training.strategies.strategy_wrappers.GEM` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Type
from avalanche.training.strategies import GEM, BaseStrategy
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform
from sequoia.methods import register_method
from sequoia.settings.sl import TaskIncrementalSLSetting
from .base import AvalancheMethod
@register_method
@dataclass
class GEMMethod(AvalancheMethod[GEM]):
    """Gradient Episodic Memory (GEM) strategy from Avalanche.

    See GEM plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Number of patterns per experience in the memory.
    patterns_per_exp: int = uniform(10, 1000, default=100)

    # Offset to add to the projection direction in order to favour backward transfer
    # (gamma in original paper).
    memory_strength: float = uniform(1e-2, 1.0, default=0.5)

    # Avalanche strategy class associated with this Method (class-level constant, not a
    # dataclass field).
    strategy_class: ClassVar[Type[BaseStrategy]] = GEM
if __name__ == "__main__":
    # Demo run: MNIST split into 5 tasks, with training performance monitoring on.
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )

    # Build the Method from the command-line arguments.
    cli = ArgumentParser(__doc__)
    cli.add_arguments(GEMMethod, "method")
    method: GEMMethod = cli.parse_args().method

    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/gem_test.py
================================================
""" WIP: Tests for the GEM Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .gem import GEMMethod
class TestGEMMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` with `GEMMethod`."""

    # The Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = GEMMethod
================================================
FILE: sequoia/methods/avalanche_methods/lwf.py
================================================
""" Method based on LwF from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.lwf.LwFPlugin` or
`avalanche.training.strategies.strategy_wrappers.LwF` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Optional, Sequence, Type, Union
from avalanche.training.plugins.lwf import LwFPlugin as LwFPlugin_
from avalanche.training.strategies import LwF
from simple_parsing.helpers.hparams import uniform
from torch import Tensor
from sequoia.methods import register_method
from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting
from .base import AvalancheMethod
class LwFPlugin(LwFPlugin_):
    """Patched `LwFPlugin`, which tolerates an output space that grew since the previous
    model was saved (e.g. with a Multi-Task model whose output grows after each task).
    """

    def _distillation_loss(self, out: Tensor, prev_out: Tensor) -> Tensor:
        """Distillation loss between the outputs of the current and previous models.

        When the shapes differ, only the first columns of `out` (as many as `prev_out`
        has) are used, assuming that they correspond to the same classes.
        """
        if out.shape == prev_out.shape:
            return super()._distillation_loss(out=out, prev_out=prev_out)

        previous_width = prev_out.shape[-1]
        current_width = out.shape[-1]
        assert previous_width < current_width
        # Only keep the columns that the previous model also produced.
        overlapping_out = out[..., :previous_width]
        return super()._distillation_loss(out=overlapping_out, prev_out=prev_out)
@register_method
@dataclass
class LwFMethod(AvalancheMethod[LwF]):
    """Learning without Forgetting strategy from Avalanche.

    See LwF plugin for details.
    This strategy does not use task identities.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # The name is set explicitly, because the default name would be 'lw_f'.
    name: ClassVar[str] = "lwf"

    # distillation hyperparameter. It can be either a float number or a list containing
    # alpha for each experience.
    # TODO: Check if the range makes sense.
    alpha: Union[float, Sequence[float]] = uniform(1e-2, 1, default=1)

    # softmax temperature for distillation
    # TODO: Check if the range makes sense.
    temperature: float = uniform(1, 10, default=2)

    strategy_class: ClassVar[Type[LwF]] = LwF

    def create_cl_strategy(self, setting: SLSetting) -> LwF:
        """Create the strategy, then swap its `LwFPlugin` for the patched version."""
        strategy = super().create_cl_strategy(setting)

        # Position of the first plugin whose type is exactly Avalanche's LwFPlugin.
        plugin_index: Optional[int] = next(
            (
                index
                for index, candidate in enumerate(strategy.plugins)
                if type(candidate) is LwFPlugin_
            ),
            None,
        )
        assert plugin_index is not None, "LwF strategy should have an LwF Plugin, no?"
        assert isinstance(plugin_index, int)

        original_plugin: LwFPlugin_ = strategy.plugins[plugin_index]
        patched_plugin = LwFPlugin(
            alpha=original_plugin.alpha, temperature=original_plugin.temperature
        )
        # Carry over the previous model saved by the original plugin.
        patched_plugin.prev_model = original_plugin.prev_model
        strategy.plugins[plugin_index] = patched_plugin
        return strategy
if __name__ == "__main__":
    from simple_parsing import ArgumentParser

    # Demo run: MNIST split into 5 tasks, with training performance monitoring on.
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )

    # Build the Method from the command-line arguments.
    cli = ArgumentParser(__doc__)
    cli.add_arguments(LwFMethod, "method")
    method: LwFMethod = cli.parse_args().method

    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/lwf_test.py
================================================
""" WIP: Tests for the LwF Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .lwf import LwFMethod
class TestLwFMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` with `LwFMethod`."""

    # The Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = LwFMethod
================================================
FILE: sequoia/methods/avalanche_methods/naive.py
================================================
""" 'Naive' method from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.strategies.Naive` for more info.
"""
from typing import ClassVar, Type
from avalanche.training.strategies import BaseStrategy, Naive
from sequoia.settings.sl import TaskIncrementalSLSetting
from .base import AvalancheMethod
# NOTE(review): this module imports neither `register_method` nor `dataclass`, and the
# class below isn't decorated with them; confirm whether that is intentional.
class NaiveMethod(AvalancheMethod[Naive]):
    """'Naive' Strategy from [Avalanche](https://github.com/ContinualAI/avalanche).

    The simplest (and least effective) Continual Learning strategy. Naive just
    incrementally fine tunes a single model without employing any method
    to contrast the catastrophic forgetting of previous knowledge.
    This strategy does not use task identities.

    Naive is easy to set up and its results are commonly used to show the worst
    performing baseline.

    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Avalanche strategy class associated with this Method.
    strategy_class: ClassVar[Type[BaseStrategy]] = Naive
if __name__ == "__main__":
    # Demo run: MNIST split into 5 tasks, with training performance monitoring on.
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # The method is created with its default hyper-parameters (no command-line parsing).
    method = NaiveMethod()
    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/naive_test.py
================================================
""" WIP: Tests for the Naive Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .naive import NaiveMethod
class TestNaiveMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` with `NaiveMethod`."""

    # The Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = NaiveMethod
================================================
FILE: sequoia/methods/avalanche_methods/patched_models.py
================================================
""" Patch for the multi-task models in Avalanche, so that we can evaluate on future
tasks, by selecting random prediction.
"""
import warnings
from abc import abstractmethod
from typing import Any, List, Optional
import torch
from avalanche.models import MTSimpleCNN as _MTSimpleCNN
from avalanche.models import MTSimpleMLP as _MTSimpleMLP
from avalanche.models import MultiHeadClassifier as _MultiHeadClassifier
from avalanche.models.dynamic_modules import MultiTaskModule
from torch import Tensor
from torch.nn import functional as F
from sequoia.utils import get_logger
logger = get_logger(__name__)
class PatchedMultiTaskModule(MultiTaskModule):
    """Multi-task module with a simple form of task inference.

    Subclasses expose the tasks for which they have an output head through
    `known_task_ids`. `task_inference_forward_pass` can then be used to get a
    prediction when no task labels are available.
    """

    @property
    @abstractmethod
    def known_task_ids(self) -> List[Any]:
        """Identifiers of the tasks for which an output head is available."""
        pass

    def task_inference_forward_pass(self, x: Tensor) -> Tensor:
        """Forward pass with a simple form of task inference.

        A forward pass is performed with the output head of every known task, and for
        each item in the batch, the logits of the most confident head (the one which
        gives the highest softmax probability to any class) are returned.

        NOTE: This assumes that the observations are batched.

        :param x: batch of inputs, with the batch dimension first.
        :return: the logits of the chosen head for each item, with shape [B, N].
        :raises NotImplementedError: if the output heads give outputs with a different
            number of classes.
        """
        # B, T and N are used below to indicate the shape of the different tensors.
        B = x.shape[0]

        # Tasks encountered previously and for which we have an output head.
        # TODO: This assumes that the keys of the ModuleDict are integers.
        known_task_ids: List[int] = [int(t) for t in self.known_task_ids]
        assert known_task_ids
        T = len(known_task_ids)

        # Predictions from each output head, in the same order as `known_task_ids`.
        # NOTE: The outputs are stored by position rather than indexed by task id, so
        # that the task ids don't need to be exactly 0, 1, ..., T-1.
        task_outputs: List[Tensor] = []  # [T, B, N]
        for task_id in known_task_ids:
            # Create 'fake' task labels for this forward pass.
            # NOTE: We do this so we can call `self.forward` and not get an infinite
            # recursion.
            task_labels = torch.full([B], task_id, device=x.device, dtype=torch.long)
            task_outputs.append(self.forward(x, task_labels=task_labels))

        if T == 1:
            # Only one output head: nothing to choose from.
            return task_outputs[0]

        N = max(task_output.shape[-1] for task_output in task_outputs)

        # NOTE: Here in Avalanche it's possible that each output head's output has a
        # different shape, in which case they can't be stacked into a single tensor.
        # IDEA: Add zeros to the outputs of a different shape.
        if any(task_output.shape[-1] != N for task_output in task_outputs):
            raise NotImplementedError("TODO: Output heads didn't give outputs of the same shape!")

        # Stack the predictions (logits) from each output head.
        logits_from_each_head = torch.stack(task_outputs, dim=1)

        # Normalize the logits from each output head with softmax.
        # Example with batch size of 1, output heads = 2, and classes = 4:
        # logits from each head: [[[123, 456, 123, 123], [1, 1, 2, 1]]]
        # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]]
        probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1)
        assert probs_from_each_head.shape == (B, T, N)

        # Simple kind of task inference:
        # For each item in the batch, use the class that has the highest probability
        # across all output heads.
        max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1)
        assert max_probs_across_heads.shape == (B, N)
        assert chosen_head_per_class.shape == (B, N)
        # Example (continued):
        # max probs across heads: [[0.2, 0.6, 0.4, 0.2]]
        # chosen output heads per class: [[1, 0, 1, 1]]

        # Determine which output head has highest "confidence":
        max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1)
        assert max_prob_value.shape == (B,)
        assert most_probable_class.shape == (B,)
        # Example (continued):
        # max_prob_value: [0.6]
        # max_prob_class: [1]

        # A bit of boolean trickery to get what we need, which is, for each item, the
        # index of the output head that gave the most confident prediction.
        mask = F.one_hot(most_probable_class, N).to(dtype=torch.bool, device=x.device)
        chosen_output_head_per_item = chosen_head_per_class[mask]
        assert mask.shape == (B, N)
        assert chosen_output_head_per_item.shape == (B,)
        # Example (continued):
        # mask: [[False, True, False, False]]
        # chosen_output_head_per_item: [0]

        # Create a bool tensor to select items associated with the chosen output head.
        selected_mask = F.one_hot(chosen_output_head_per_item, T).to(
            dtype=torch.bool, device=x.device
        )
        assert selected_mask.shape == (B, T)

        # Select the logits using the mask:
        selected_outputs = logits_from_each_head[selected_mask]
        assert selected_outputs.shape == (B, N)
        return selected_outputs
from avalanche.benchmarks.utils import AvalancheDataset
class MultiHeadClassifier(_MultiHeadClassifier):
    """Multi-head classifier that insists on task labels and tolerates unseen tasks.

    `forward` rejects a missing `task_labels` argument, and `forward_single_task`
    falls back to the last key of `self.classifiers` when no head is registered for
    the requested task.
    """

    def __init__(self, in_features: int, initial_out_features: int = 2):
        """Create the classifier, with one separate output head per task.

        Typically used in task-incremental scenarios, where task labels are available
        and provided to the model.

        :param in_features: number of input features.
        :param initial_out_features: initial number of classes (can be dynamically
            expanded).
        """
        super().__init__(in_features=in_features, initial_out_features=initial_out_features)

    def adaptation(self, dataset: AvalancheDataset):
        """Delegate to the parent class' adaptation (which adds heads for new tasks).

        :param dataset: data from the current experience.
        :return: None.
        """
        super().adaptation(dataset)

    def forward(self, x: Tensor, task_labels: Optional[Tensor]) -> Tensor:
        """Run the parent's forward pass; task labels are mandatory here.

        :param x: input batch.
        :param task_labels: task labels of the batch. Must be a Tensor.
        :raises NotImplementedError: if `task_labels` is None.
        """
        # Task inference is not done in this layer: the patched models take care of it
        # before the classifier gets called.
        if task_labels is None:
            raise NotImplementedError("Shouldn't get None task labels in the MultiHeadClassifier!")
        assert isinstance(task_labels, Tensor)
        return super().forward(x, task_labels)

    def forward_single_task(self, x: Tensor, task_label: Optional[Tensor]):
        """Compute the output for `x` using the head associated with `task_label`.

        When no head exists for `task_label`, a RuntimeWarning is emitted and the head
        stored under the last key of `self.classifiers` is used instead.

        :param x: input batch.
        :param task_label: task label (an int, or an object with an `.item()` method).
        :return: whatever the parent's `forward_single_task` returns for the head used.
        """
        needs_unwrapping = task_label is not None and not isinstance(task_label, int)
        if needs_unwrapping:
            task_label = task_label.item()

        # TODO: If/when we make the context variable truly continuous, then this
        # won't work.
        assert task_label is None or isinstance(task_label, int), task_label

        if str(task_label) in self.classifiers:
            return super().forward_single_task(x, task_label)

        # TODO: Let's use the most 'recent' output head instead?
        seen_tasks = list(self.classifiers.keys())
        assert seen_tasks, "Need to have seen at least one task!"
        last_known_task = seen_tasks[-1]
        warnings.warn(
            RuntimeWarning(
                f"performing forward pass on previously unseen task, will pretend "
                f"inputs come from task {last_known_task} instead."
            )
        )
        return super().forward_single_task(x, last_known_task)
class MTSimpleCNN(_MTSimpleCNN, PatchedMultiTaskModule):
    """Multi-task CNN whose forward pass falls back to task inference without labels."""

    def __init__(self):
        """Build the parent network, then swap in the patched multi-head classifier."""
        super().__init__()
        self.classifier = MultiHeadClassifier(in_features=64)

    def forward(self, x: Tensor, task_labels: Optional[Tensor] = None) -> Tensor:
        """Forward pass, with or without task labels.

        With task labels, this is the parent's forward pass. Without them, the
        prediction comes from `self.task_inference_forward_pass`, and a RuntimeWarning
        is emitted if the module is in training mode.
        """
        if task_labels is not None:
            return super().forward(x=x, task_labels=task_labels)

        # NOTE: When training, we could rely on a property like `current_task_id`
        # being set within the `on_task_switch` callback.
        # Some strategies (`GEM` and others) sometimes don't pass a task index while
        # training. GEM omits it when calculating the reference gradient, so it's
        # unclear whether task inference is what we want in that case.
        if self.training:
            warnings.warn(
                RuntimeWarning("Using task inference in the forward pass while training?")
            )
        return self.task_inference_forward_pass(x=x)

    @property
    def known_task_ids(self) -> List[Any]:
        """Keys of the output heads currently held by the classifier."""
        return [*self.classifier.classifiers.keys()]
class MTSimpleMLP(_MTSimpleMLP, PatchedMultiTaskModule):
    """Multi-task MLP whose forward pass falls back to task inference without labels."""

    def __init__(self, input_size: int = 28 * 28, hidden_size: int = 512):
        """
        Multi-task MLP with multi-head classifier.

        :param input_size: forwarded to the parent constructor.
        :param hidden_size: forwarded to the parent constructor; also the number of
            input features of the multi-head classifier.
        """
        super().__init__(input_size=input_size, hidden_size=hidden_size)
        self.classifier = MultiHeadClassifier(in_features=hidden_size)

    def forward(self, x: Tensor, task_labels: Optional[Tensor] = None) -> Tensor:
        """Forward pass, with or without task labels.

        With task labels, this is the parent's forward pass. Without them, the
        prediction comes from `self.task_inference_forward_pass`, and a RuntimeWarning
        is emitted if the module is in training mode.
        """
        if task_labels is not None:
            return super().forward(x=x, task_labels=task_labels)

        if self.training:
            warnings.warn(
                RuntimeWarning("Using task inference in the forward pass while training?")
            )
        return self.task_inference_forward_pass(x=x)

    @property
    def known_task_ids(self) -> List[Any]:
        """Keys of the output heads currently held by the classifier."""
        return [*self.classifier.classifiers.keys()]
================================================
FILE: sequoia/methods/avalanche_methods/plugins.py
================================================
""" WIP: @lebrice: Plugins that I was using while trying to get the BaseStrategy and
plugins from Avalanche to work directly with the Sequoia environments.
"""
from typing import List
import numpy as np
import torch
from avalanche.training.plugins import StrategyPlugin
from avalanche.training.strategies import BaseStrategy
from torch import Tensor
from torch.utils.data import TensorDataset
class GatherDataset(StrategyPlugin):
    """IDEA: A Plugin that accumulates the tensors from the env to create a "proper"
    Dataset to be used by the plugins.

    Minibatches are collected after every (train / eval) forward pass and concatenated
    into a `TensorDataset` of `(x, y, t)` tensors, where `x` is `strategy.mb_x`, `y` is
    `strategy.mb_y` and `t` is `strategy.mb_task_id`.
    """

    def __init__(self):
        super().__init__()
        # Minibatches seen during the current training epoch.
        self.train_xs: List[Tensor] = []
        self.train_ys: List[Tensor] = []
        self.train_ts: List[Tensor] = []
        # Dataset built at the end of the latest training epoch. Not set before the
        # first call to `after_training_epoch`.
        self.train_dataset: TensorDataset
        # One training dataset per finished training experience.
        self.train_datasets: List[TensorDataset] = []

        # Minibatches seen during the current evaluation experience.
        self.eval_xs: List[Tensor] = []
        self.eval_ys: List[Tensor] = []
        self.eval_ts: List[Tensor] = []
        # Dataset built at the end of the latest evaluation experience. Not set before
        # the first call to `after_eval_exp`.
        self.eval_dataset: TensorDataset
        # One evaluation dataset per finished evaluation experience.
        self.eval_datasets: List[TensorDataset] = []

    def after_forward(self, strategy, **kwargs):
        """Store the current training minibatch."""
        # NOTE: `y` holds the targets and `t` the task ids (these two used to be
        # swapped, which produced datasets of `(x, t, y)`).
        x, y, t = strategy.mb_x, strategy.mb_y, strategy.mb_task_id
        self.train_xs.append(x)
        self.train_ys.append(y)
        self.train_ts.append(t)
        return super().after_forward(strategy, **kwargs)

    def after_training_epoch(self, strategy, **kwargs):
        """Turn the minibatches of this epoch into `self.train_dataset`."""
        self.train_dataset = TensorDataset(
            torch.cat(self.train_xs), torch.cat(self.train_ys), torch.cat(self.train_ts)
        )
        self.train_xs.clear()
        self.train_ys.clear()
        self.train_ts.clear()
        return super().after_training_epoch(strategy, **kwargs)

    def after_eval_forward(self, strategy, **kwargs):
        """Store the current evaluation minibatch."""
        x, y, t = strategy.mb_x, strategy.mb_y, strategy.mb_task_id
        self.eval_xs.append(x)
        self.eval_ys.append(y)
        self.eval_ts.append(t)
        return super().after_eval_forward(strategy, **kwargs)

    def after_eval_exp(self, strategy, **kwargs):
        """Turn the minibatches of this evaluation experience into `self.eval_dataset`.

        When `strategy.setting` is truthy, the gathered dataset also replaces the
        dataset of the current experience.
        """
        self.eval_dataset = TensorDataset(
            torch.cat(self.eval_xs), torch.cat(self.eval_ys), torch.cat(self.eval_ts)
        )
        self.eval_xs.clear()
        self.eval_ys.clear()
        self.eval_ts.clear()
        if strategy.setting:
            strategy.experience.dataset = self.eval_dataset
        self.eval_datasets.append(self.eval_dataset)
        return super().after_eval_exp(strategy, **kwargs)

    def train(self):
        """Return the dataset gathered during the latest training epoch."""
        return self.train_dataset

    def eval(self):
        """Return the dataset gathered during the latest evaluation experience."""
        return self.eval_dataset

    def after_training_exp(self, strategy: "BaseStrategy", **kwargs):
        """Record the gathered training dataset at the end of a training experience.

        When `strategy.setting` is truthy, the gathered dataset also replaces the
        dataset of the current experience.
        """
        if strategy.setting:
            strategy.experience.dataset = self.train_dataset
        self.train_datasets.append(self.train_dataset)
        return super().after_training_exp(strategy, **kwargs)
class OnlineAccuracyPlugin(StrategyPlugin):
    """Tracks the minibatch accuracy observed during the first training epoch.

    While `enabled`, the accuracy of every training forward pass is recorded. At the end
    of a training epoch, the mean of the recorded values is appended to
    `all_task_accuracies` and the plugin disables itself. Nothing in this class turns it
    back on: `enabled` has to be reset by the user of the plugin.
    """

    def __init__(self):
        super().__init__()
        # Accuracy of each minibatch seen since the plugin was last (re-)enabled.
        self.current_task_accuracies: List[float] = []
        # Mean accuracy of each recorded epoch.
        self.all_task_accuracies: List[float] = []
        self.enabled: bool = True

    def _calc_accuracy(self, strategy: "BaseStrategy") -> float:
        """Fraction of items of the current minibatch whose argmax matches `mb_y`."""
        y_pred = strategy.logits.argmax(-1)
        y = strategy.mb_y
        return ((y_pred == y).sum() / len(y_pred)).item()

    def after_forward(self, strategy: "BaseStrategy", **kwargs):
        """Record the accuracy of the current minibatch, if the plugin is enabled."""
        if self.enabled:
            self.current_task_accuracies.append(self._calc_accuracy(strategy))
        return super().after_forward(strategy, **kwargs)

    def after_training_epoch(self, strategy, **kwargs):
        """Store the mean accuracy of the epoch, then turn the plugin off."""
        # Once disabled, no accuracies are collected anymore: skip the mean in that case,
        # since `np.mean([])` is `nan` (and emits a RuntimeWarning).
        if self.current_task_accuracies:
            self.all_task_accuracies.append(float(np.mean(self.current_task_accuracies)))
            self.current_task_accuracies.clear()
        # Turn off at the end of the first epoch.
        self.enabled = False
        return super().after_training_epoch(strategy, **kwargs)
================================================
FILE: sequoia/methods/avalanche_methods/replay.py
================================================
""" Method based on Replay from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.replay.ReplayPlugin` or
`avalanche.training.strategies.strategy_wrappers.Replay` for more info.
"""
import warnings
from dataclasses import dataclass
from typing import ClassVar, Optional, Type
from avalanche.training.plugins.replay import (
ExperienceBalancedStoragePolicy as ExperienceBalancedStoragePolicy_,
)
from avalanche.training.plugins.replay import ReplayPlugin as ReplayPlugin_
from avalanche.training.plugins.replay import StoragePolicy
from avalanche.training.strategies import BaseStrategy, Replay
from simple_parsing.helpers.hparams import uniform
from sequoia.methods import register_method
from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting
from .base import AvalancheMethod
class ReplayPlugin(ReplayPlugin_):
    """Avalanche's ReplayPlugin, using the patched `ExperienceBalancedStoragePolicy`."""

    def __init__(self, mem_size: int = 200, storage_policy: Optional["StoragePolicy"] = None):
        """Create the plugin, then swap its storage policy for the patched version.

        The swap only happens when the policy is *exactly* Avalanche's
        `ExperienceBalancedStoragePolicy`; any other policy (subclasses included) is
        left untouched.
        """
        super().__init__(mem_size=mem_size, storage_policy=storage_policy)

        stock_policy = self.storage_policy
        if type(stock_policy) is not ExperienceBalancedStoragePolicy_:
            return

        # "patch" the ExperienceBalancedStoragePolicy, keeping its state as-is.
        self.storage_policy = ExperienceBalancedStoragePolicy(
            ext_mem=stock_policy.ext_mem,
            mem_size=stock_policy.mem_size,
            adaptive_size=stock_policy.adaptive_size,
            num_experiences=stock_policy.num_experiences,
        )
class ExperienceBalancedStoragePolicy(ExperienceBalancedStoragePolicy_):
    """Experience-balanced storage policy that warns, rather than fails, when the
    buffer doesn't hold exactly `mem_size` items after an update.
    """

    def __call__(self, strategy: BaseStrategy, **kwargs):
        """Update the external memory with data from the current experience.

        Existing groups are subsampled to make room, and a subsample of the current
        experience's dataset is stored under the key `training_exp_counter + 1`.
        A RuntimeWarning is emitted if the total buffer size ends up different from
        `self.mem_size`.
        """
        num_exps = strategy.training_exp_counter + 1
        num_exps = num_exps if self.adaptive_size else self.num_experiences
        curr_data = strategy.experience.dataset

        # new group may be bigger because of the remainder.
        group_size = self.mem_size // num_exps
        new_group_size = group_size + (self.mem_size % num_exps)

        self.subsample_all_groups(group_size * (num_exps - 1))
        curr_data = self.subsample_single(curr_data, new_group_size)
        self.ext_mem[strategy.training_exp_counter + 1] = curr_data

        # buffer size should always equal self.mem_size
        len_tot = sum(len(el) for el in self.ext_mem.values())
        # TODO: The `assert len_tot == self.mem_size` check is replaced with a warning
        # for now. Should check if this makes any difference in the performance of the
        # plugin.
        if len_tot != self.mem_size:
            warnings.warn(
                RuntimeWarning(
                    f"Ignoring a failing assert in Avalanche's Replay plugin: "
                    f"len_tot ({len_tot}) != self.mem_size ({self.mem_size})"
                )
            )
        # NOTE: Could also avoid copying the code from the parent's method here, by
        # calling `super().__call__(strategy=strategy, **kwargs)` within a
        # `contextlib.suppress(AssertionError)` block.
@register_method
@dataclass
class ReplayMethod(AvalancheMethod[Replay]):
    """Replay strategy from Avalanche.
    See Replay plugin for details.
    This strategy does not use task identities.
    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Replay buffer size.
    mem_size: int = uniform(100, 2_000, default=200)
    strategy_class: ClassVar[Type[BaseStrategy]] = Replay

    def create_cl_strategy(self, setting: SLSetting) -> Replay:
        """Create the strategy, with its stock ReplayPlugin swapped for the patched one.

        The first plugin whose type is exactly Avalanche's `ReplayPlugin` is replaced
        (at the same index) by a patched `ReplayPlugin`, built with the same `mem_size`
        and `storage_policy`.
        """
        strategy = super().create_cl_strategy(setting)

        # Index of the original plugin (exact type match, so subclasses are ignored).
        plugin_index: Optional[int] = next(
            (
                index
                for index, candidate in enumerate(strategy.plugins)
                if type(candidate) is ReplayPlugin_
            ),
            None,
        )
        assert plugin_index is not None, "strategy should have the Plugin, no?"
        assert isinstance(plugin_index, int)

        stock_plugin: ReplayPlugin_ = strategy.plugins[plugin_index]
        strategy.plugins[plugin_index] = ReplayPlugin(
            mem_size=stock_plugin.mem_size,
            storage_policy=stock_plugin.storage_policy,
        )
        return strategy
if __name__ == "__main__":
    # Demo: apply the Replay method to task-incremental MNIST, split into 5 tasks.
    from simple_parsing import ArgumentParser

    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    # The fields of the method are added to the parser under the "method" destination.
    parser.add_arguments(ReplayMethod, "method")
    args = parser.parse_args()
    method: ReplayMethod = args.method
    # Run the method on the setting.
    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/replay_test.py
================================================
""" WIP: Tests for the Replay Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .replay import ReplayMethod
class TestReplayMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` against `ReplayMethod`."""

    # Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = ReplayMethod
================================================
FILE: sequoia/methods/avalanche_methods/synaptic_intelligence.py
================================================
""" Method based on SynapticIntelligence from [Avalanche](https://github.com/ContinualAI/avalanche).
See `avalanche.training.plugins.synaptic_intelligence.SynapticIntelligencePlugin` or
`avalanche.training.strategies.strategy_wrappers.SynapticIntelligence` for more info.
"""
from dataclasses import dataclass
from typing import ClassVar, Optional, Set, Type
import numpy as np
import torch
from avalanche.training.plugins.synaptic_intelligence import EwcDataType, ParamDict
from avalanche.training.plugins.synaptic_intelligence import (
SynapticIntelligencePlugin as SynapticIntelligencePlugin_,
)
from avalanche.training.plugins.synaptic_intelligence import SynDataType
from avalanche.training.strategies import BaseStrategy, SynapticIntelligence
from simple_parsing import ArgumentParser
from simple_parsing.helpers.hparams import uniform
from torch import Tensor
from torch.nn import Module
from sequoia.methods import register_method
from sequoia.settings.sl import SLSetting, TaskIncrementalSLSetting
from .base import AvalancheMethod
class SynapticIntelligencePlugin(SynapticIntelligencePlugin_):
    """Variant of Avalanche's Synaptic Intelligence plugin which tolerates parameters
    whose flattened size differs from the entries stored in the plugin's dictionaries.

    All the helpers below are static methods, like the ones they stand in for.
    """

    # TODO: Why do they have everything as a static method rather than as a classmethod?
    # Makes it almost impossible to extend this SynapticIntelligencePlugin!

    @staticmethod
    @torch.no_grad()
    def extract_weights(model: Module, target: ParamDict, excluded_parameters: Set[str]):
        """Copy the flattened (detached, CPU) weights of `model` into `target`.

        :param model: module whose allowed parameters are read.
        :param target: dict of flat tensors, indexed by parameter name. Updated in-place.
        :param excluded_parameters: forwarded to `allowed_parameters`.
        """
        params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters)
        # The upstream implementation fails when a parameter changed size, with:
        # RuntimeError: The expanded size of the tensor (128) must match the existing
        # size (256) at non-singleton dimension 0. Target sizes: [128].
        # Tensor sizes: [256]
        # TODO: @lebrice For now I'll just replace the entries in that 'target' dict if
        # the shapes don't match, and hope it still works.
        for name, param in params:
            flat_param = param.detach().cpu().flatten()
            # NOTE: The entries of `target` are flat, so the comparison has to use the
            # *flattened* shape of the parameter.
            if flat_param.shape == target[name].shape:
                target[name][...] = flat_param
            else:
                # Replace the data of entries with a different size. The clone is needed
                # because `flat_param` can be a view of the live parameter when the
                # model is on the CPU: without it, the stored values would silently
                # follow the weights of the model.
                target[name].data = flat_param.clone()

    @staticmethod
    @torch.no_grad()
    def extract_grad(model, target: ParamDict, excluded_parameters: Set[str]):
        """Copy the flattened (detached, CPU) gradients of `model` into `target`.

        Parameters without a gradient are skipped, leaving their entry unchanged.
        """
        params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters)
        # Store the gradients into target
        for name, param in params:
            # `param.grad` can be None (upstream fails with AttributeError: 'NoneType'
            # object has no attribute 'detach').
            if param.grad is not None:
                target[name][...] = param.grad.detach().cpu().flatten()

    @staticmethod
    def compute_ewc_loss(
        model, ewc_data: EwcDataType, excluded_parameters: Set[str], device, lambd=0.0
    ):
        """Compute the regularization loss over the allowed parameters of `model`.

        For each parameter, the term is `dot(ewc_data[1], (w - ewc_data[0]) ** 2) * lambd / 2`.

        :return: the sum of the terms, or None if there are no allowed parameters.
        """
        params = SynapticIntelligencePlugin_.allowed_parameters(model, excluded_parameters)
        loss = None
        for name, param in params:
            weights = param.to(device).flatten()  # Flat, not detached
            param_ewc_data_0 = ewc_data[0][name].to(device)  # Flat, detached
            param_ewc_data_1 = ewc_data[1][name].to(device)  # Flat, detached

            # Upstream fails when sizes differ, with: RuntimeError: inconsistent tensor
            # size, expected tensor [128] and src [256] to have the same number of
            # elements, but got 128 and 256 elements respectively
            if not (param_ewc_data_1.shape == param_ewc_data_0.shape == weights.shape):
                # FIXME: For now, I'll just consider the 'common' elements?
                param_0_cols = param_ewc_data_0.shape[-1]
                param_1_cols = param_ewc_data_1.shape[-1]
                # Weird: why does param_0 have *more* columns than param_1?
                assert param_0_cols > param_1_cols
                # Assuming that the first indices are the common weights between tasks:
                param_ewc_data_0 = param_ewc_data_0[..., :param_1_cols]
                weights = weights[..., :param_1_cols]

            syn_loss: Tensor = torch.dot(
                param_ewc_data_1, (weights - param_ewc_data_0) ** 2
            ) * (lambd / 2)

            if loss is None:
                loss = syn_loss
            else:
                loss += syn_loss
        return loss

    @staticmethod
    @torch.no_grad()
    def post_update(model, syn_data: SynDataType, excluded_parameters: Set[str]):
        """Refresh `new_theta` and `grad` from `model`, then accumulate
        `grad * (new_theta - old_theta)` into `syn_data["trajectory"]`.
        """
        # Use the helpers of *this* class directly, so that this doesn't depend on the
        # parent class having been patched.
        SynapticIntelligencePlugin.extract_weights(
            model, syn_data["new_theta"], excluded_parameters
        )
        SynapticIntelligencePlugin.extract_grad(model, syn_data["grad"], excluded_parameters)

        for param_name in syn_data["trajectory"]:
            # Upstream does a plain in-place `+=` here, which fails when sizes differ,
            # with: RuntimeError: The size of tensor a (128) must match the size of
            # tensor b (256) at non-singleton dimension 0
            destination: Tensor = syn_data["trajectory"][param_name]
            grad: Tensor = syn_data["grad"][param_name]
            new_theta: Tensor = syn_data["new_theta"][param_name]
            old_theta: Tensor = syn_data["old_theta"][param_name]

            if not (destination.shape == grad.shape == new_theta.shape == old_theta.shape):
                destination_cols = destination.shape[-1]
                grad_cols = grad.shape[-1]
                new_theta_cols = new_theta.shape[-1]
                old_theta_cols = old_theta.shape[-1]
                assert grad_cols < new_theta_cols and new_theta_cols == old_theta_cols
                # FIXME: @lebrice Chop the last two? or extend the grad? Extending the
                # grad with zeros for now (no idea what that implies though!)
                grad_extension = grad.new_zeros(size=[*grad.shape[:-1], new_theta_cols - grad_cols])
                grad = torch.cat([grad, grad_extension], -1)
                destination_extension = destination.new_zeros(
                    size=[*destination.shape[:-1], new_theta_cols - destination_cols]
                )
                destination = torch.cat([destination, destination_extension], -1)

            assert destination.shape == grad.shape == new_theta.shape == old_theta.shape
            destination += grad * (new_theta - old_theta)
            # Replace the entry (in case we replaced the `destination` variable above).
            syn_data["trajectory"][param_name] = destination

    @staticmethod
    @torch.no_grad()
    def update_ewc_data(
        net,
        ewc_data: EwcDataType,
        syn_data: SynDataType,
        clip_to: float,
        excluded_parameters: Set[str],
        c=0.0015,
    ):
        """Update `ewc_data` from the trajectories accumulated in `syn_data`.

        The cumulative trajectory of each parameter is increased by
        `c * trajectory / ((new_theta - ewc_data[0]) ** 2 + eps)`. `ewc_data[1]` is then
        set to its negation, clamped to at most `clip_to`, and `ewc_data[0]` to a copy
        of the current weights.
        """
        SynapticIntelligencePlugin.extract_weights(net, syn_data["new_theta"], excluded_parameters)
        eps = 0.0000001  # 0.001 in few task - 0.1 used in a more complex setup

        for param_name in syn_data["cum_trajectory"]:
            # Upstream does a plain in-place `+=` here, which fails when sizes differ,
            # with: RuntimeError: The size of tensor a (128) must match the size of
            # tensor b (256) at non-singleton dimension 0
            cum_trajectory = syn_data["cum_trajectory"][param_name]
            trajectory = syn_data["trajectory"][param_name]
            new_theta = syn_data["new_theta"][param_name]
            ewc_data_0 = ewc_data[0][param_name]

            if not (
                cum_trajectory.shape == trajectory.shape == new_theta.shape == ewc_data_0.shape
            ):
                cum_trajectory_cols = cum_trajectory.shape[-1]
                trajectory_cols = trajectory.shape[-1]
                new_theta_cols = new_theta.shape[-1]
                ewc_data_0_cols = ewc_data_0.shape[-1]
                assert cum_trajectory_cols < trajectory_cols == new_theta_cols == ewc_data_0_cols
                # FIXME: @lebrice Extending the cum_trajectory with zeros for now (no
                # idea what that implies though!)
                cum_trajectory_extension = cum_trajectory.new_zeros(
                    size=[
                        *cum_trajectory.shape[:-1],
                        trajectory_cols - cum_trajectory_cols,
                    ]
                )
                cum_trajectory = torch.cat([cum_trajectory, cum_trajectory_extension], -1)

            # These are torch tensors: square them with a tensor op, not with numpy.
            cum_trajectory += c * trajectory / ((new_theta - ewc_data_0) ** 2 + eps)
            # Reset the cum_trajectory variable in the dict, just in case we replaced
            # the variable above.
            syn_data["cum_trajectory"][param_name] = cum_trajectory

        for param_name in syn_data["cum_trajectory"]:
            # change sign here because the Ewc regularization
            # in Caffe (theta - thetaold) is inverted w.r.t. syn equation [4]
            # (thetaold - theta)
            # (the negation creates a new tensor, independent from the trajectory.)
            ewc_data[1][param_name] = -syn_data["cum_trajectory"][param_name]

        for param_name in ewc_data[1]:
            ewc_data[1][param_name] = torch.clamp(ewc_data[1][param_name], max=clip_to)
            ewc_data[0][param_name] = syn_data["new_theta"][param_name].clone()
# TODO: Why do they have everything as a static method rather than as a classmethod?
# Makes it almost impossible to extend this SynapticIntelligencePlugin!
# The original plugin class is therefore patched in-place with the helpers of the class
# above. Accessing a staticmethod through its class gives back the plain function, so
# each helper is wrapped in `staticmethod` again: otherwise, calling it through an
# *instance* of the original plugin would pass that instance as the first argument.
SynapticIntelligencePlugin_.extract_weights = staticmethod(
    SynapticIntelligencePlugin.extract_weights
)
SynapticIntelligencePlugin_.extract_grad = staticmethod(
    SynapticIntelligencePlugin.extract_grad
)
SynapticIntelligencePlugin_.compute_ewc_loss = staticmethod(
    SynapticIntelligencePlugin.compute_ewc_loss
)
SynapticIntelligencePlugin_.post_update = staticmethod(
    SynapticIntelligencePlugin.post_update
)
SynapticIntelligencePlugin_.update_ewc_data = staticmethod(
    SynapticIntelligencePlugin.update_ewc_data
)
@register_method
@dataclass
class SynapticIntelligenceMethod(AvalancheMethod[SynapticIntelligence]):
    """The Synaptic Intelligence strategy from Avalanche.
    This is the Synaptic Intelligence PyTorch implementation of the
    algorithm described in the paper
    "Continuous Learning in Single-Incremental-Task Scenarios"
    (https://arxiv.org/abs/1806.08568)
    The original implementation has been proposed in the paper
    "Continual Learning Through Synaptic Intelligence"
    (https://arxiv.org/abs/1703.04200).
    The Synaptic Intelligence regularization can also be used in a different
    strategy by applying the :class:`SynapticIntelligencePlugin` plugin.
    See the parent class `AvalancheMethod` for the other hyper-parameters and methods.
    """

    # Synaptic Intelligence lambda term.
    si_lambda: float = uniform(1e-2, 1.0, default=0.5)  # TODO: Check the range.
    strategy_class: ClassVar[Type[BaseStrategy]] = SynapticIntelligence

    def create_cl_strategy(self, setting: SLSetting) -> SynapticIntelligence:
        """Create the strategy, with its stock SI plugin swapped for the patched one.

        The first plugin whose type is exactly Avalanche's `SynapticIntelligencePlugin`
        is replaced (at the same index) by the patched plugin, which takes over the
        `si_lambda`, `excluded_parameters`, `ewc_data`, `syn_data` and `_device` of the
        plugin it replaces.
        """
        strategy = super().create_cl_strategy(setting)

        # Index of the original plugin (exact type match, so subclasses are ignored).
        plugin_index: Optional[int] = next(
            (
                index
                for index, candidate in enumerate(strategy.plugins)
                if type(candidate) is SynapticIntelligencePlugin_
            ),
            None,
        )
        assert plugin_index is not None, "strategy should have the Plugin, no?"
        assert isinstance(plugin_index, int)

        stock_plugin: SynapticIntelligencePlugin_ = strategy.plugins[plugin_index]
        patched_plugin = SynapticIntelligencePlugin(
            si_lambda=stock_plugin.si_lambda,
            excluded_parameters=stock_plugin.excluded_parameters,
            # device=stock_plugin.device,
        )
        # Carry over the state of the original plugin.
        for attribute in ("ewc_data", "syn_data", "_device"):
            setattr(patched_plugin, attribute, getattr(stock_plugin, attribute))

        strategy.plugins[plugin_index] = patched_plugin
        return strategy
if __name__ == "__main__":
    # Demo: apply the Synaptic Intelligence method to task-incremental MNIST, split
    # into 5 tasks.
    setting = TaskIncrementalSLSetting(
        dataset="mnist", nb_tasks=5, monitor_training_performance=True
    )
    # Create the Method, either manually or through the command-line:
    parser = ArgumentParser(__doc__)
    # The fields of the method are added to the parser under the "method" destination.
    parser.add_arguments(SynapticIntelligenceMethod, "method")
    args = parser.parse_args()
    method: SynapticIntelligenceMethod = args.method
    # Run the method on the setting.
    results = setting.apply(method)
================================================
FILE: sequoia/methods/avalanche_methods/synaptic_intelligence_test.py
================================================
""" WIP: Tests for the SynapticIntelligence Method.
For now this only inherits the tests from the AvalancheMethod class.
"""
from typing import ClassVar, Type
from .base import AvalancheMethod
from .base_test import _TestAvalancheMethod
from .synaptic_intelligence import SynapticIntelligenceMethod
class TestSynapticIntelligenceMethod(_TestAvalancheMethod):
    """Runs the tests inherited from `_TestAvalancheMethod` against
    `SynapticIntelligenceMethod`.
    """

    # Method class under test.
    Method: ClassVar[Type[AvalancheMethod]] = SynapticIntelligenceMethod
================================================
FILE: sequoia/methods/base_method.py
================================================
""" Defines a Method, which is a "solution" for a given "problem" (a Setting).
The Method could be whatever you want, really. For the 'baselines' we have here,
we use pytorch-lightning, and a few little utility classes such as `Metrics` and
`Loss`, which are basically just like dicts/objects, with some cool other
methods.
TODO: Add a wrapper to limit the 'epoch' length in RL, and then use an early-stopping
callback to also perform validation like in SL.
"""
import warnings
from dataclasses import dataclass, fields, is_dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Type, Union
import gym
import torch
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from simple_parsing import mutable_field
from wandb.wandb_run import Run
from sequoia.common import Config
from sequoia.common.spaces import Image
from sequoia.methods import register_method
from sequoia.settings import RLSetting, SLSetting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import Method
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import Actions, Observations, Rewards
from sequoia.settings.base.results import Results
from sequoia.settings.base.setting import Setting, SettingType
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable
from sequoia.utils.serialization import Serializable
from sequoia.utils.utils import compute_identity
from .models import BaseModel
from .trainer import Trainer, TrainerConfig
logger = get_logger(__name__)
# TODO: Set the target setting back to Setting once we fix the PL + RL issues.
@register_method
@dataclass
class BaseMethod(Method, Serializable, Parseable, target_setting=SLSetting):
"""Versatile Base method which targets all settings.
Uses pytorch-lightning's Trainer for training and LightningModule as model.
Uses a [BaseModel](methods/models/base_model/base_model.py), which
can be used for:
- Self-Supervised training with modular auxiliary tasks;
- Semi-Supervised training on partially labeled batches;
- Multi-Head prediction (e.g. in task-incremental scenario);
"""
# NOTE: these two fields are also used to create the command-line arguments.
# HyperParameters of the method.
hparams: BaseModel.HParams = mutable_field(BaseModel.HParams)
# Configuration options.
config: Config = mutable_field(Config)
# Options for the Trainer object.
trainer_options: TrainerConfig = mutable_field(TrainerConfig)
def __init__(
    self,
    hparams: BaseModel.HParams = None,
    config: Config = None,
    trainer_options: TrainerConfig = None,
    **kwargs,
):
    """Creates a new BaseMethod, using the provided configuration options.

    Any of `hparams`, `config` and `trainer_options` that is left out (or falsy) gets
    created from, in order of priority: the keyword arguments, the command-line
    arguments stored in `self._argv`, or the default values of its class.

    Parameters
    ----------
    hparams : BaseModel.HParams, optional
        Hyper-parameters of the BaseModel used by this Method. Defaults to None.
    config : Config, optional
        Configuration dataclass with options like log_dir, device, etc. Defaults to
        None.
    trainer_options : TrainerConfig, optional
        Dataclass which holds all the options for creating the `pl.Trainer` which
        will be used for training. Defaults to None.
    **kwargs :
        If any of the above arguments are left as `None`, then they will be created
        using any appropriate value from `kwargs`, if present.

    ## Examples:
    ```
    method = BaseMethod(hparams=BaseModel.HParams(learning_rate=0.01))
    method = BaseMethod(learning_rate=0.01) # Same as above
    method = BaseMethod(config=Config(debug=True))
    method = BaseMethod(debug=True) # Same as above
    method = BaseMethod(hparams=BaseModel.HParams(learning_rate=0.01),
                        config=Config(debug=True))
    method = BaseMethod(learning_rate=0.01, debug=True) # Same as above
    ```
    """
    # TODO: When creating a Method from a script, like `BaseMethod()`,
    # should we expect the hparams to be passed? Should we create them from
    # the **kwargs? Should we parse them from the command-line?

    # The type of hparams to use comes from the type annotation of the field.
    hparam_type = [f for f in fields(self) if f.name == "hparams"][0].type

    # Pick how a missing object gets built. The chosen builder is only invoked for the
    # arguments that were not provided.
    if kwargs:
        # Use the keyword arguments to populate the hparams, config and trainer
        # options.
        logger.info(
            f"using keyword arguments {kwargs} to populate the corresponding "
            f"values in the hparams, config and trainer_options."
        )

        def build(dataclass_type):
            return dataclass_type.from_dict(kwargs, drop_extra_fields=True)

    elif self._argv:
        # Since the method was parsed from the command-line, parse those as well from
        # the argv that were used to create the Method.

        def build(dataclass_type):
            return dataclass_type.from_args(self._argv, strict=False)

    else:
        # Use the default values.

        def build(dataclass_type):
            return dataclass_type()

    self.hparams = hparams or build(hparam_type)
    self.config = config or build(Config)
    self.trainer_options = trainer_options or build(TrainerConfig)

    assert self.hparams
    assert self.config
    assert self.trainer_options

    if self.config.debug:
        # Disable wandb logging if debug is True.
        self.trainer_options.no_wandb = True

    # The model and Trainer objects will be created in `self.configure`.
    # NOTE: The bare annotations below don't create the attributes, they only give
    # some type information for static type checking.
    self.trainer: Trainer
    self.model: BaseModel
    self.setting: Setting

    self.additional_train_wrappers: List[Callable] = []
    self.additional_valid_wrappers: List[Callable] = []
def configure(self, setting: SettingType) -> None:
    """Configures the method for the given Setting.

    Concretely, this creates the model and Trainer objects which will be
    used to train and test a model for the given `setting`.

    This also fills in the hyper-parameters left as None (batch size, multihead,
    encoder), reconciles `self.config` with `setting.config`, and adjusts the
    trainer options depending on the kind of setting.

    Args:
        setting (SettingType): The setting the method will be evaluated on.
    """
    # Note: this here is temporary, just tinkering with wandb atm.
    method_name: str = self.get_name()

    # Set the default batch size to use, depending on the kind of Setting.
    if self.hparams.batch_size is None:
        if isinstance(setting, RLSetting):
            # Default batch size of 1 in RL
            self.hparams.batch_size = 1
        elif isinstance(setting, SLSetting):
            self.hparams.batch_size = 32
        else:
            warnings.warn(
                UserWarning(
                    f"Dont know what batch size to use by default for setting "
                    f"{setting}, will try 16."
                )
            )
            self.hparams.batch_size = 16
    # Set the batch size on the setting.
    setting.batch_size = self.hparams.batch_size

    # TODO: Should we set the 'config' on the setting from here?
    # Reconcile the two configs: nothing to do if they already agree; otherwise a
    # non-default method config is pushed to the setting, and failing that, the
    # setting's config is adopted by the method.
    if setting.config and setting.config == self.config:
        pass
    elif self.config != Config():
        assert (
            setting.config is None or setting.config == Config()
        ), "method.config has been modified, and so has setting.config!"
        setting.config = self.config
    elif setting.config:
        assert setting.config != Config(), "Weird, both configs have default values.."
        self.config = setting.config

    setting_name: str = setting.get_name()
    dataset = setting.dataset

    if isinstance(setting, IncrementalAssumption):
        if self.hparams.multihead is None:
            # Use a multi-head model by default if the task labels are
            # available at training time and has more than one task.
            if setting.task_labels_at_test_time:
                assert setting.task_labels_at_train_time
            self.hparams.multihead = setting.nb_tasks > 1

    if not setting.known_task_boundaries_at_train_time:
        # If we won't have access to the task boundaries, so we can only do one
        # epoch.
        self.trainer_options.max_epochs = 1

    if isinstance(setting, ContinualRLSetting):
        setting.add_done_to_observations = True
        setting.prefer_tensors = True

        if isinstance(setting.observation_space.x, Image):
            # Image observations: default to a convolutional encoder, and apply the
            # same list of transforms in every phase.
            if self.hparams.encoder is None:
                self.hparams.encoder = "simple_convnet"
            # TODO: Add 'proper' transforms for cartpole, specifically?
            from sequoia.common.transforms import Transforms

            transforms = [
                Transforms.three_channels,
                Transforms.to_tensor,
                Transforms.resize_64x64,
            ]
            setting.transforms = transforms
            setting.train_transforms = transforms
            setting.val_transforms = transforms
            setting.test_transforms = transforms

        # Configure the baseline specifically for an RL setting.
        # TODO: Select which output head to use from the command-line?

        # Limit the number of epochs so we never iterate on a closed env.
        # TODO: Would multiple "epochs" be possible?
        if setting.train_max_steps is not None:
            self.trainer_options.max_epochs = 1
            # Number of batches = number of steps / batch size (a batch size of None
            # or 0 counts as 1).
            self.trainer_options.limit_train_batches = setting.train_max_steps // (
                setting.batch_size or 1
            )
            # Same computation, capped at 1000 validation batches.
            self.trainer_options.limit_val_batches = min(
                setting.train_max_steps // (setting.batch_size or 1), 1000
            )
            # TODO: Test batch size is limited to 1 for now.
            # NOTE: This isn't used, since we don't call `trainer.test()`.
            self.trainer_options.limit_test_batches = setting.train_max_steps

    # TODO: Debug the multi-GPU setup with DP accelerator and pytorch lightning.

    self.model = self.create_model(setting).to(self.config.device)

    # The PolicyHead actually does its own backward pass, so we disable
    # automatic optimization when using it.
    from .models.output_heads import PolicyHead

    if isinstance(self.model.output_head, PolicyHead):
        # Doing the backward pass manually, since there might not be a loss
        # at each step.
        self.trainer_options.automatic_optimization = False

    # NOTE: the trainer is created last, after all the changes to
    # `self.trainer_options` above.
    self.trainer = self.create_trainer(setting)
    self.setting = setting
def fit(
    self,
    train_env: Environment[Observations, Actions, Rewards],
    valid_env: Environment[Observations, Actions, Rewards],
):
    """Train `self.model` on the given environments; invoked by the Setting.

    The Setting may call this more than once before training is 'over', for
    instance once per task when training on a series of tasks.
    Overwrite this to customize training.

    Args:
        train_env: Environment passed to the Trainer as `train_dataloader`.
        valid_env: Environment passed to the Trainer as `val_dataloaders`.

    Returns:
        Whatever `self.trainer.fit` returns.
    """
    assert self.model is not None, (
        "Setting should have been called method.configure(setting=self) "
        "before calling `fit`!"
    )
    # TODO: Figure out if there is a smarter way to reset the state of the Trainer,
    # rather than just creating a new one every time.
    # NOTE: Only resetting `self.trainer.current_epoch = 0` doesn't seem sufficient:
    # for instance, the early-stopping callback would prevent training on future
    # tasks, since they have a higher validation loss.
    self.trainer = self.create_trainer(self.setting)
    fit_kwargs = dict(
        model=self.model,
        train_dataloader=train_env,
        val_dataloaders=valid_env,
    )
    outcome = self.trainer.fit(**fit_kwargs)
    # BUG: After `fit`, it seems like the output head of the model is on the CPU?
    # Move the model back onto the configured device as a workaround.
    self.model.to(self.config.device)
    return outcome
def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
    """Predict a batch of actions for a batch of observations.

    Called by the Setting during the test loop. The model is switched to eval
    mode and the forward pass runs without gradient tracking.

    TODO: There is a mismatch here between the type of the output of this
    method (`Actions`) and the type of `action_space`: we should either have
    a `Discrete` action space, and this method should return ints, or this
    method should return `Actions`, and the `action_space` should be a
    `TypedDictSpace` or something similar.
    Either way, `get_actions(obs, action_space) in action_space` should
    always be `True`.

    Args:
        observations: The batch of observations to act upon.
        action_space: Space that the numpy version of the actions must belong to.

    Returns:
        The `actions` attribute of the model's forward pass.
    """
    self.model.eval()
    with torch.no_grad():
        model_output = self.model.forward(observations)
    predicted: Actions = model_output.actions
    # Sanity check: the numpy form of the prediction has to fit the action space.
    predicted_np = predicted.actions_np
    assert predicted_np in action_space, (predicted_np, action_space)
    return predicted
def create_model(self, setting: SettingType) -> BaseModel[SettingType]:
    """Build the `BaseModel` (a LightningModule) to use on the given Setting.

    Extend this to customize which model is used depending on the setting.

    TODO: As @oleksost pointed out, this might allow the creation of weird
    'frankenstein' methods that are super-specific to each setting, without
    really having anything in common.

    Args:
        setting (SettingType): An experimental setting.

    Returns:
        BaseModel[SettingType]: The BaseModel that is to be applied
        to that setting.
    """
    # The model receives the setting along with this method's hparams and config.
    model_kwargs = {
        "setting": setting,
        "hparams": self.hparams,
        "config": self.config,
    }
    return BaseModel(**model_kwargs)
def create_trainer(self, setting: SettingType) -> Trainer:
    """Create the pytorch-lightning Trainer to use for the given setting.

    The callbacks come from `self.configure_callbacks(setting)`; overwrite that
    method (or this one) to use different callbacks.

    Args:
        setting (SettingType): The setting the Trainer is created for. Its
            `wandb` attribute decides whether a wandb logger is attached.

    Returns:
        Trainer: the Trainer object.
    """
    trainer_callbacks = self.configure_callbacks(setting)
    # Only attach a wandb logger when a wandb config with a project is present.
    use_wandb = bool(setting.wandb and setting.wandb.project)
    trainer_loggers = [setting.wandb.make_logger()] if use_wandb else []
    return self.trainer_options.make_trainer(
        config=self.config,
        callbacks=trainer_callbacks,
        loggers=trainer_loggers,
    )
def get_experiment_name(self, setting: Setting, experiment_id: str = None) -> str:
    """Gets a unique name for the experiment where `self` is applied to `setting`.

    This experiment name will be passed to `orion` when performing a run of
    Hyper-Parameter Optimization.

    Parameters
    ----------
    - setting : Setting

        The `Setting` onto which this method will be applied.

    - experiment_id: str, optional

        A custom hash to append to the experiment name. When `None` (default), a
        unique hash will be created based on the values of the Setting's fields.

    Returns
    -------
    str
        The name for the experiment, of the form
        `<method name>-<setting name>_<dataset>_<experiment id>`.
    """
    if not experiment_id:
        # Derive the id from the (flattened) fields of the setting.
        raw_fields = setting.to_dict()
        # BUG: Some settings have non-string keys/value or something?
        from sequoia.utils.utils import flatten_dict

        flat_fields = flatten_dict(raw_fields)
        experiment_id = compute_identity(size=5, **flat_fields)
    assert isinstance(setting.dataset, str), "assuming that dataset is a str for now."
    method_part = self.get_name()
    setting_part = setting.get_name()
    return f"{method_part}-{setting_part}_{setting.dataset}_{experiment_id}"
def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
    """Returns the search space to use for HPO in the given Setting.

    Parameters
    ----------
    setting : Setting
        The Setting on which the run of HPO will take place. (Not used by this
        implementation.)

    Returns
    -------
    Mapping[str, Union[str, Dict]]
        An orion-formatted search space dictionary, mapping from hyper-parameter
        names (str) to their priors (str), or to nested dicts of the same form.
        Here it has two entries: "hparams" and "trainer_options".
    """
    search_space = {}
    search_space["hparams"] = self.hparams.get_orion_space()
    search_space["trainer_options"] = self.trainer_options.get_orion_space()
    return search_space
def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
    """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

    It is required that this method be implemented if you want to perform HPO sweeps
    with Orion.

    Parameters
    ----------
    new_hparams : Dict[str, Any]
        The new hyper-parameters being recommended by the HPO algorithm. These will
        have the same structure as the search space: the "hparams" entry is applied
        onto `self.hparams`, and the "max_epochs" value of the "trainer_options"
        entry is applied onto `self.trainer_options`.
    """
    # Overwrite the suggested fields of the hparams, leaving the others unchanged.
    suggested_hparams = new_hparams["hparams"]
    self.hparams = self.hparams.replace(**suggested_hparams)
    # BUG with the `replace` function and Union[int, float] type, it doesn't
    # preserve the type of the field when serializing/deserializing!
    # Hence `max_epochs` is assigned directly instead of going through `replace`.
    suggested_trainer_options = new_hparams["trainer_options"]
    self.trainer_options.max_epochs = suggested_trainer_options["max_epochs"]
def hparam_sweep(
    self,
    setting: Setting,
    search_space: Dict[str, Union[str, Dict]] = None,
    experiment_id: str = None,
    database_path: Union[str, Path] = None,
    max_runs: int = None,
    hpo_algorithm: Union[str, Dict] = "BayesianOptimizer",
    debug: bool = False,
) -> Tuple[BaseModel.HParams, float]:
    """Configure this method on `setting`, then delegate the sweep to the parent class.

    All arguments are forwarded unchanged to the parent's `hparam_sweep`, except
    `debug`, which is also enabled when `self.config.debug` is set.
    """
    # NOTE: `max_epochs` used to be hard-set to 1 here to keep runs short. It is
    # now a tunable hyper-parameter, so its value isn't forced anymore.

    # Call 'configure', so that we create `self.model` at least once, which will
    # update the hparams.output_head field to be of the right type. This is
    # necessary in order for the `get_orion_space` to retrieve all the hparams
    # of the output head.
    self.configure(setting)
    sweep_kwargs = dict(
        setting=setting,
        search_space=search_space,
        experiment_id=experiment_id,
        database_path=database_path,
        max_runs=max_runs,
        debug=debug or self.config.debug,
        hpo_algorithm=hpo_algorithm,
    )
    return super().hparam_sweep(**sweep_kwargs)
def receive_results(self, setting: Setting, results: Results):
    """Receive the `results` produced by applying `self` to `setting`.

    This implementation only forwards both arguments to the parent class.
    """
    parent = super()
    parent.receive_results(setting, results=results)
def configure_callbacks(self, setting: SettingType = None) -> List[Callback]:
    """Create the PytorchLightning Callbacks for this Setting.

    These callbacks will get added to the Trainer in `create_trainer`.

    Parameters
    ----------
    setting : SettingType
        The `Setting` on which this Method is going to be applied. Falls back to
        `self.setting` when not given.

    Returns
    -------
    List[Callback]
        A List of `Callback` objects to use during training. At the moment this
        is a single `EarlyStopping` callback monitoring "val/loss".
    """
    setting = setting or self.setting
    # TODO: Move this to something like a `configure_callbacks` method in the model,
    # once PL adds it.
    # Callbacks that are currently disabled: `self.hparams.knn_callback` and
    # `SaveVaeSamplesCallback` (from sequoia.common.callbacks.vae_callback).
    early_stopping = EarlyStopping(monitor="val/loss")
    return [early_stopping]
def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type[Setting], Results]:
    """(WIP): Runs this Method on all its applicable settings.

    Each applicable setting type is instantiated from `argv` and this method is
    applied to it. A summary of the metrics is printed at the end.

    Returns
    -------
    Dict mapping from setting type to the Results produced by this method.
    """
    results_by_setting: Dict[Type[Setting], Results] = {}
    for setting_cls in self.get_applicable_settings():
        results_by_setting[setting_cls] = setting_cls.from_args(argv).apply(self)

    print(f"All results for method of type {type(self)}:")
    # Summary: setting name -> metric (or "crashed" when the results are falsy).
    summary = {}
    for setting_cls, setting_results in results_by_setting.items():
        setting_name = setting_cls.get_name()
        summary[setting_name] = setting_results.get_metric() if setting_results else "crashed"
    print(summary)
    return results_by_setting
def __init_subclass__(cls, target_setting: Type[SettingType] = Setting, **kwargs) -> None:
    """Called when creating a new subclass of Method.

    Logs a critical message when the new subclass isn't a dataclass, then
    forwards everything to the parent class' `__init_subclass__`.

    Args:
        target_setting (Type[Setting], optional): The target setting of the new
            subclass. Defaults to `Setting`.
        **kwargs: Other class keyword arguments, forwarded to the parent hook.
    """
    if not is_dataclass(cls):
        # This is only reported, not raised: a subclass that isn't a dataclass
        # still works, it just silently loses its command-line arguments.
        logger.critical(
            UserWarning(
                f"The BaseMethod subclass {cls} should be decorated with "
                f"@dataclass!\n"
                f"While this isn't strictly necessary for things to work, it is "
                f"highly recommended, as any dataclass-style class attributes "
                f"won't have the corresponding command-line arguments "
                f"generated, which can cause a lot of subtle bugs."
            )
        )
    super().__init_subclass__(target_setting=target_setting, **kwargs)
def on_task_switch(self, task_id: Optional[int]) -> None:
    """Relay a task switch to the model.

    Args:
        task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
    """
    model = self.model
    model.on_task_switch(task_id)
def setup_wandb(self, run: Run) -> None:
    """Called by the Setting when using Weights & Biases, after `wandb.init`.

    This method is here to provide Methods with the opportunity to log some of their
    configuration options or hyper-parameters to wandb.

    NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
    this point.

    This implementation currently doesn't log anything: the candidate entries
    below are all commented out.

    Parameters
    ----------
    run : wandb.Run
        Current wandb Run.
    """
    # TODO: (@lebrice) I think these will probably be set by the wandb logger,
    # run.config["config"] = self.config.to_dict()
    # Need to check whether this causes any issues.
    # run.config["hparams"] = self.hparams.to_dict()
    # run.config["trainer_config"] = self.trainer_options
================================================
FILE: sequoia/methods/base_method_test.py
================================================
from typing import ClassVar, Dict, Type
import pytest
import torch
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import (
ClassIncrementalSetting,
IncrementalRLSetting,
Setting,
TraditionalRLSetting,
)
from sequoia.settings.rl.continual.results import ContinualRLResults
from .base_method import BaseMethod
from .method_test import MethodTests
class TestBaseMethod(MethodTests):
    """Tests for `BaseMethod`, built on top of the shared `MethodTests` class."""

    # The Method class under test.
    Method: ClassVar[Type[BaseMethod]] = BaseMethod
    # Keyword arguments meant to keep debugging runs short (a single epoch).
    method_debug_kwargs: ClassVar[Dict] = {"max_epochs": 1}

    @classmethod
    @pytest.fixture(scope="module")
    def trainer_options(cls, tmp_path_factory) -> TrainerConfig:
        """Module-scoped `TrainerConfig`: one epoch, no checkpoints, temporary log dir."""
        tmp_path = tmp_path_factory.mktemp("log_dir")
        return TrainerConfig(
            # logger=False,
            max_epochs=1,
            checkpoint_callback=False,
            default_root_dir=tmp_path,
        )

    @classmethod
    @pytest.fixture
    def method(cls, config: Config, trainer_options: TrainerConfig) -> BaseMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        # `trainer_options` is module-scoped (shared), so re-assert the epoch limit.
        trainer_options.max_epochs = 1
        return cls.Method(trainer_options=trainer_options, config=config)

    def validate_results(
        self,
        setting: Setting,
        method: BaseMethod,
        results: Setting.Results,
    ) -> None:
        """Check that results were produced and have a truthy objective."""
        assert results
        assert results.objective
        # TODO: Set some 'reasonable' bounds on the performance here, depending on the
        # setting/dataset.

    @pytest.mark.xfail(reason="TODO: Re-enable once we fix the bugs for BaseMethod in RL.")
    @slow
    @pytest.mark.timeout(120)
    def test_cartpole_state(self, config: Config, trainer_options: TrainerConfig):
        """Test that the baseline method can learn cartpole (state input)"""
        # TODO: Actually remove the trainer_config class from the BaseMethod?
        trainer_options.max_epochs = 1
        method = self.Method(config=config, trainer_options=trainer_options)
        method.hparams.learning_rate = 0.01
        setting = TraditionalRLSetting(
            dataset="CartPole-v0",
            train_max_steps=5000,
            nb_tasks=1,
            test_max_steps=2_000,
            config=config,
        )
        results: ContinualRLResults = setting.apply(method)
        print(results.to_log_dict())
        # The method should normally get the maximum length (200), but checking with
        # 100 just to account for randomness.
        assert results.average_metrics.mean_episode_length > 100.0

    @pytest.mark.xfail(reason="TODO: Re-enable once we fix the bugs for BaseMethod in RL.")
    @slow
    @pytest.mark.timeout(120)
    def test_incremental_cartpole_state(self, config: Config, trainer_options: TrainerConfig):
        """Test that the baseline method can learn cartpole (state input) over two tasks."""
        # TODO: Actually remove the trainer_config class from the BaseMethod?
        trainer_options.max_epochs = 1
        method = self.Method(config=config, trainer_options=trainer_options)
        method.hparams.learning_rate = 0.01
        # NOTE(review): unlike `test_cartpole_state`, `config` isn't passed to the
        # setting here -- confirm whether that is intended.
        setting = IncrementalRLSetting(
            dataset="cartpole", train_max_steps=5000, nb_tasks=2, test_max_steps=1000
        )
        results: ContinualRLResults = setting.apply(method)
        print(results.to_log_dict())
        # The method should normally get the maximum length (200), but checking with
        # 100 just to account for randomness.
        # NOTE(review): `test_cartpole_state` reads
        # `results.average_metrics.mean_episode_length` instead -- confirm which
        # attribute the results of this setting actually expose.
        assert results.mean_episode_length > 100.0

    @pytest.mark.xfail(reason="TODO: Unreliable test.")
    @pytest.mark.timeout(30)
    @pytest.mark.skipif(not torch.cuda.is_available(), reason="Cuda is required.")
    def test_device_of_output_head_is_correct(
        self,
        short_class_incremental_setting: ClassIncrementalSetting,
        trainer_options: TrainerConfig,
        config: Config,
    ):
        """There is a bug happening where the output head is on CPU while the rest of the
        model is on GPU.
        """
        trainer_options.max_epochs = 1
        method = self.Method(trainer_options=trainer_options, config=config)
        results = short_class_incremental_setting.apply(method)
        assert 0.20 <= results.objective
def test_weird_pl_bug():
    """Check that `apply_to_collection` reaches the tensors nested in the batch objects.

    The inputs mimic a batch of (observations, rewards) objects paired with an
    integer, with every tensor on a CUDA device. The callback records the first
    non-CPU device it sees; the test passes if at least one such tensor is found.
    """
    # The inputs are created on a CUDA device, so the test can't run without one.
    if not torch.cuda.is_available():
        pytest.skip("Cuda is required.")

    replica_device = None

    def find_tensor_with_device(tensor: torch.Tensor) -> torch.Tensor:
        """Record the device of the first non-CPU tensor; return the tensor unchanged."""
        nonlocal replica_device
        if replica_device is None and tensor.device != torch.device("cpu"):
            replica_device = tensor.device
        return tensor

    from pytorch_lightning.utilities.apply_func import apply_to_collection

    from sequoia.settings.sl.incremental.objects import (
        IncrementalSLObservations,
        IncrementalSLRewards,
    )

    # TODO: Not quite sure why there is also a `0` in there.
    input_device = "cuda"
    inputs = (
        (
            IncrementalSLObservations(
                x=torch.rand([32, 3, 28, 28], device=input_device),
                task_labels=torch.zeros([32], device=input_device),
            ),
            IncrementalSLRewards(y=torch.randint(10, [32], device=input_device)),
        ),
        0,
    )
    # from collections.abc import Mapping, Sequence
    apply_to_collection(inputs, dtype=torch.Tensor, function=find_tensor_with_device)
    assert replica_device is not None
# Alias exposing the test class under the name `BaseMethodTests`.
BaseMethodTests = TestBaseMethod
================================================
FILE: sequoia/methods/conftest.py
================================================
import pytest
from sequoia.client import SettingProxy
from sequoia.common.config import Config
from sequoia.settings.sl import (
ClassIncrementalSetting,
ContinualSLSetting,
DiscreteTaskAgnosticSLSetting,
TaskIncrementalSLSetting,
)
from sequoia.settings.sl.continual.setting import random_subset
def _assert_split_sizes(setting, nb_tasks: int, samples_per_task: int) -> None:
    """Assert each split has `nb_tasks` datasets, each with `samples_per_task` samples."""
    for split in ("train", "val", "test"):
        datasets = getattr(setting, f"{split}_datasets")
        assert len(datasets) == nb_tasks, (split, len(datasets), nb_tasks)
        assert all(len(dataset) == samples_per_task for dataset in datasets), split


def _shorten_datasets(setting, nb_tasks: int, samples_per_task: int = 100):
    """Shorten the train/val/test datasets of a setting that was already set up.

    Each per-task dataset is replaced by a random subset of `samples_per_task`
    samples. The sizes are verified, `setting.setup()` is called once more, and
    the sizes are verified again to make sure `setup` doesn't overwrite them.

    Returns the same `setting` object, for convenience.
    """
    # Testing this out: Shortening the datasets of every split.
    for split in ("train", "val", "test"):
        attribute = f"{split}_datasets"
        shortened = [
            random_subset(task_dataset, samples_per_task)
            for task_dataset in getattr(setting, attribute)
        ]
        setattr(setting, attribute, shortened)
    _assert_split_sizes(setting, nb_tasks, samples_per_task)

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()
    _assert_split_sizes(setting, nb_tasks, samples_per_task)
    return setting


@pytest.fixture(scope="session")
def short_class_incremental_setting(session_config: Config):
    """Class-incremental MNIST setting with 5 tasks of 100 samples per split."""
    setting = ClassIncrementalSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()
    return _shorten_datasets(setting, nb_tasks=5)


@pytest.fixture(scope="session")
def short_continual_sl_setting(session_config: Config):
    """Continual SL MNIST setting, shortened to 100 samples per dataset."""
    setting = ContinualSLSetting(
        dataset="mnist",
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()
    # The number of tasks isn't passed above: 5 datasets per split are expected.
    return _shorten_datasets(setting, nb_tasks=5)


@pytest.fixture(scope="session")
def short_discrete_task_agnostic_sl_setting(session_config: Config):
    """Discrete task-agnostic SL MNIST setting, shortened to 100 samples per dataset."""
    setting = DiscreteTaskAgnosticSLSetting(
        dataset="mnist",
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()
    # The number of tasks isn't passed above: 5 datasets per split are expected.
    return _shorten_datasets(setting, nb_tasks=5)


@pytest.fixture(scope="session")
def short_task_incremental_setting(session_config: Config):
    """Task-incremental MNIST setting with 5 tasks of 100 samples per split."""
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        monitor_training_performance=True,
    )
    setting.config = session_config
    setting.prepare_data()
    setting.setup()
    return _shorten_datasets(setting, nb_tasks=5)


@pytest.fixture(scope="session")
def short_sl_track_setting(session_config: Config):
    """Proxy to the "sl_track" class-incremental setting, with shortened datasets."""
    setting = SettingProxy(
        ClassIncrementalSetting,
        "sl_track",
        # dataset="synbols",
        # nb_tasks=12,
        # class_order=class_order,
        # monitor_training_performance=True,
    )
    setting.config = session_config
    # TODO: This could be a bit more convenient.
    setting.data_dir = session_config.data_dir
    assert setting.config == session_config
    assert setting.data_dir == session_config.data_dir
    assert setting.nb_tasks == 12

    # For now we'll just shorten the tests by shortening the datasets.
    samples_per_task = 100
    setting.batch_size = 10
    setting.setup()
    return _shorten_datasets(
        setting, nb_tasks=setting.nb_tasks, samples_per_task=samples_per_task
    )
================================================
FILE: sequoia/methods/d3rlpy_methods/__init__.py
================================================
================================================
FILE: sequoia/methods/d3rlpy_methods/base.py
================================================
from typing import ClassVar, Type, Union
import gym
import numpy as np
try:
from d3rlpy.algos import *
from d3rlpy.dataset import MDPDataset
except ImportError as err:
raise RuntimeError(f"You need to have `d3rlpy` installed to use these methods.") from err
from gym import Space
from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics
from sequoia import Actions, Environment, Method, Observations, Rewards
from sequoia.settings.offline_rl.setting import OfflineRLSetting
class OfflineRLWrapper(gym.Wrapper):
    """Wrapper that unpacks the observation/reward objects of the wrapped env.

    Observations are replaced by their `x` attribute and rewards by their `y`
    attribute. The observation space becomes the `x` entry of the wrapped
    env's observation space.
    """

    def __init__(self, env):
        super().__init__(env)
        self.observation_space = env.observation_space.x

    def reset(self):
        """Reset the wrapped env and return the `x` of its first observation."""
        return super().reset().x

    def step(self, action):
        """Step the wrapped env; return `(observation.x, reward.y, done, info)`."""
        wrapped_obs, wrapped_reward, done, info = super().step(action)
        return wrapped_obs.x, wrapped_reward.y, done, info
class BaseOfflineRLMethod(Method, target_setting=OfflineRLSetting):
    """Base class for Methods that delegate to a d3rlpy algorithm.

    Subclasses select the algorithm by overriding the `Algo` class attribute.
    The algorithm itself is only instantiated in `configure`.
    """

    # The d3rlpy algorithm class instantiated in `configure`.
    Algo: ClassVar[Type[AlgoBase]] = AlgoBase

    def __init__(
        self,
        train_steps: int = 1_000_000,
        train_steps_per_epoch=1_000_000,
        test_steps=1_000,
        scorers: dict = None,
        use_gpu: bool = False,
        **kwargs,
    ):
        """Store the training options; `kwargs` are kept for the `Algo` constructor."""
        super().__init__()
        # Training / evaluation budget.
        self.train_steps = train_steps
        self.train_steps_per_epoch = train_steps_per_epoch
        self.test_steps = test_steps
        # Options forwarded to the algorithm.
        self.scorers = scorers
        self.use_gpu = use_gpu
        self.kwargs = kwargs
        # State filled in later: by `configure` (algo) and by `fit` (offline_metrics).
        self.algo = None
        self.offline_metrics = None

    def configure(self, setting: OfflineRLSetting) -> None:
        """Remember the setting and create a fresh instance of `Algo`."""
        super().configure(setting)
        self.setting = setting
        algo_cls = type(self).Algo
        self.algo = algo_cls(use_gpu=self.use_gpu, **self.kwargs)

    def fit(
        self,
        train_env: Union[Environment[Observations, Actions, Rewards], MDPDataset],
        valid_env: Union[Environment[Observations, Actions, Rewards], MDPDataset],
    ) -> None:
        """
        Fit self.algo on training and evaluation environment
        Works for both gym environments and d3rlpy datasets

        With an `OfflineRLSetting`, `algo.fit` is used and its return value is
        stored in `self.offline_metrics`. Otherwise, both environments are
        wrapped and `algo.fit_online` is used.
        """
        if not isinstance(self.setting, OfflineRLSetting):
            # Online case: unpack the observation/reward objects for the algo.
            train_env = RecordEpisodeStatistics(OfflineRLWrapper(train_env))
            valid_env = RecordEpisodeStatistics(OfflineRLWrapper(valid_env))
            self.algo.fit_online(env=train_env, eval_env=valid_env, n_steps=self.train_steps)
            return

        self.offline_metrics = self.algo.fit(
            train_env,
            eval_episodes=valid_env,
            n_steps=self.train_steps,
            n_steps_per_epoch=self.train_steps_per_epoch,
            scorers=self.scorers,
        )

    def get_actions(self, obs: Union[np.ndarray, Observations], action_space: Space) -> np.ndarray:
        """
        Return actions predicted by self.algo for given observation and action space

        The observation gets a leading axis before prediction, and that axis is
        squeezed out of the predicted action.
        """
        raw_obs = obs.x if isinstance(obs, Observations) else obs
        batched_obs = np.expand_dims(raw_obs, axis=0)
        prediction = np.asarray(self.algo.predict(batched_obs))
        return prediction.squeeze(axis=0)
# Each class below only selects which d3rlpy algorithm class `BaseOfflineRLMethod`
# instantiates, by overriding the `Algo` class attribute.
"""
D3RLPY Methods: target OfflineRL and TraditionalRL assumptions
"""


class DQNMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DQN`."""

    Algo: ClassVar[Type[AlgoBase]] = DQN


class DoubleDQNMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DoubleDQN`."""

    Algo: ClassVar[Type[AlgoBase]] = DoubleDQN


class DDPGMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DDPG`."""

    Algo: ClassVar[Type[AlgoBase]] = DDPG


class TD3Method(BaseOfflineRLMethod):
    """Method using d3rlpy's `TD3`."""

    Algo: ClassVar[Type[AlgoBase]] = TD3


class SACMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `SAC`."""

    Algo: ClassVar[Type[AlgoBase]] = SAC


class DiscreteSACMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DiscreteSAC`."""

    Algo: ClassVar[Type[AlgoBase]] = DiscreteSAC


class CQLMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `CQL`."""

    Algo: ClassVar[Type[AlgoBase]] = CQL


class DiscreteCQLMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DiscreteCQL`."""

    Algo: ClassVar[Type[AlgoBase]] = DiscreteCQL


class BEARMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `BEAR`."""

    Algo: ClassVar[Type[AlgoBase]] = BEAR


class AWRMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `AWR`."""

    Algo: ClassVar[Type[AlgoBase]] = AWR


class DiscreteAWRMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DiscreteAWR`."""

    Algo: ClassVar[Type[AlgoBase]] = DiscreteAWR


class BCMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `BC`."""

    Algo: ClassVar[Type[AlgoBase]] = BC


class DiscreteBCMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DiscreteBC`."""

    Algo: ClassVar[Type[AlgoBase]] = DiscreteBC


class BCQMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `BCQ`."""

    Algo: ClassVar[Type[AlgoBase]] = BCQ


class DiscreteBCQMethod(BaseOfflineRLMethod):
    """Method using d3rlpy's `DiscreteBCQ`."""

    Algo: ClassVar[Type[AlgoBase]] = DiscreteBCQ
================================================
FILE: sequoia/methods/d3rlpy_methods/base_test.py
================================================
import pytest
from d3rlpy.constants import ActionSpace
from sequoia import TraditionalRLSetting
from sequoia.methods.d3rlpy_methods.base import *
from sequoia.settings.offline_rl.setting import OfflineRLSetting
class BaseOfflineRLMethodTests:
    """Shared tests for the d3rlpy-based methods.

    Subclasses set the `Method` class attribute to the method class under test.
    """

    Method: ClassVar[Type[BaseOfflineRLMethod]]

    @pytest.fixture
    def method(self):
        """Method instance with a minimal training budget (a single step)."""
        return self.Method(train_steps=1, train_steps_per_epoch=1)

    @staticmethod
    def _action_type(method):
        """Return the action type supported by the method's d3rlpy algorithm.

        `method.algo` is still `None` until the method is configured by a setting,
        so when needed, a throw-away algo is created with the same arguments as in
        `BaseOfflineRLMethod.configure`.
        """
        algo = method.algo
        if algo is None:
            algo = type(method).Algo(use_gpu=method.use_gpu, **method.kwargs)
        return algo.get_action_type()

    @pytest.mark.parametrize("dataset", OfflineRLSetting.available_datasets)
    def test_offlinerl(self, method, dataset: str):
        """Apply the method to each offline dataset with a compatible action space."""
        setting_offline = OfflineRLSetting(dataset=dataset)
        action_type = self._action_type(method)
        #
        # Check for mismatch
        if isinstance(setting_offline.env.action_space, gym.spaces.Box):
            if action_type not in {ActionSpace.CONTINUOUS, ActionSpace.BOTH}:
                pytest.skip("This setting requires continuous action space algorithm")
        elif isinstance(setting_offline.env.action_space, gym.spaces.discrete.Discrete):
            if action_type not in {ActionSpace.DISCRETE, ActionSpace.BOTH}:
                pytest.skip("This setting requires discrete action space algorithm")
        else:
            pytest.skip("Invalid setting action space")
        results = setting_offline.apply(method)
        # Difficult to set a meaningful threshold for 1 step fit
        assert isinstance(results.objective, float)

    @pytest.mark.parametrize("dataset", TraditionalRLSetting.available_datasets)
    def test_traditionalrl(self, method, dataset):
        """Apply the method to each online RL dataset with a compatible action space."""
        # BC is a strictly offline method
        if isinstance(method, (BCMethod, BCQMethod, DiscreteBCMethod, DiscreteBCQMethod)):
            pytest.skip("This method only works on OfflineRLSetting")
        setting_online = TraditionalRLSetting(dataset=dataset, test_max_steps=10)
        action_type = self._action_type(method)
        #
        # Check for mismatch
        if isinstance(setting_online.action_space, gym.spaces.Box):
            if action_type != ActionSpace.CONTINUOUS:
                pytest.skip("This setting requires continuous action space algorithm")
        elif isinstance(setting_online.action_space, gym.spaces.discrete.Discrete):
            if action_type != ActionSpace.DISCRETE:
                pytest.skip("This setting requires discrete action space algorithm")
        else:
            pytest.skip("Invalid setting action space")
        results = setting_online.apply(method)
        # Difficult to set a meaningful threshold for 1 step fit
        assert isinstance(results.objective, (int, float))
# Concrete test classes: each one runs the shared `BaseOfflineRLMethodTests` against
# a single method class, selected through the `Method` class attribute.
class TestDQNMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DQNMethod


class TestDoubleDQNMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DoubleDQNMethod


class TestDDPGMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DDPGMethod


class TestTD3Method(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = TD3Method


class TestSACMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = SACMethod


class TestDiscreteSACMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteSACMethod


class TestCQLMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = CQLMethod


class TestDiscreteCQLMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteCQLMethod


class TestBEARMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = BEARMethod


class TestAWRMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = AWRMethod


class TestDiscreteAWRMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteAWRMethod


class TestBCMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = BCMethod


class TestDiscreteBCMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteBCMethod


class TestBCQMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = BCQMethod


class TestDiscreteBCQMethod(BaseOfflineRLMethodTests):
    Method: ClassVar[Type[BaseOfflineRLMethod]] = DiscreteBCQMethod
================================================
FILE: sequoia/methods/ewc_method.py
================================================
"""Defines the EWC method, as a subclass of the BaseMethod.
Likewise, defines the `EwcModel`, which is a very simple subclass of the
`BaseModel`, adding in the Ewc auxiliary task (`EWCTask`).
For a more detailed view of exactly how the EwcTask calculates its loss, see
the `sequoia.methods.aux_tasks.ewc.EwcTask`.
"""
import warnings
from dataclasses import dataclass
from typing import Optional
from gym.utils import colorize
from simple_parsing import ArgumentParser, mutable_field
from sequoia.common.config import Config
from sequoia.methods import register_method
from sequoia.methods.aux_tasks.ewc import EWCTask
from sequoia.methods.base_method import BaseMethod, BaseModel
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Setting, TaskIncrementalRLSetting, IncrementalSLSetting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
class EwcModel(BaseModel):
    """`BaseModel` variant that registers an `EWCTask` as one of its auxiliary tasks."""

    @dataclass
    class HParams(BaseModel.HParams):
        """Hyper-parameters of the `EwcModel`."""

        # Hyper-parameters related to the EWC auxiliary task.
        ewc: EWCTask.Options = mutable_field(EWCTask.Options)

    def __init__(self, setting: Setting, hparams: "EwcModel.HParams", config: Config):
        """Build the base model, then attach the EWC auxiliary task to it.

        The options of the task are read from `self.hp.ewc`, i.e. from the `ewc` field
        of the hyper-parameters.
        """
        super().__init__(setting=setting, hparams=hparams, config=config)
        self.hp: EwcModel.HParams
        ewc_task = EWCTask(options=self.hp.ewc)
        self.add_auxiliary_task(ewc_task)

    def get_loss(self, forward_pass, rewards=None, loss_name=""):
        """Return the loss computed by `BaseModel.get_loss`, unchanged."""
        base_loss = super().get_loss(forward_pass, rewards=rewards, loss_name=loss_name)
        return base_loss
# NOTE(review): the class docstring below advertises support for both RL and SL
# settings, and `demo()` in this file applies the method to a
# `TaskIncrementalRLSetting`, yet `target_setting` is `IncrementalSLSetting`.
# Confirm which of the two is intended.
@register_method
@dataclass
class EwcMethod(BaseMethod, target_setting=IncrementalSLSetting):
    """Subclass of the BaseMethod, which adds the EWCTask to the `BaseModel`.

    This Method is applicable to any CL setting (RL or SL) where there are clear task
    boundaries, regardless of if the task labels are given or not.
    """

    hparams: EwcModel.HParams = mutable_field(EwcModel.HParams)

    def __init__(
        self,
        hparams: EwcModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        **kwargs,
    ):
        """Forward every argument, unchanged, to `BaseMethod.__init__`."""
        super().__init__(hparams=hparams, config=config, trainer_options=trainer_options, **kwargs)

    def configure(self, setting: IncrementalAssumption):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.

        When the setting has a single training phase, a `RuntimeWarning` is emitted and
        the "ewc" auxiliary task of the model is disabled.
        """
        super().configure(setting)

        if setting.phases == 1:
            warnings.warn(
                RuntimeWarning(
                    colorize(
                        "Disabling the EWC portion of this Method entirely, as there "
                        "is only one phase of training in this setting (i.e. `fit` is "
                        "only called once).",
                        "red",
                    )
                )
            )
            # `self.model` is accessed only after `super().configure(setting)`, which
            # is presumably where the model gets created (see `create_model`).
            self.model.tasks["ewc"].disable()

    def on_task_switch(self, task_id: Optional[int]):
        """Delegate to `BaseMethod.on_task_switch`; no EWC-specific logic lives here."""
        super().on_task_switch(task_id)

    def create_model(self, setting: Setting) -> EwcModel:
        """Create the Model to use for the given Setting.

        In this case, we want to return an `EwcModel` (our customized version of the
        BaseModel).

        Parameters
        ----------
        setting : Setting
            The experimental Setting this Method will be applied to.

        Returns
        -------
        EwcModel
            The Model that will be trained and used for evaluation.
        """
        return EwcModel(setting=setting, hparams=self.hparams, config=self.config)
def demo():
    """Apply the `EwcMethod` to a small two-task cartpole setting as a smoke test.

    The Method and the `Config` are both built from the command-line arguments, and
    the summary of the results is printed at the end.
    """
    # One argument group for the Method, one for the Config.
    parser = ArgumentParser(description=__doc__)
    EwcMethod.add_argparse_args(parser, dest="method")
    parser.add_arguments(Config, "config")
    parsed_args = parser.parse_args()

    method = EwcMethod.from_argparse_args(parsed_args, dest="method")
    config: Config = parsed_args.config

    # Two tasks, the second one starting at step 1000. The same schedule is used for
    # training and for testing.
    first_task = {"gravity": 10, "length": 0.2}
    second_task = {"gravity": 100, "length": 1.2}
    task_schedule = {0: first_task, 1000: second_task}

    # NOTE: An SL setting, e.g. `TaskIncrementalSLSetting(dataset="mnist", nb_tasks=5)`,
    # could be used here instead.
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        train_task_schedule=task_schedule,
        test_task_schedule=task_schedule,
    )

    results = setting.apply(method, config=config)
    summary = results.summary()
    print(summary)
# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    demo()
================================================
FILE: sequoia/methods/ewc_method_test.py
================================================
""" TODO: Tests for the EWC Method. """
from functools import partial
from typing import ClassVar, Type
import numpy as np
import pytest
from torch import Tensor
from sequoia.common import Loss
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings.rl import IncrementalRLSetting, TaskIncrementalRLSetting, TraditionalRLSetting
from sequoia.settings.sl import (
ClassIncrementalSetting,
MultiTaskSLSetting,
TaskIncrementalSLSetting,
TraditionalSLSetting,
)
from .base_method_test import TestBaseMethod as BaseMethodTests
from .ewc_method import EwcMethod, EwcModel
class TestEWCMethod(BaseMethodTests):
    """Tests for the `EwcMethod`, in addition to those inherited from `BaseMethodTests`."""

    # Annotated with the concrete method type: the `Method` base class is not imported
    # in this module.
    Method: ClassVar[Type[EwcMethod]] = EwcMethod

    @classmethod
    @pytest.fixture
    def method(cls, config: Config, trainer_options: TrainerConfig) -> EwcMethod:
        """Fixture that returns the Method instance to use when testing/debugging."""
        trainer_options.max_epochs = 1
        return cls.Method(trainer_options=trainer_options, config=config)

    @slow
    @pytest.mark.timeout(300)
    def test_task_incremental_mnist(self, monkeypatch):
        """Check that the EWC loss is zero during the first task and non-zero afterwards.

        `EwcModel.training_step` and `EwcMethod.fit` are monkeypatched in order to
        accumulate the EWC loss observed during each call to `fit`.
        """
        # TODO: Change this to use the 'short task incremental setting'.
        setting = TaskIncrementalSLSetting(dataset="mnist", monitor_training_performance=True)
        total_ewc_losses_per_task = np.zeros(setting.nb_tasks)

        _training_step = EwcModel.training_step

        def wrapped_training_step(self: EwcModel, batch, batch_idx: int, *args, **kwargs):
            # `batch_idx` is forwarded positionally: passing it as a keyword ahead of
            # `*args` would raise a "multiple values for argument" TypeError as soon as
            # an extra positional argument is received.
            step_results = _training_step(self, batch, batch_idx, *args, **kwargs)
            loss_object: Loss = step_results["loss_object"]
            if "ewc" in loss_object.losses:
                ewc_loss_obj = loss_object.losses["ewc"]
                ewc_loss = ewc_loss_obj.total_loss
                if isinstance(ewc_loss, Tensor):
                    ewc_loss = ewc_loss.detach().cpu().numpy()
                total_ewc_losses_per_task[self.current_task] += ewc_loss
            return step_results

        monkeypatch.setattr(EwcModel, "training_step", wrapped_training_step)

        _fit = EwcMethod.fit
        # One snapshot of the accumulated per-task EWC losses per call to `fit`.
        at_all_points_in_time = []

        def wrapped_fit(self, train_env, valid_env):
            print(f"starting task {self.model.current_task}: {total_ewc_losses_per_task}")
            # Reset the accumulator so that each snapshot only covers a single `fit`.
            total_ewc_losses_per_task[:] = 0
            _fit(self, train_env, valid_env)
            at_all_points_in_time.append(total_ewc_losses_per_task.copy())

        monkeypatch.setattr(EwcMethod, "fit", wrapped_fit)

        method = EwcMethod(max_epochs=1)
        results = setting.apply(method)

        # No EWC loss while training on the first task...
        assert (at_all_points_in_time[0] == 0).all()
        # ...and a non-zero EWC loss for the current task during every later `fit`.
        assert at_all_points_in_time[1][1] != 0
        assert at_all_points_in_time[2][2] != 0
        assert at_all_points_in_time[3][3] != 0
        assert at_all_points_in_time[4][4] != 0

        assert 0.95 <= results.average_online_performance.objective
        # TODO: Fix this: Should be getting way better than this, even when just
        # debugging.
        assert 0.15 <= results.average_final_performance.objective

    @pytest.mark.parametrize(
        "non_cl_setting_fn",
        [
            partial(ClassIncrementalSetting, nb_tasks=1),
            MultiTaskSLSetting,
            TraditionalSLSetting,
            TraditionalRLSetting,
            partial(IncrementalRLSetting, nb_tasks=1),
            partial(TaskIncrementalRLSetting, nb_tasks=1),
        ],
    )
    def test_raises_warning_when_applied_to_non_cl_setting(self, non_cl_setting_fn):
        """When applied onto a non-CL setting like IID or Multi-Task SL (or RL), the
        EWCMethod should raise a warning, and disable the auxiliary task.
        """
        method = EwcMethod()
        setting = non_cl_setting_fn()
        with pytest.warns(RuntimeWarning):
            method.configure(setting)
================================================
FILE: sequoia/methods/experience_replay.py
================================================
""" Method that uses a replay buffer to prevent forgetting.
TODO: Refactor this to be based on the BaseMethod, possibly using an auxiliary task for
the Replay.
"""
from argparse import ArgumentParser, Namespace
from collections.abc import Iterable
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple, Type
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import tqdm
from gym import spaces
from torch import Tensor
from torchvision.models import ResNet
from wandb.wandb_run import Run
from sequoia.methods import register_method
from sequoia.settings import ClassIncrementalSetting
from sequoia.settings.base import Actions, Environment, Method, Observations
from sequoia.settings.sl.continual.setting import smart_class_prediction
from sequoia.utils import get_logger
logger = get_logger(__name__)
@register_method
@dataclass
class ExperienceReplayMethod(Method, target_setting=ClassIncrementalSetting):
    """Simple method that uses a replay buffer to reduce forgetting."""

    def __init__(
        self,
        learning_rate: float = 1e-3,
        buffer_capacity: int = 200,
        max_epochs_per_task: int = 10,
        weight_decay: float = 1e-6,
        seed: int = None,
    ):
        """Store the hyper-parameters; the network is only created in `configure`.

        Parameters
        ----------
        learning_rate : float
            Learning rate of the Adam optimizer.
        buffer_capacity : int
            Number of samples the replay buffer can hold. No buffer is created when
            this is 0.
        max_epochs_per_task : int
            Maximum number of training epochs in each call to `fit`.
        weight_decay : float
            Weight decay of the Adam optimizer.
        seed : int, optional
            Random seed. When given, it seeds torch and the buffer's numpy generator,
            and torch's deterministic mode is turned on.
        """
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.buffer_capacity = buffer_capacity

        self.net: ResNet
        self.buffer: Optional[Buffer] = None
        self.optim: torch.optim.Optimizer
        self.task: int = 0
        self.rng = np.random.default_rng(seed)
        self.seed = seed
        # Compare against None: 0 is a perfectly valid seed.
        if seed is not None:
            torch.manual_seed(seed)
            # `torch.set_deterministic` is deprecated in favour of
            # `torch.use_deterministic_algorithms`; use whichever is available.
            if hasattr(torch, "use_deterministic_algorithms"):
                torch.use_deterministic_algorithms(True)
            else:
                torch.set_deterministic(True)

        self.epochs_per_task: int = max_epochs_per_task
        self.early_stop_patience: int = 2

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def configure(self, setting: ClassIncrementalSetting):
        """Create the network, the replay buffer and the optimizer for `setting`."""
        self.setting = setting
        # create the model
        self.net = models.resnet18(pretrained=False)
        self.net.fc = nn.Linear(512, setting.action_space.n)
        if torch.cuda.is_available():
            self.net = self.net.to(device=self.device)
        # Set drop_last to True, to avoid getting a batch of size 1, which makes
        # batchnorm raise an error.
        setting.drop_last = True
        image_space: spaces.Box = setting.observation_space["x"]

        # Create the buffer. Any buffer left over from a previous call is dropped
        # first, so that `buffer_capacity == 0` really means "no replay".
        self.buffer = None
        if self.buffer_capacity:
            self.buffer = Buffer(
                capacity=self.buffer_capacity,
                input_shape=image_space.shape,
                extra_buffers={"t": torch.LongTensor},
                rng=self.rng,
            ).to(device=self.device)
        # Create the optimizer.
        self.optim = torch.optim.Adam(
            self.net.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )

    def fit(self, train_env: Environment, valid_env: Environment):
        """Train on `train_env` for up to `epochs_per_task` epochs.

        After each epoch, the mean loss on `valid_env` is computed. Training stops
        early once it has not improved for more than `early_stop_patience` epochs.
        """
        best_val_loss = np.inf
        best_epoch = 0
        for epoch in range(self.epochs_per_task):
            # The validation loop below switches the network to eval mode, so training
            # mode has to be restored at the start of every epoch, not just the first.
            self.net.train()
            train_pbar = tqdm.tqdm(train_env, desc=f"Training Epoch {epoch}")
            postfix = {}

            obs: ClassIncrementalSetting.Observations
            rew: ClassIncrementalSetting.Rewards
            for i, (obs, rew) in enumerate(train_pbar):
                self.optim.zero_grad()

                obs = obs.to(device=self.device)
                x = obs.x

                # FIXME: Batch norm will cause a crash if we pass x with batch_size==1!
                # Only the tensor given to the network is padded: `x` itself keeps the
                # real batch, so that the replay sample size and the samples added to
                # the buffer are not affected by the workaround.
                fake_batch = x.shape[0] == 1
                net_input = x
                if fake_batch:
                    # Pretend like this has batch_size of 2 rather than just 1.
                    net_input = x.tile([2, *(1 for _ in x.shape[1:])])
                    # Just so the two samples aren't identical, otherwise maybe the
                    # batch norm std would be nan or something.
                    net_input[1] += 1

                logits = self.net(net_input)
                if fake_batch:
                    logits = logits[:1]  # Drop the 'fake' second item.

                if rew is None:
                    # If our online training performance is being measured, we might
                    # need to provide actions before we can get the corresponding
                    # rewards (image labels in this case).
                    y_pred = logits.argmax(1)
                    rew = train_env.send(y_pred)

                rew = rew.to(device=self.device)
                y = rew.y
                loss = F.cross_entropy(logits, y)
                postfix["loss"] = loss.detach().item()

                if self.task > 0 and self.buffer:
                    b_samples = self.buffer.sample(x.size(0))
                    b_logits = self.net(b_samples["x"])
                    loss_replay = F.cross_entropy(b_logits, b_samples["y"])
                    loss = loss + loss_replay
                    postfix["replay loss"] = loss_replay.detach().item()

                loss.backward()
                self.optim.step()

                train_pbar.set_postfix(postfix)

                # Only add new samples to the buffer (only during first epoch).
                if self.buffer and epoch == 0:
                    self.buffer.add_reservoir({"x": x, "y": y, "t": self.task})

            # Validation loop:
            self.net.eval()
            epoch_val_loss_list: List[float] = []
            # `no_grad` restores the previous grad mode even if an exception is raised.
            with torch.no_grad():
                val_pbar = tqdm.tqdm(valid_env)
                val_pbar.set_description(f"Validation Epoch {epoch}")
                for i, (obs, rew) in enumerate(val_pbar):
                    obs = obs.to(device=self.device)
                    x = obs.x
                    logits = self.net(x)

                    if rew is None:
                        y_pred = logits.argmax(-1)
                        rew = valid_env.send(y_pred)

                    assert rew is not None
                    rew = rew.to(device=self.device)
                    y = rew.y
                    val_loss = F.cross_entropy(logits, y).item()

                    epoch_val_loss_list.append(val_loss)
                    postfix["validation loss"] = val_loss
                    val_pbar.set_postfix(postfix)

            # An empty validation environment counts as "no improvement", without
            # computing the mean of an empty list.
            if epoch_val_loss_list:
                epoch_val_loss_mean = float(np.mean(epoch_val_loss_list))
            else:
                epoch_val_loss_mean = np.inf

            if epoch_val_loss_mean < best_val_loss:
                best_val_loss = epoch_val_loss_mean
                best_epoch = epoch
            if epoch - best_epoch > self.early_stop_patience:
                print(f"Early stopping at epoch {epoch}.")
                # TODO: Reload the weights from the best epoch.
                break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Return the predicted classes for a batch of observations.

        When task labels are available, the prediction is delegated to
        `smart_class_prediction`; otherwise the argmax over all the logits is used.
        """
        observations = observations.to(device=self.device)
        task_labels = observations.task_labels
        # Predictions don't need gradients.
        with torch.no_grad():
            logits = self.net(observations.x)
            if task_labels is not None:
                y_pred = smart_class_prediction(
                    logits=logits,
                    task_labels=task_labels,
                    setting=self.setting,
                    train=False,
                )
            else:
                y_pred = logits.argmax(1)
        return self.setting.Actions(y_pred=y_pred)

    def on_task_switch(self, task_id: Optional[int]):
        """Record the new task id, but only while training."""
        print(f"Switching from task {self.task} to task {task_id}")
        # NOTE(review): `self.training` is not set anywhere in this class; presumably
        # it is provided by the `Method` base class. Confirm.
        if self.training:
            self.task = task_id

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser) -> None:
        """Add the command-line arguments for this Method to the given parser.

        Parameters
        ----------
        parser : ArgumentParser
            The ArgumentParser.
        """
        parser.add_argument("--learning_rate", type=float, default=1e-3)
        parser.add_argument("--weight_decay", type=float, default=1e-6)
        parser.add_argument("--buffer_capacity", type=int, default=200)
        parser.add_argument("--max_epochs_per_task", type=int, default=10)
        parser.add_argument("--seed", type=int, default=None, help="Random seed")

    @classmethod
    def from_argparse_args(cls, args: Namespace, dest: str = None):
        """Extract the parsed command-line arguments from the namespace and
        return an instance of class `cls`.

        Parameters
        ----------
        args : Namespace
            The namespace containing all the parsed command-line arguments.
        dest : str, optional
            Name of the attribute of `args` under which the arguments of this Method
            are stored. When empty or None (the default), they are read from `args`
            directly.

        Returns
        -------
        cls
            An instance of the class `cls`.
        """
        args = args if not dest else getattr(args, dest)
        return cls(
            learning_rate=args.learning_rate,
            buffer_capacity=args.buffer_capacity,
            max_epochs_per_task=args.max_epochs_per_task,
            weight_decay=args.weight_decay,
            seed=args.seed,
        )

    def get_search_space(self, setting: ClassIncrementalSetting) -> Dict:
        """Return the hyper-parameter search space (one prior string per parameter)."""
        return {
            "learning_rate": "loguniform(1e-4, 5e-1, default_value=1e-3)",
            "buffer_capacity": "uniform(1000, 100_000, default_value=10_000, discrete=True)",
            "weight_decay": "loguniform(1e-12, 1e-3, default_value=1e-6)",
            "early_stop_patience": "uniform(0, 2, default_value=1, discrete=True)",
        }

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        NOTE: It is very strongly recommended that you always re-create your model and
        any modules / components that depend on these hyper-parameters inside the
        `configure` method! (Otherwise these new hyper-parameters will not be used in
        the next run)

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-parameters will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        self.learning_rate = new_hparams["learning_rate"]
        self.weight_decay = new_hparams["weight_decay"]
        self.buffer_capacity = new_hparams["buffer_capacity"]
        # `early_stop_patience` is part of the search space too. It is read leniently,
        # so that dicts which only hold the three entries above keep working.
        if "early_stop_patience" in new_hparams:
            self.early_stop_patience = new_hparams["early_stop_patience"]

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config.update(
            dict(
                learning_rate=self.learning_rate,
                weight_decay=self.weight_decay,
                buffer_capacity=self.buffer_capacity,
                epochs_per_task=self.epochs_per_task,
                seed=self.seed,
            )
        )
class Buffer(nn.Module):
    """Fixed-capacity replay buffer, filled with reservoir sampling.

    The samples are stored in registered torch buffers: `bx` (inputs), `by` (labels)
    and one `b<name>` buffer for each entry of `extra_buffers`. Only the first
    `current_index` slots hold real data.
    """

    def __init__(
        self,
        capacity: int,
        input_shape: Tuple[int, ...],
        extra_buffers: Dict[str, Type[torch.Tensor]] = None,
        rng: np.random.RandomState = None,
    ):
        """Allocate the (zero-filled) storage.

        Parameters
        ----------
        capacity : int
            Maximum number of samples stored.
        input_shape : Tuple[int, ...]
            Shape of a single input sample.
        extra_buffers : Dict[str, Type[torch.Tensor]], optional
            Additional one-dimensional buffers to create, as a map from name to tensor
            type (e.g. `{"t": torch.LongTensor}`).
        rng : np.random.RandomState, optional
            Random number generator used by `sample`. Only its `choice` method is
            used. A fresh `RandomState` is created when omitted.
        """
        super().__init__()
        self.rng = rng or np.random.RandomState()

        bx = torch.zeros([capacity, *input_shape], dtype=torch.float)
        by = torch.zeros([capacity], dtype=torch.long)

        self.register_buffer("bx", bx)
        self.register_buffer("by", by)
        # Names of the registered storage buffers.
        # NOTE: this attribute shadows `nn.Module.buffers()` on instances; the name is
        # kept as-is so that existing users of the attribute keep working.
        self.buffers = ["bx", "by"]

        extra_buffers = extra_buffers or {}
        for name, dtype in extra_buffers.items():
            tmp = dtype(capacity).fill_(0)
            self.register_buffer(f"b{name}", tmp)
            self.buffers += [f"b{name}"]

        self.current_index = 0
        self.n_seen_so_far = 0
        self.is_full = 0
        # (@lebrice) args isn't defined here:
        # self.to_one_hot  = lambda x : x.new(x.size(0), args.n_classes).fill_(0).scatter_(1, x.unsqueeze(1), 1)
        self.arange_like = lambda x: torch.arange(x.size(0)).to(x.device)
        self.shuffle = lambda x: x[torch.randperm(x.size(0))]

    @property
    def x(self):
        """The inputs stored so far (only the filled part of `bx`)."""
        return self.bx[: self.current_index]

    @property
    def y(self):
        """Not available: one-hot labels would require knowing the number of classes."""
        raise NotImplementedError("Can't make y one-hot, dont have n_classes.")

    def add_reservoir(self, batch: Dict[str, Tensor]) -> None:
        """Add a batch to the buffer.

        While there is free space, the samples are appended. Once the buffer is full,
        each remaining sample draws a random slot index in `[0, n_seen_so_far)` and
        only overwrites a stored sample when that index falls inside the buffer.

        Parameters
        ----------
        batch : Dict[str, Tensor]
            Map from buffer name (without the "b" prefix, e.g. "x", "y", "t") to the
            values to store. Non-iterable values (e.g. an int task id) are broadcast
            to every sample of the batch.
        """
        n_elem = batch["x"].size(0)

        # add whatever still fits in the buffer
        place_left = max(0, self.bx.size(0) - self.current_index)
        if place_left:
            offset = min(place_left, n_elem)
            start, stop = self.current_index, self.current_index + offset
            for name, data in batch.items():
                buffer = getattr(self, f"b{name}")
                if isinstance(data, Iterable):
                    buffer[start:stop].data.copy_(data[:offset])
                else:
                    buffer[start:stop].fill_(data)
            self.current_index += offset
            self.n_seen_so_far += offset

            # everything was added
            if offset == n_elem:
                return

        x = batch["x"]
        self.place_left = False

        # One candidate slot per sample that didn't fit in the free space.
        indices = (
            torch.FloatTensor(x.size(0) - place_left)
            .to(x.device)
            .uniform_(0, self.n_seen_so_far)
            .long()
        )
        valid_indices: Tensor = (indices < self.bx.size(0)).long()

        idx_new_data = valid_indices.nonzero(as_tuple=False).squeeze(-1)
        idx_buffer = indices[idx_new_data]

        self.n_seen_so_far += x.size(0)

        if idx_buffer.numel() == 0:
            return

        # perform overwrite op
        for name, data in batch.items():
            buffer = getattr(self, f"b{name}")
            if isinstance(data, Iterable):
                # Skip the samples that were already appended above.
                data = data[place_left:]
                buffer[idx_buffer] = data[idx_new_data]
            else:
                buffer[idx_buffer] = data

    def sample(self, n_samples: int, exclude_task: int = None) -> Dict[str, Tensor]:
        """Sample up to `n_samples` stored samples, without replacement.

        Parameters
        ----------
        n_samples : int
            Number of samples wanted.
        exclude_task : int, optional
            When given, samples whose `t` entry equals this value are left out. This
            requires the buffer to have been created with a "t" extra buffer.

        Returns
        -------
        Dict[str, Tensor]
            Map from buffer name without the "b" prefix ("x", "y", ...) to the sampled
            values. When fewer than `n_samples` candidates are stored, all of them are
            returned.
        """
        # Only the first `current_index` slots hold real data; the rest of the storage
        # is still zero-filled and must never be sampled.
        filled = slice(0, self.current_index)
        if exclude_task is not None:
            assert hasattr(self, "bt")
            keep = self.bt[filled] != exclude_task
            candidates = {name: getattr(self, name)[filled][keep] for name in self.buffers}
        else:
            candidates = {name: getattr(self, name)[filled] for name in self.buffers}

        # Strip the "b" prefix in every case, so that callers can always index the
        # result with "x" / "y", however many samples are returned.
        candidates = {name[1:]: values for name, values in candidates.items()}

        n_available = candidates["x"].size(0)
        if n_available < n_samples:
            return candidates

        indices_np = self.rng.choice(n_available, n_samples, replace=False)
        indices = torch.from_numpy(indices_np).to(self.bx.device)
        return {name: values[indices] for name, values in candidates.items()}
# Script entry point: only runs when this file is executed directly.
# NOTE(review): `main` is not defined in `ExperienceReplayMethod` itself; presumably it
# is inherited from the `Method` base class. Confirm.
if __name__ == "__main__":
    ExperienceReplayMethod.main()
================================================
FILE: sequoia/methods/experience_replay_test.py
================================================
from typing import ClassVar, Dict, Type
import pytest
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.methods.method_test import MethodTests
from sequoia.settings.sl import ClassIncrementalSetting, SLSetting
from .experience_replay import ExperienceReplayMethod
class TestExperienceReplay(MethodTests):
    """Tests for the `ExperienceReplayMethod`, on top of the generic `MethodTests`."""

    Method: ClassVar[Type[ExperienceReplayMethod]] = ExperienceReplayMethod
    # Small buffer and a single epoch per task, to keep the debugging runs short.
    method_debug_kwargs: ClassVar[Dict] = {"buffer_capacity": 100, "max_epochs_per_task": 1}

    @classmethod
    @pytest.fixture
    def method(cls, config: Config) -> ExperienceReplayMethod:
        """Fixture that creates the Method under test from `method_debug_kwargs`."""
        debug_kwargs = cls.method_debug_kwargs
        return cls.Method(**debug_kwargs)

    def validate_results(
        self,
        setting: SLSetting,
        method: ExperienceReplayMethod,
        results: SLSetting.Results,
    ) -> None:
        """Check that the results, and their objective, are truthy."""
        assert results
        assert results.objective

    @slow
    @pytest.mark.timeout(300)
    def test_class_incremental_mnist(self, config: Config):
        """Apply the method to class-incremental MNIST and check its performance."""
        method = ExperienceReplayMethod(buffer_capacity=200, max_epochs_per_task=1)
        setting = ClassIncrementalSetting(
            dataset="mnist",
            monitor_training_performance=True,
        )
        results = setting.apply(method, config=config)

        assert 0.90 <= results.average_online_performance.objective

        # The final performance on each of the first five tasks has to be decent.
        for task_index in range(5):
            assert 0.70 <= results.final_performance_metrics[task_index].objective

        assert 0.80 <= results.average_final_performance.objective
================================================
FILE: sequoia/methods/hat.py
================================================
""" Hard Attention to the Task
```
@inproceedings{serra2018overcoming,
title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
booktitle={International Conference on Machine Learning},
pages={4548--4557},
year={2018}
}
```
"""
from argparse import Namespace
from dataclasses import dataclass
from typing import Any, Dict, Mapping, NamedTuple, Optional, Tuple, Union
import gym
import numpy as np
import torch
import tqdm
from numpy import inf
from simple_parsing import ArgumentParser
from torch import Tensor
from wandb.wandb_run import Run
from sequoia.common import Config
from sequoia.common.hparams import HyperParameters, categorical, log_uniform, uniform
from sequoia.common.spaces import Image
from sequoia.methods import register_method
from sequoia.settings import Environment, Method, Setting
from sequoia.settings.sl import TaskIncrementalSLSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards
class Masks(NamedTuple):
    """Named tuple for the masked tensors created in the HATNet."""

    # The fields are in the order in which `HatNet.mask` builds them: one gate for each
    # of the three conv layers (from the `ec1`, `ec2` and `ec3` embeddings)...
    gc1: Tensor
    gc2: Tensor
    gc3: Tensor
    # ...and one gate for each of the two fully-connected layers (`efc1` and `efc2`).
    gfc1: Tensor
    gfc2: Tensor
class HatNet(torch.nn.Module):
    """Network used by the Hard Attention to the Task (HAT) method.

    @inproceedings{serra2018overcoming,
    title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
    author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
    booktitle={International Conference on Machine Learning},
    pages={4548--4557},
    year={2018}
    }

    As in any regular PyTorch module, the layers and the other components of the model
    (and hence its weights) are created in the constructor.
    """

    def __init__(self, image_space: Image, n_classes_per_task: Dict[int, int], s_hat: int = 50):
        """Create the layers, the per-task output heads and the task embeddings.

        Parameters
        ----------
        image_space : Image
            Space of the input images. Only its `channels` and `width` attributes are
            read here.
        n_classes_per_task : Dict[int, int]
            Number of classes in each task. One output head is created per entry.
        s_hat : int
            Factor by which the task embeddings are multiplied before the sigmoid gate
            (see `mask`).
        """
        super().__init__()

        ncha = image_space.channels
        size = image_space.width
        self.n_classes_per_task = n_classes_per_task
        self.s_hat = s_hat

        # `s` tracks the spatial size of the feature maps: it is updated for each conv
        # layer and then halved, to account for the 2x2 max-pooling that `forward`
        # applies after each of them.
        self.c1 = torch.nn.Conv2d(ncha, 64, kernel_size=size // 8)
        s = compute_conv_output_size(size, size // 8)
        s //= 2
        self.c2 = torch.nn.Conv2d(64, 128, kernel_size=size // 10)
        s = compute_conv_output_size(s, size // 10)
        s //= 2
        self.c3 = torch.nn.Conv2d(128, 256, kernel_size=2)
        s = compute_conv_output_size(s, 2)
        s //= 2
        # Spatial size of the feature maps coming out of the last conv block.
        self.smid = s
        self.maxpool = torch.nn.MaxPool2d(2)
        self.relu = torch.nn.ReLU()

        self.drop1 = torch.nn.Dropout(0.2)
        self.drop2 = torch.nn.Dropout(0.5)
        self.fc1 = torch.nn.Linear(256 * self.smid * self.smid, 2048)
        self.fc2 = torch.nn.Linear(2048, 2048)

        self.output_layers = torch.nn.ModuleList()

        n_tasks = len(self.n_classes_per_task)

        # TODO: (@lebrice) Here I'm 'fixing' this, by making it so each output head has
        # as many outputs as there are classes in total. It's not super efficient, but
        # it should work.
        total_classes = sum(self.n_classes_per_task.values())
        for task_index, n_classes_in_task in self.n_classes_per_task.items():
            self.output_layers.append(torch.nn.Linear(2048, total_classes))

        self.gate = torch.nn.Sigmoid()
        # All embedding stuff should start with 'e'
        # One embedding row per task, as wide as the layer that it gates.
        self.ec1 = torch.nn.Embedding(n_tasks, 64)
        self.ec2 = torch.nn.Embedding(n_tasks, 128)
        self.ec3 = torch.nn.Embedding(n_tasks, 256)
        self.efc1 = torch.nn.Embedding(n_tasks, 2048)
        self.efc2 = torch.nn.Embedding(n_tasks, 2048)

        self.flatten = torch.nn.Flatten()

        self.loss = torch.nn.CrossEntropyLoss()
        self.current_task: Optional[int] = 0

    def forward(self, observations: TaskIncrementalSLSetting.Observations) -> Tuple[Tensor, Masks]:
        """Compute the logits for a batch of observations.

        Parameters
        ----------
        observations : TaskIncrementalSLSetting.Observations
            Batch of observations. Its `x` and `task_labels` attributes are used.

        Returns
        -------
        Tuple[Tensor, Masks]
            The logits, where each row comes from the output head of the task of that
            sample, and the gates that were applied to the hidden layers.
        """
        # NOTE(review): the result of this call is discarded; it looks like it has no
        # effect here. Confirm before removing.
        observations.as_list_of_tuples()
        x = observations.x
        t = observations.task_labels
        # BUG: This won't work if task_labels is None (which is the case at
        # test-time in the ClassIncrementalSetting)
        masks = self.mask(t, s_hat=self.s_hat)
        gc1, gc2, gc3, gfc1, gfc2 = masks
        # Gated
        # The conv gates have one value per channel: two trailing dimensions are added
        # so that they broadcast over the spatial dimensions of the feature maps.
        h = self.maxpool(self.drop1(self.relu(self.c1(x))))
        h = h * gc1.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop1(self.relu(self.c2(h))))
        h = h * gc2.unsqueeze(2).unsqueeze(3)
        h = self.maxpool(self.drop2(self.relu(self.c3(h))))
        h = h * gc3.unsqueeze(2).unsqueeze(3)
        h = self.flatten(h)
        h = self.drop2(self.relu(self.fc1(h)))
        h = h * gfc1.expand_as(h)
        h = self.drop2(self.relu(self.fc2(h)))
        h = h * gfc2.expand_as(h)

        # Each batch can have elements of more than one Task (in test)
        # In Task Incremental Learning, each task have it own classification head.
        y: Optional[Tensor] = None
        task_masks = {}
        for task_id in set(t.tolist()):
            task_mask = t == task_id
            task_masks[task_id] = task_mask

            y_pred_t = self.output_layers[task_id](h.clone())
            if y is None:
                # The output of the first head is used as the base tensor...
                y = y_pred_t
            else:
                # ...and the rows that belong to another task are then overwritten,
                # in-place, with the output of the head of that task.
                y[task_mask] = y_pred_t[task_mask]
        assert y is not None
        return y, masks

    def mask(self, t: Tensor, s_hat: float) -> Masks:
        """Compute the gates of every layer for the given task ids.

        Each gate is the sigmoid of the corresponding task embedding, scaled by
        `s_hat`.
        """
        gc1 = self.gate(s_hat * self.ec1(t))
        gc2 = self.gate(s_hat * self.ec2(t))
        gc3 = self.gate(s_hat * self.ec3(t))
        gfc1 = self.gate(s_hat * self.efc1(t))
        gfc2 = self.gate(s_hat * self.efc2(t))
        return Masks(gc1, gc2, gc3, gfc1, gfc2)

    def shared_step(
        self, batch: Tuple[Observations, Optional[Rewards]], environment: Environment
    ) -> Tuple[Tensor, Dict]:
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.

            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance, for example.

        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch, for example when our
            performance is being monitored during training.

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels) back.
        observations: Observations = batch[0]
        rewards: Optional[Rewards] = batch[1]

        # Get the predictions:
        logits, _ = self(observations)
        y_pred = logits.argmax(-1)

        if rewards is None:
            # If the rewards in the batch were None, it means we're expected to give
            # actions before we can get rewards back from the environment.
            # This happens when the Setting is monitoring our training performance.
            rewards = environment.send(Actions(y_pred))

        assert rewards is not None
        image_labels = rewards.y

        loss = self.loss(logits, image_labels)

        # Fraction of the samples of the batch that are classified correctly.
        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict
def compute_conv_output_size(
    Lin: int, kernel_size: int, stride: int = 1, padding: int = 0, dilation: int = 1
) -> int:
    """Return the output length of a convolution along one dimension.

    Computes `floor((Lin + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)`.
    """
    # Extent covered by the (dilated) kernel, and length of the padded input.
    effective_kernel = dilation * (kernel_size - 1) + 1
    padded_length = Lin + 2 * padding
    n_positions = np.floor((padded_length - effective_kernel) / float(stride) + 1)
    return int(n_positions)
@register_method
class HatMethod(Method, target_setting=TaskIncrementalSLSetting):
"""Hard Attention to the Task
```
@inproceedings{serra2018overcoming,
title={Overcoming Catastrophic Forgetting with Hard Attention to the Task},
author={Serra, Joan and Suris, Didac and Miron, Marius and Karatzoglou, Alexandros},
booktitle={International Conference on Machine Learning},
pages={4548--4557},
year={2018}
}
```
"""
@dataclass
class HParams(HyperParameters):
    """Hyper-parameters of the `HatMethod`."""

    # Learning rate of the optimizer.
    learning_rate: float = log_uniform(1e-6, 1e-2, default=0.001)
    # Batch size
    batch_size: int = categorical(16, 32, 64, 128, default=128)
    # weight/importance of the task embedding to the gate function
    s_hat: float = uniform(1.0, 100.0, default=50.0)
    # Maximum number of training epochs per task
    max_epochs_per_task: int = uniform(1, 20, default=10, discrete=True)
def __init__(self, hparams: HParams = None):
    """Store the hyper-parameters, falling back to the default `HParams`.

    The model and the optimizer are only declared here: they get created in
    `configure`, before training.
    """
    if not hparams:
        hparams = self.HParams()
    self.hparams: HatMethod.HParams = hparams
    self.early_stopping_patience = 2

    # Created in `configure`.
    self.model: HatNet
    self.optimizer: torch.optim.Optimizer
def configure(self, setting: TaskIncrementalSLSetting):
    """Create the model and its optimizer for the given setting.

    Called before the method is applied on a setting (before training), since this is
    where the observation & action spaces become available. Also sets the batch size
    of the setting from the hyper-parameters.
    """
    hparams = self.hparams
    setting.batch_size = hparams.batch_size
    assert (
        setting.increment == setting.test_increment
    ), "Assuming same number of classes per task for training and testing."

    # Number of (training) classes in each task, indexed by task id.
    n_classes_per_task = {}
    for task_index in range(setting.nb_tasks):
        n_classes_per_task[task_index] = setting.num_classes_in_task(task_index, train=True)

    image_space: Image = setting.observation_space["x"]
    self.model = HatNet(
        image_space=image_space,
        n_classes_per_task=n_classes_per_task,
        s_hat=hparams.s_hat,
    )
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=hparams.learning_rate)
def fit(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
"""
Train loop
Different Settings can return elements from tasks in an other way,
be it class incremental, task incremental, etc.
Batch can have information about en environment, rewards, input, task labels, etc.
And we call the forward training function of our method, independent of the settings
"""
# configure() will have been called by the setting before we get here,
best_val_loss = inf
best_epoch = 0
for epoch in range(self.hparams.max_epochs_per_task):
self.model.train()
print(f"Starting epoch {epoch}")
# Training loop:
with tqdm.tqdm(train_env) as train_pbar:
postfix = {}
train_pbar.set_description(f"Training Epoch {epoch}")
for i, batch in enumerate(train_pbar):
loss, metrics_dict = self.model.shared_step(
batch,
environment=train_env,
)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
postfix.update(metrics_dict)
train_pbar.set_postfix(postfix)
# Validation loop:
self.model.eval()
torch.set_grad_enabled(False)
with tqdm.tqdm(valid_env) as val_pbar:
postfix = {}
val_pbar.set_description(f"Validation Epoch {epoch}")
epoch_val_loss = 0.0
for i, batch in enumerate(val_pbar):
batch_val_loss, metrics_dict = self.model.shared_step(
batch,
environment=valid_env,
)
epoch_val_loss += batch_val_loss
postfix.update(metrics_dict, val_loss=epoch_val_loss)
val_pbar.set_postfix(postfix)
torch.set_grad_enabled(True)
if epoch_val_loss < best_val_loss:
best_val_loss = epoch_val_loss
best_epoch = i
elif epoch - best_epoch > self.early_stopping_patience:
print(f"Early stopping at epoch {epoch}")
break
def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
"""Get a batch of predictions (aka actions) for these observations."""
with torch.no_grad():
logits, _ = self.model(observations)
# Get the predicted classes
y_pred = logits.argmax(dim=-1)
return self.target_setting.Actions(y_pred)
def on_task_switch(self, task_id: Optional[int]):
# This method gets called if task boundaries are known in the current
# setting. Furthermore, if task labels are available, task_id will be
# the index of the new task. If not, task_id will be None.
# TODO: Does this method actually work when task_id is None?
self.model.current_task = task_id
@classmethod
def add_argparse_args(cls, parser: ArgumentParser) -> None:
parser.add_arguments(cls.HParams, dest="hparams")
# You can also add arguments as usual:
# parser.add_argument("--foo", default=123)
@classmethod
def from_argparse_args(cls, args: Namespace) -> "HatMethod":
hparams: HatMethod.HParams = args.hparams
# foo: int = args.foo
method = cls(hparams=hparams)
return method
def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
"""Returns the search space to use for HPO in the given Setting.
Parameters
----------
setting : Setting
The Setting on which the run of HPO will take place.
Returns
-------
Mapping[str, Union[str, Dict]]
An orion-formatted search space dictionary, mapping from hyper-parameter
names (str) to their priors (str), or to nested dicts of the same form.
"""
return self.hparams.get_orion_space()
def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
"""Adapts the Method when it receives new Hyper-Parameters to try for a new run.
It is required that this method be implemented if you want to perform HPO sweeps
with Orion.
Parameters
----------
new_hparams : Dict[str, Any]
The new hyper-parameters being recommended by the HPO algorithm. These will
have the same structure as the search space.
"""
# Here we overwrite the corresponding attributes with the new suggested values
# leaving other fields unchanged.
# NOTE: These new hyper-paramers will be used in the next run in the sweep,
# since each call to `configure` will create a new Model.
self.hparams = self.hparams.replace(**new_hparams)
def setup_wandb(self, run: Run) -> None:
"""Called by the Setting when using Weights & Biases, after `wandb.init`.
This method is here to provide Methods with the opportunity to log some of their
configuration options or hyper-parameters to wandb.
NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
this point.
Parameters
----------
run : wandb.Run
Current wandb Run.
"""
run.config["hparams"] = self.hparams.to_dict()
if __name__ == "__main__":
    # Example: Evaluate a Method on a single CL setting:
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # We must define 3 main components:
    # 1.- Setting: The continual learning scenario we are working in (SL or RL, TI or CI).
    #     Each setting has its own parameters that can be customized.
    # 2.- Model: The parameters and layers of the model, just like in PyTorch.
    #     We can use a predefined model or create our own.
    # 3.- Method: How we use what the setting gives us to train our model.
    #     Same as before, we can define our own or use pre-defined Methods.

    # Add arguments for the Method, the Setting, and the Config.
    # (Config contains options like the log_dir, the data_dir, etc.)
    # NOTE: `HatMethod.add_argparse_args` / `from_argparse_args` take no `dest` argument
    # (the hyper-parameters are always stored under `hparams`), so none is passed here.
    HatMethod.add_argparse_args(parser)
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    parser.add_arguments(Config, "config")

    args = parser.parse_args()

    # Create the Method from the args, and extract the Setting, and the Config:
    method: HatMethod = HatMethod.from_argparse_args(args)
    setting: TaskIncrementalSLSetting = args.setting
    config: Config = args.config

    # Apply the method to the setting, optionally passing in a Config,
    # producing Results.
    results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")
================================================
FILE: sequoia/methods/method_test.py
================================================
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import ClassVar, Dict, Type, TypeVar
import pytest
from sequoia.common.config import Config
from sequoia.conftest import config, session_config
from sequoia.settings import RLSetting, Setting, SLSetting
from sequoia.settings.base import Method
from sequoia.settings.sl.continual.setting import random_subset
def key_fn(setting_class: Type[Setting]):
    """Sort key for setting classes: number of parents first, then class name.

    Ordering by the number of entries returned by `parents()` places shallower
    settings first; the class name breaks ties so the ordering is deterministic.
    """
    depth = len(setting_class.parents())
    name = setting_class.__name__
    return depth, name
def make_setting_type_fixture(method_type: Type[Method]) -> pytest.fixture:
    """Build a module-scoped, parametrized `setting_type` fixture for a Method.

    The fixture is parametrized with every setting returned by
    `method_type.get_applicable_settings()`, except for the `Setting`, `SLSetting`
    and `RLSetting` classes themselves.
    """
    excluded = {Setting, SLSetting, RLSetting}
    applicable = set(method_type.get_applicable_settings()) - excluded
    # NOTE: The ordering of the params has to be deterministic, otherwise the tests
    # can't be parallelized with pytest-xdist.
    ordered_settings = sorted(applicable, key=key_fn)

    def setting_type(self, request):
        # Each parametrization simply hands back the setting class it was given.
        return request.param

    make_fixture = pytest.fixture(params=ordered_settings, scope="module")
    return make_fixture(setting_type)
# Type variable standing for the concrete `Method` subclass under test (bounded by `Method`).
MethodType = TypeVar("MethodType", bound=Method)
class MethodTests(ABC):
    """Base class that can be extended to generate tests for a method.

    The main test of interest is `test_debug`.

    Subclasses indicate which Method to test either by setting a `Method` class
    attribute, or by passing `method=...` in the class statement.
    """

    Method: ClassVar[Type[MethodType]]
    setting_type: pytest.fixture

    # Kwargs to pass when contructing the Settings.
    setting_kwargs: ClassVar[Dict] = {}
    # Kwargs passed to the Method constructor by the default `method` fixture.
    method_debug_kwargs: ClassVar[Dict] = {}

    def __init_subclass__(cls, method: Type[MethodType] = None):
        """Dynamically generates a `setting_type` fixture on the subclass, which will
        be parametrized by the settings that the Method is applicable to.

        Raises
        ------
        RuntimeError
            If neither a `Method` class attribute nor the `method` argument is given.
        """
        super().__init_subclass__()
        # `Method` is only annotated on this base class (no value), so it has to be
        # looked up defensively: accessing `cls.Method` directly raises an
        # AttributeError when only the `method` argument was passed.
        method_class = getattr(cls, "Method", None) or method
        if not method_class:
            raise RuntimeError(
                "Need to either pass `method` when subclassing or set "
                "a 'Method' class attribute."
            )
        cls.Method = method_class
        cls.setting_type = make_setting_type_fixture(cls.Method)

    @classmethod
    @abstractmethod
    @pytest.fixture
    def method(cls, config: Config) -> MethodType:
        """Fixture that returns the Method instance to use when testing/debugging.

        Needs to be implemented when creating a new test class (to generate tests for a
        new method).
        """
        return cls.Method(**cls.method_debug_kwargs)

    @abstractmethod
    def validate_results(
        self,
        setting: Setting,
        method: MethodType,
        results: Setting.Results,
    ) -> None:
        """Check the results produced by applying `method` on `setting`.

        This base implementation only checks that results and an objective exist, and
        prints the summary.
        """
        assert results
        assert results.objective
        assert results.objective is not None
        print(results.summary())

    # NOTE: Need to re-define these here, just so external packages, which maybe aren't
    # in the "scope" of `sequoia/conftest.py` can also use them:
    # Dropping the `self` argument by making those static methods on the class.
    session_config: pytest.fixture = staticmethod(session_config)
    config: pytest.fixture = staticmethod(config)

    @pytest.fixture(scope="module")
    def setting(self, setting_type: Type[Setting], session_config: Config):
        """Module-scoped fixture that creates a small Setting of type `setting_type`.

        SL settings have their train/val/test datasets shortened to a few samples per
        task; RL settings are limited through their maximum number of steps.
        """
        # TODO: Fix this test setup, nb_tasks should be something low like 2, and
        # perhaps use max_episode_steps to limit episode length
        if issubclass(setting_type, SLSetting):
            setting_kwargs = dict(
                nb_tasks=5,
                config=session_config,
            )
            setting_kwargs.setdefault("monitor_training_performance", True)
            # TODO: Do we also want to parameterize the dataset? or is it too much?
            setting_kwargs.update(self.setting_kwargs)
            setting = setting_type(
                **setting_kwargs,
            )
            assert setting.dataset, setting_kwargs
            setting.config = session_config
            setting.batch_size = 10
            setting.prepare_data()
            setting.setup()

            # Use the number of tasks that was actually passed to the Setting, so that
            # overriding `nb_tasks` through `setting_kwargs` doesn't break the checks.
            nb_tasks = setting_kwargs["nb_tasks"]
            samples_per_task = 50

            # Testing this out: Shortening the train datasets:
            setting.train_datasets = [
                random_subset(task_dataset, samples_per_task)
                for task_dataset in setting.train_datasets
            ]
            setting.val_datasets = [
                random_subset(task_dataset, samples_per_task)
                for task_dataset in setting.val_datasets
            ]
            setting.test_datasets = [
                random_subset(task_dataset, samples_per_task)
                for task_dataset in setting.test_datasets
            ]

            def check_shortened_datasets() -> None:
                # One dataset per task, each with exactly `samples_per_task` samples.
                assert len(setting.train_datasets) == nb_tasks
                assert len(setting.val_datasets) == nb_tasks
                assert len(setting.test_datasets) == nb_tasks
                assert all(len(dataset) == samples_per_task for dataset in setting.train_datasets)
                assert all(len(dataset) == samples_per_task for dataset in setting.val_datasets)
                assert all(len(dataset) == samples_per_task for dataset in setting.test_datasets)

            check_shortened_datasets()
            # Assert that calling setup doesn't overwrite the datasets.
            setting.setup()
            check_shortened_datasets()
        else:
            # RL setting:
            setting_kwargs = dict(
                nb_tasks=2,
                train_max_steps=1_000,
                test_max_steps=1_000,
                # train_steps_per_task=2_000,
                # test_steps_per_task=1_000,
                config=session_config,
            )
            # TODO: Do we also want to parameterize the dataset? or is it too much?
            setting_kwargs.update(self.setting_kwargs)
            setting = setting_type(
                **setting_kwargs,
            )
        yield setting

    def test_debug(self, method: MethodType, setting: Setting, config: Config):
        """Apply the Method onto a setting, and validate the results."""
        results: Setting.Results = setting.apply(method, config=config)
        self.validate_results(setting=setting, method=method, results=results)
# Empty Setting subclass, used as the `target_setting` of `NewMethod` in the tests below.
@dataclass
class NewSetting(Setting):
    pass
# Minimal Method targeting `NewSetting`, used to test the `target_setting` class keyword.
@dataclass
class NewMethod(Method, target_setting=NewSetting):
    def fit(self, train_env, valid_env):
        """No-op training: this mock method never learns anything."""
        return None

    def get_actions(self, observations, action_space):
        """Ignore the observations and return a random sample of the action space."""
        sampled_action = action_space.sample()
        return sampled_action
def test_passing_arg_to_class_constructor_works():
    """The `target_setting` class keyword is visible on the class and on its instances."""
    class_level_target = NewMethod.target_setting
    assert class_level_target is NewSetting

    instance_level_target = NewMethod().target_setting
    assert instance_level_target is NewSetting
@pytest.mark.xfail(reason="Not sure this is necessary.")
def test_cant_change_target_setting():
    """Re-assigning `target_setting` on the class or on an instance should raise."""
    # First on the class itself:
    with pytest.raises(AttributeError):
        setattr(NewMethod, "target_setting", NewSetting)
    # Then on a fresh instance:
    with pytest.raises(AttributeError):
        setattr(NewMethod(), "target_setting", NewSetting)
def test_target_setting_is_inherited():
    """A subclass that doesn't pass `target_setting` keeps the one of its parent."""

    @dataclass
    class NewMethod2(NewMethod):
        pass

    inherited_target = NewMethod2.target_setting
    assert inherited_target is NewSetting
# Mock setting: direct child of `Setting`, and parent of `SettingA1` and `SettingA2`.
@dataclass
class SettingA(Setting):
    pass
# Mock setting: first child of `SettingA`.
@dataclass
class SettingA1(SettingA):
    pass
# Mock setting: second child of `SettingA`.
@dataclass
class SettingA2(SettingA):
    pass
# Mock setting: direct child of `Setting`, on a different branch than `SettingA`.
@dataclass
class SettingB(Setting):
    pass
class MethodA(Method, target_setting=SettingA):
    # Mock Method targeting `SettingA`; used by the applicability tests in this file.

    def fit(self, train_env, valid_env):
        """No-op training: this mock method never learns anything."""
        return None

    def get_actions(self, observations, action_space):
        """Ignore the observations and return a random sample of the action space."""
        sampled_action = action_space.sample()
        return sampled_action
class MethodB(Method, target_setting=SettingB):
    # Mock Method targeting `SettingB`; used by the applicability tests in this file.

    def fit(self, train_env, valid_env):
        """No-op training: this mock method never learns anything."""
        return None

    def get_actions(self, observations, action_space):
        """Ignore the observations and return a random sample of the action space."""
        sampled_action = action_space.sample()
        return sampled_action
class CoolGeneralMethod(Method, target_setting=Setting):
    # Mock Method targeting the root `Setting` class.

    def fit(self, train_env, valid_env):
        """No-op training: this mock method never learns anything."""
        return None

    def get_actions(self, observations, action_space):
        """Ignore the observations and return a random sample of the action space."""
        sampled_action = action_space.sample()
        return sampled_action
def test_method_is_applicable_to_setting():
    """Test the mechanism for determining if a method is applicable for a given
    setting.

    Uses the mock hierarchy created above:
    - Setting
        - SettingA
            - SettingA1
            - SettingA2
        - SettingB

    - Method
        - MethodA (target_setting: SettingA)
        - MethodB (target_setting: SettingB)

    TODO: if we ever end up registering the method classes when declaring them,
    then we will need to check that this dummy test hierarchy doesn't actually
    show up in the real setting options.
    """
    # MethodA targets `SettingA`: it ISN'T applicable on the root node `Setting`...
    assert not MethodA.is_applicable(Setting)
    # ...it IS applicable on its target node and on every node below it in the tree...
    for setting_in_subtree in (SettingA, SettingA1, SettingA2):
        assert MethodA.is_applicable(setting_in_subtree)
    # ...and it ISN'T applicable on another branch of the tree.
    assert not MethodA.is_applicable(SettingB)

    # Same thing for MethodB, which targets `SettingB`:
    assert MethodB.is_applicable(SettingB)
    for setting_outside_subtree in (Setting, SettingA, SettingA1, SettingA2):
        assert not MethodB.is_applicable(setting_outside_subtree)
def test_is_applicable_also_works_on_instances():
    """`is_applicable` accepts any mix of classes and instances of Methods/Settings."""
    # Target setting of MethodA: applicable in every class/instance combination.
    applicable_results = (
        MethodA().is_applicable(SettingA),
        MethodA.is_applicable(SettingA()),
        MethodA().is_applicable(SettingA()),
    )
    for result in applicable_results:
        assert result

    # Setting on another branch: never applicable.
    not_applicable_results = (
        MethodA().is_applicable(SettingB),
        MethodA.is_applicable(SettingB()),
        MethodA().is_applicable(SettingB()),
    )
    for result in not_applicable_results:
        assert not result
================================================
FILE: sequoia/methods/models/__init__.py
================================================
# from .actor_critic_agent import ActorCritic
# from .agent import Agent
from .base_model import BaseModel, Model, available_encoders, available_optimizers
from .forward_pass import ForwardPass
from .output_heads import ClassificationHead, OutputHead, RegressionHead
================================================
FILE: sequoia/methods/models/base_model/__init__.py
================================================
""" This module defines the `BaseModel` used by the `BaseMethod`.
Output heads are available for both Supervised and Reinforcement Learning, and can be
found in `sequoia.methods.models.output_heads`.
Instead of defining the `Model` in one large file, it is instead split into a base
class (`Model`, defined in `model.py`) on top of which a few "mixins" are added, each
of which adds additional functionality:
- [SemiSupervisedModel](semi_supervised_model.py):
Adds support for semi-supervised (partially labeled or un-labeled) training, by
splitting up partially labeled batches into a fully labeled sub-batch and a fully
unlabeled sub-batch.
- [MultiHeadModel](multihead_model.py):
Adds support for:
- multi-head prediction: Using a dedicated output head for each task when
task labels are available
- Mixed batches (data coming from more than one task within the same batch)
- TODO: Task inference: When task labels aren't available, perform
some task inference in order to choose which output head to use.
- [SelfSupervisedModel](self_supervised_model.py):
Adds methods for adding self-supervised losses to the model using different
Auxiliary Tasks.
The `BaseModel` is then formed by inheriting from each of these mixins.
"""
from .base_model import BaseModel
# TODO: Maybe the naming of these could be a bit better: Model seems more 'general' than BaseModel.
from .model import Model, available_encoders, available_optimizers
from .multihead_model import MultiHeadModel
from .self_supervised_model import SelfSupervisedModel
from .semi_supervised_model import SemiSupervisedModel
================================================
FILE: sequoia/methods/models/base_model/base_model.py
================================================
""" Example/Template of a Model to be used as part of a Method.
You can use this as a base class when creating your own models, or you can
start from scratch, whatever you like best.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Generic, Optional, Tuple, Type, TypeVar
import numpy as np
import torch
from simple_parsing import choice, mutable_field
from torch import Tensor, nn, optim
from torch.optim.optimizer import Optimizer
from torchvision import models as tv_models
from sequoia.common.config import Config
from sequoia.common.hparams import categorical, log_uniform
from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask
from sequoia.methods.models.output_heads import OutputHead, PolicyHead
from sequoia.methods.models.simple_convnet import SimpleConvNet
from sequoia.settings import Environment, Observations, Rewards, Setting
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.utils.logging_utils import get_logger
from .model import ForwardPass
from .multihead_model import MultiHeadModel
from .self_supervised_model import SelfSupervisedModel
from .semi_supervised_model import SemiSupervisedModel
# NOTE(review): this turns on autograd anomaly detection as an import-time side effect of
# this module. PyTorch documents this mode as a debugging aid that slows down execution
# -- TODO confirm that enabling it unconditionally is intended.
torch.autograd.set_detect_anomaly(True)
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Type variable for the kind of Setting a `BaseModel` is applied to (bounded by
# `IncrementalAssumption`).
SettingType = TypeVar("SettingType", bound=IncrementalAssumption)
class BaseModel(SemiSupervisedModel, MultiHeadModel, SelfSupervisedModel, Generic[SettingType]):
    """Base model LightningModule (nn.Module extended by pytorch-lightning)

    This model splits the learning task into a representation-learning problem
    and a downstream task (output head) applied on top of it.

    The most important method to understand is the `get_loss` method, which
    is used by the [train/val/test]_step methods which are called by
    pytorch-lightning.

    Most methods below simply delegate to the parent classes (mixins); they are
    re-declared here for visibility when extending the model.
    """

    @dataclass
    class HParams(SemiSupervisedModel.HParams, SelfSupervisedModel.HParams, MultiHeadModel.HParams):
        """HParams of the Model."""

        # NOTE: All the fields below were just copied from the BaseHParams class, just
        # to improve visibility a bit.

        # Class variables that hold the available optimizers and encoders.
        # NOTE: These don't get parsed from the command-line.
        available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = {
            "sgd": optim.SGD,
            "adam": optim.Adam,
            "rmsprop": optim.RMSprop,
        }

        # Which optimizer to use.
        optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)

        available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = {
            "vgg16": tv_models.vgg16,
            "resnet18": tv_models.resnet18,
            "resnet34": tv_models.resnet34,
            "resnet50": tv_models.resnet50,
            "resnet101": tv_models.resnet101,
            "resnet152": tv_models.resnet152,
            "alexnet": tv_models.alexnet,
            "densenet": tv_models.densenet161,
            # TODO: Add the self-supervised pl modules here!
            "simple_convnet": SimpleConvNet,
        }
        # Which encoder to use.
        encoder: Type[nn.Module] = choice(
            available_encoders,
            default=SimpleConvNet,
            # # TODO: Only considering these two for now when performing an HPO sweep.
            # probabilities={"resnet18": 0., "simple_convnet": 1.0},
        )

        # Learning rate of the optimizer.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
        # L2 regularization term for the model weights.
        weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)

        # Batch size to use during training and evaluation.
        batch_size: Optional[int] = None

        # Number of hidden units (before the output head).
        # When left to None (default), the hidden size from the pretrained
        # encoder model will be used. When set to an integer value, an
        # additional Linear layer will be placed between the outputs of the
        # encoder in order to map from the encoder's output size H_e
        # to this new hidden size `new_hidden_size`.
        new_hidden_size: Optional[int] = None
        # Retrain the encoder from scratch or start from pretrained weights.
        train_from_scratch: bool = False
        # Whether we should keep the weights of the encoder frozen.
        freeze_pretrained_encoder_weights: bool = False

        # Hyper-parameters of the output head.
        output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)

        # Whether the output head should be detached from the representations.
        # In other words, if the gradients from the downstream task should be
        # allowed to affect the representations.
        detach_output_head: bool = False

    def __init__(self, setting: SettingType, hparams: HParams, config: Config):
        """Create the model for the given setting, hyper-parameters and config.

        Logs the spaces of the setting, then enables every auxiliary task that has a
        non-zero coefficient.
        """
        super().__init__(setting=setting, hparams=hparams, config=config)

        self.save_hyperparameters({"hparams": self.hp.to_dict(), "config": self.config.to_dict()})

        logger.debug(f"setting of type {type(self.setting)}")
        logger.debug(f"Observation space: {self.observation_space}")
        logger.debug(f"Action/Output space: {self.action_space}")
        logger.debug(f"Reward/Label space: {self.reward_space}")

        if self.config.debug and self.config.verbose:
            logger.debug("Config:")
            logger.debug(self.config.dumps(indent="\t"))
            logger.debug("Hparams:")
            logger.debug(self.hp.dumps(indent="\t"))

        if self.tasks:
            # Log the header once, rather than once per auxiliary task.
            logger.debug("Auxiliary tasks:")
        for task_name, task in self.tasks.items():
            assert isinstance(
                task, AuxiliaryTask
            ), f"Task {task} should be a subclass of {AuxiliaryTask}."
            if task.coefficient != 0:
                logger.debug(f"\t {task_name}: {task.coefficient}")
                logger.info(
                    f"Enabling the '{task_name}' auxiliary task (coefficient of "
                    f"{task.coefficient})"
                )
                task.enable()

        # Only used for the type annotation of `self.logger` below.
        from pytorch_lightning.loggers import WandbLogger

        self.logger: WandbLogger

    def on_fit_start(self):
        """Hook called at the start of fit; delegates to the parent classes."""
        super().on_fit_start()
        # NOTE: We could use this to log stuff to wandb.
        # NOTE: The Setting already logs itself in the `wandb.config` dict.

    def forward(self, observations: Setting.Observations) -> ForwardPass:  # type: ignore
        """Forward pass of the model.

        For the given observations, creates a `ForwardPass`, a dict-like object which
        will hold the observations, the representations and the output head predictions.

        NOTE: Base implementation is in `model.py`.

        Parameters
        ----------
        observations : Setting.Observations
            Observations from one of the environments of a Setting.

        Returns
        -------
        ForwardPass
            A dict-like object which holds the observations, representations, and output
            head predictions (actions). See the `ForwardPass` class for more info.
        """
        # The observations should come from a batched environment. If they are not, we
        # add a batch dimension, which we will then remove.
        assert isinstance(observations.x, (Tensor, np.ndarray))
        # Check if the observations are batched or not.
        not_batched = not self._are_batched(observations)
        if not_batched:
            observations = observations.with_batch_dimension()

        forward_pass = super().forward(observations)
        # Simplified this for now, but we could add more flexibility later.
        assert isinstance(forward_pass, ForwardPass)

        # If the original observations didn't have a batch dimension,
        # Remove the batch dimension from the results.
        if not_batched:
            forward_pass = forward_pass.remove_batch_dimension()
        return forward_pass

    def create_output_head(self, task_id: Optional[int]) -> OutputHead:
        """Create an output head for the current action and reward spaces.

        NOTE: This assumes that the input, action and reward spaces don't change
        between tasks.

        Parameters
        ----------
        task_id : Optional[int]
            ID of the task associated with this new output head. Can be `None`, which is
            interpreted as saying that either that task labels aren't available, or that
            this output head will be used for all tasks.

        Returns
        -------
        OutputHead
            The new output head for the given task.
        """
        # NOTE: Actual implementation is in `model.py`. This is added here just for
        # convenience when extending the baseline model.
        return super().create_output_head(task_id=task_id)

    def output_head_type(self, setting: SettingType) -> Type[OutputHead]:
        """Return the type of output head we should use in a given setting."""
        # NOTE: Implementation is in `model.py`.
        return super().output_head_type(setting)

    @property
    def automatic_optimization(self) -> bool:
        """False when the output head is a `PolicyHead`, True otherwise."""
        return not isinstance(self.output_head, PolicyHead)

    def training_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> ForwardPass:
        """Training step; uses `self.setting.train_env` when no environment is given."""
        # Compare against None explicitly: environments may define `__len__`, in which
        # case a truthiness check could call it (or treat an empty env as missing).
        if environment is None:
            environment = self.setting.train_env
        return super().training_step(
            batch,
            batch_idx=batch_idx,
            environment=environment,
            dataloader_idx=dataloader_idx,
            optimizer_idx=optimizer_idx,
        )

    def validation_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
    ) -> ForwardPass:
        """Validation step; uses `self.setting.val_env` when no environment is given."""
        if environment is None:
            environment = self.setting.val_env
        return super().validation_step(
            batch,
            batch_idx=batch_idx,
            environment=environment,
            dataloader_idx=dataloader_idx,
        )

    def test_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment = None,
        dataloader_idx: int = None,
    ) -> ForwardPass:
        """Test step; uses `self.setting.test_env` when no environment is given."""
        if environment is None:
            environment = self.setting.test_env
        return super().test_step(
            batch,
            batch_idx=batch_idx,
            environment=environment,
            dataloader_idx=dataloader_idx,
        )

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        batch_idx: int,
        environment: Environment,
        phase: str,
        dataloader_idx: int = None,
        optimizer_idx: int = None,
    ) -> ForwardPass:
        """Step shared by the different phases; delegates to the parent classes."""
        return super().shared_step(
            batch,
            batch_idx=batch_idx,
            environment=environment,
            phase=phase,
            dataloader_idx=dataloader_idx,
            optimizer_idx=optimizer_idx,
        )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        return super().on_task_switch(task_id)
================================================
FILE: sequoia/methods/models/base_model/model.py
================================================
"""Base for the model used by the `BaseMethod`.
This model is basically just an encoder and an output head. Both of these can be
switched out/customized as needed.
"""
import dataclasses
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, Generic, List, Optional, Tuple, Type, TypeVar, Union
import gym
import numpy as np
import torch
import torchvision.models as tv_models
from gym import Space, spaces
from gym.spaces.utils import flatdim
from pytorch_lightning import LightningModule
from simple_parsing import choice, mutable_field
from simple_parsing.helpers.hparams import HyperParameters
from simple_parsing.helpers.serialization import register_decoding_fn
from torch import Tensor, nn, optim
from torch.optim.optimizer import Optimizer # type: ignore
from sequoia.common.config import Config
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.hparams import HyperParameters, categorical, log_uniform
from sequoia.common.loss import Loss
from sequoia.common.spaces import Image
from sequoia.methods.models.output_heads import OutputHead
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import Environment
from sequoia.settings.base.setting import Actions, Observations, Rewards
from sequoia.settings.rl import ContinualRLSetting, RLSetting
from sequoia.settings.sl import SLSetting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.pretrained_utils import get_pretrained_encoder
from ..fcnet import FCNet
from ..forward_pass import ForwardPass
from ..output_heads import (
ActorCriticHead,
ClassificationHead,
OutputHead,
PolicyHead,
RegressionHead,
)
from ..output_heads.rl.episodic_a2c import EpisodicA2C
from ..simple_convnet import SimpleConvNet
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Type variable for the kind of Setting a `Model` is applied to (bounded by
# `IncrementalAssumption`).
SettingType = TypeVar("SettingType", bound=IncrementalAssumption)
# Maps optimizer names to the corresponding `torch.optim` optimizer classes.
available_optimizers: Dict[str, Type[Optimizer]] = {
    "sgd": optim.SGD,
    "adam": optim.Adam,
    "rmsprop": optim.RMSprop,
}
# Maps encoder names to the callables used to create them: torchvision model
# constructors, plus the `SimpleConvNet` class.
available_encoders: Dict[str, Type[nn.Module]] = {
    "vgg16": tv_models.vgg16,
    "resnet18": tv_models.resnet18,
    "resnet34": tv_models.resnet34,
    "resnet50": tv_models.resnet50,
    "resnet101": tv_models.resnet101,
    "resnet152": tv_models.resnet152,
    "alexnet": tv_models.alexnet,
    "densenet": tv_models.densenet161,
    # TODO: Add the self-supervised pl modules here!
    "simple_convnet": SimpleConvNet,
}
class Model(LightningModule, Generic[SettingType]):
"""Basic Model to be used by a Method.
Based on the `LightningModule` (nn.Module extended by pytorch-lightning).
This Model can be trained on either Supervised or Reinforcement Learning environments.
This model splits the learning task into a representation-learning problem
and a downstream task (output head) applied on top of it.
The most important method to understand is the `get_loss` method, which
is used by the [train/val/test]_step methods which are called by
pytorch-lightning.
"""
@dataclass
class HParams(HyperParameters):
"""HParams of the Model."""
# Class variable versions of the above dicts, for easier subclassing.
# NOTE: These don't get parsed from the command-line.
available_optimizers: ClassVar[Dict[str, Type[Optimizer]]] = available_optimizers.copy()
available_encoders: ClassVar[Dict[str, Type[nn.Module]]] = available_encoders.copy()
# Learning rate of the optimizer.
learning_rate: float = log_uniform(1e-6, 1e-2, default=1e-3)
# L2 regularization term for the model weights.
weight_decay: float = log_uniform(1e-12, 1e-3, default=1e-6)
# Which optimizer to use.
optimizer: Type[Optimizer] = categorical(available_optimizers, default=optim.Adam)
# Use an encoder architecture from the torchvision.models package.
encoder: Type[nn.Module] = categorical(
available_encoders,
default=tv_models.resnet18,
# TODO: Only using these two by default when performing a sweep.
probabilities={"resnet18": 0.5, "simple_convnet": 0.5},
)
# Batch size to use during training and evaluation.
batch_size: Optional[int] = None
# Number of hidden units (before the output head).
# When left to None (default), the hidden size from the pretrained
# encoder model will be used. When set to an integer value, an
# additional Linear layer will be placed between the outputs of the
# encoder in order to map from the pretrained encoder's output size H_e
# to this new hidden size `new_hidden_size`.
new_hidden_size: Optional[int] = None
# Retrain the encoder from scratch.
train_from_scratch: bool = False
# Wether we should keep the weights of the pretrained encoder frozen.
freeze_pretrained_encoder_weights: bool = False
# Settings for the output head.
# TODO: This could be overwritten in a subclass to do classification or
# regression or RL, etc.
output_head: OutputHead.HParams = mutable_field(OutputHead.HParams)
# Wether the output head should be detached from the representations.
# In other words, if the gradients from the downstream task should be
# allowed to affect the representations.
detach_output_head: bool = False
# Which algorithm to use for the output head when in an RL setting.
# TODO: Run the PolicyHead in the following conditions:
# - Compare the big backward pass vs many small ones
# - Try to have it learn from pixel input, if possible
# - Try to have it learn on a multi-task RL setting,
# TODO: Finish the ActorCritic and EpisodicA2C heads.
rl_output_head_algo: Type[OutputHead] = choice(
{
"reinforce": PolicyHead,
"a2c_online": ActorCriticHead,
"a2c_episodic": EpisodicA2C,
},
default=EpisodicA2C,
)
def __init__(self, setting: SettingType, hparams: HParams, config: Config):
    """Create the encoder and the default output head for the given setting.

    Args:
        setting: The setting this model is applied to. Its `Observations`,
            `Actions` and `Rewards` types, as well as its observation, action
            and reward spaces, are copied onto the model.
        hparams: The hyper-parameters of the model, stored as `self.hp`.
        config: The configuration, stored as `self.config`. Its `device`
            attribute is where the encoder gets moved.
    """
    super().__init__()
    self.setting: SettingType = setting
    self.hp: Model.HParams = hparams
    self.Observations: Type[Observations] = setting.Observations
    self.Actions: Type[Actions] = setting.Actions
    self.Rewards: Type[Rewards] = setting.Rewards
    # Choose what type of output head to use depending on the kind of
    # Setting.
    self.OutputHead: Type[OutputHead] = self.output_head_type(setting)
    self.observation_space: gym.Space = setting.observation_space
    self.action_space: gym.Space = setting.action_space
    self.reward_space: gym.Space = setting.reward_space
    # Only the 'x' entry of the observations is fed to the encoder.
    self.input_shape = self.observation_space.x.shape
    self.reward_shape = self.reward_space.shape
    self.config: Config = config
    # NOTE: do NOT set the `datamodule` property, otherwise the trainer will ignore
    # the passed train/val/test dataloader from the Setting.
    # self.datamodule: LightningDataModule = setting
    # (Testing) Setting this attribute is supposed to help with ddp/etc
    # training in pytorch-lightning. Not 100% sure.
    # self.example_input_array = torch.rand(self.batch_size, *self.input_shape)
    # Create the encoder and the output head.
    # Space of our encoder representations.
    self.representation_space: gym.Space
    # When 'x' isn't an Image space, we assume we're observing a state vector.
    observing_state = not isinstance(setting.observation_space.x, Image)
    if isinstance(setting, ContinualRLSetting) and observing_state:
        # ISSUE # 62: Need to add a dense network instead of no encoder, and
        # change the PolicyHead to have only one layer.
        # Only pass the image, not the task labels to the encoder (for now).
        input_dims = flatdim(self.observation_space["x"])
        # Fall back to 128 hidden units when no hidden size was requested.
        output_dims = self.hp.new_hidden_size or 128
        self.encoder = FCNet(
            in_features=input_dims,
            out_features=output_dims,
            hidden_layers=3,
            hidden_neurons=[256, 128, output_dims],
            activation=nn.ReLU,
        )
        self.representation_space = add_tensor_support(
            spaces.Box(low=-np.inf, high=np.inf, shape=[output_dims])
        )
        self.hidden_size = output_dims
    else:
        self.encoder, self.hidden_size = self.make_encoder()
        # TODO: Check that the outputs of the encoders are actually
        # flattened. I'm not sure they all are, in which case the samples
        # wouldn't match with this space.
        self.representation_space = spaces.Box(-np.inf, np.inf, (self.hidden_size,), np.float32)
    logger.info(f"Moving encoder to device {self.config.device}")
    self.encoder = self.encoder.to(self.config.device)
    self.representation_space = add_tensor_support(self.representation_space)
    # Upgrade the type of hparams for the output head based on the setting, if
    # needed.
    if not isinstance(self.hp.output_head, self.OutputHead.HParams):
        self.hp.output_head = self.hp.output_head.upgrade(target_type=self.OutputHead.HParams)
    # Then, create the 'default' output head.
    self.output_head: OutputHead = self.create_output_head(task_id=0)
def make_encoder(self) -> Tuple[nn.Module, int]:
    """Build the encoder selected in the hyper-parameters.

    The work is delegated to `get_pretrained_encoder`, which receives the
    encoder type, whether pretrained weights should be used (the opposite of
    `train_from_scratch`), whether those weights should be frozen, and the
    optional new hidden size.

    Returns:
        Tuple[nn.Module, int]: the encoder and its hidden (output) size.

    TODO: Could instead return the output space of the encoder, in case we
    don't necessarily want flattened representations (e.g. for image
    segmentation tasks).
    """
    hparams = self.hp
    encoder_options = dict(
        encoder_model=hparams.encoder,
        pretrained=not hparams.train_from_scratch,
        freeze_pretrained_weights=hparams.freeze_pretrained_encoder_weights,
        new_hidden_size=hparams.new_hidden_size,
    )
    model, n_hidden = get_pretrained_encoder(**encoder_options)
    return model, n_hidden
def forward(self, observations: IncrementalAssumption.Observations) -> ForwardPass:
    """Run the model on a batch of observations.

    The observations are preprocessed, encoded into representations, and then
    given to the current output head to obtain the actions (predictions).

    Returns:
        ForwardPass: holds the (preprocessed) observations, the
        representations and the actions. Its `rewards` field is left to
        `None`, so that it can be filled in later, before the loss is
        calculated.
    """
    # Any additional input preprocessing happens here, so that transforms don't
    # have to be passed to the settings from the method side.
    processed = self.preprocess_observations(observations)
    assert processed.x.device == self.device

    features = self.encode(processed)
    # When requested, cut the graph so the output head's gradients don't reach
    # the encoder.
    features = features.detach() if self.hp.detach_output_head else features

    predictions = self.output_head(observations=processed, representations=features)
    return ForwardPass(
        observations=processed,
        representations=features,
        actions=predictions,
        rewards=None,
    )
def encode(self, observations: Observations) -> Tensor:
    """Encodes a batch of samples `x` into a hidden vector.

    Args:
        observations (Observations): Observations containing a batch of
            samples in their `x` field.

    Returns:
        Tensor: The output of the encoder for these samples, placed on
        `self.device`.
    """
    # Here in this base model the encoder only takes the 'x' from the
    # observations.
    x = torch.as_tensor(observations.x, device=self.device, dtype=self.dtype)
    assert x.device == self.device

    # Only the first parameter is needed to know where the encoder lives.
    first_parameter = next(iter(self.encoder.parameters()), None)
    encoder_device = first_parameter.device if first_parameter is not None else self.device
    # BUG: When using the EWCTask, there seem to be some issues related to which
    # device the model is stored on, hence this defensive transfer.
    if encoder_device != self.device:
        x = x.to(encoder_device)

    h_x = self.encoder(x)

    # Unwrap / convert BEFORE moving between devices: a list has no `.to` method.
    if isinstance(h_x, list) and len(h_x) == 1:
        # Some pretrained encoders sometimes give back a list with one tensor. (?)
        h_x = h_x[0]
    if not isinstance(h_x, Tensor):
        h_x = torch.as_tensor(h_x, device=self.device, dtype=self.dtype)
    elif h_x.device != self.device:
        h_x = h_x.to(self.device)
    return h_x
def create_output_head(self, task_id: Optional[int]) -> OutputHead:
    """Create an output head for the current action and reward spaces.

    NOTE: This assumes that the input, action and reward spaces don't change
    between tasks.

    Parameters
    ----------
    task_id : Optional[int]
        ID of the task associated with this new output head. Can be `None`, which is
        interpreted as saying that either the task labels aren't available, or that
        this output head will be used for all tasks.

    Returns
    -------
    OutputHead
        The new output head, moved to the device of the model.
    """
    # TODO: Maybe add something like `setting.get_action_space(task_id)`
    # NOTE: `self.OutputHead` is the type of output head used for the current
    # setting. Passing `name=None` uses the name defined on the output head.
    head = self.OutputHead(
        input_space=self.representation_space,
        action_space=self.action_space,
        reward_space=self.reward_space,
        hparams=self.hp.output_head,
        name=None,
    )
    head = head.to(self.device)

    # An output head with an `optimizer` attribute of its own is left out of the
    # optimizer of the whole model. (NOTE: this isn't the case in practice so far)
    has_own_optimizer = bool(getattr(head, "optimizer", None))
    # Without a Trainer, the optimizer of the model doesn't exist yet, so there
    # is nothing to add the new parameters to at this point.
    if not has_own_optimizer and self.trainer:
        model_optimizer: Optimizer = self.optimizers()
        assert isinstance(model_optimizer, Optimizer)
        model_optimizer.add_param_group({"params": head.parameters()})
    return head
def output_head_type(self, setting: SettingType) -> Type[OutputHead]:
    """Return the type of output head we should use in a given setting.

    - RL settings: the head chosen with the `rl_output_head_algo` hyper-parameter
      (only discrete action spaces are supported);
    - SL settings with discrete actions and discrete rewards: classification;
    - SL settings with a Box action space: regression.

    Raises:
        NotImplementedError: when no output head supports the spaces of the setting.
    """
    if isinstance(setting, RLSetting):
        if isinstance(setting.action_space, spaces.Discrete):
            rl_head_type = self.hp.rl_output_head_algo
            assert issubclass(rl_head_type, OutputHead)
            return rl_head_type
        raise NotImplementedError("Only support discrete actions for now.")

    assert isinstance(setting, SLSetting)
    action_space = setting.action_space
    discrete_actions = isinstance(action_space, spaces.Discrete)
    # Classification problem: Discrete action, Discrete rewards (labels).
    # TODO: There might be some RL environments with discrete rewards, right?
    # For instance CartPole is, on paper, a discrete reward setting, since it's
    # always 1.
    if discrete_actions and isinstance(setting.reward_space, spaces.Discrete):
        return ClassificationHead
    # Regression problem: For now there is only RL that has such a space.
    if isinstance(action_space, spaces.Box):
        return RegressionHead
    raise NotImplementedError(f"Unsupported action space: {setting.action_space}")
def training_step(
    self,
    batch: Tuple[Observations, Optional[Rewards]],
    batch_idx: int,
    environment: Environment = None,
    dataloader_idx: int = None,
    optimizer_idx: int = None,
) -> ForwardPass:
    """Training step: delegates to `shared_step` with the "train" phase.

    Falls back to the training environment of the setting when no environment
    is given.
    """
    train_env = environment or self.setting.train_env
    step_kwargs = dict(
        batch_idx=batch_idx,
        environment=train_env,
        phase="train",
        dataloader_idx=dataloader_idx,
        optimizer_idx=optimizer_idx,
    )
    return self.shared_step(batch, **step_kwargs)
def validation_step(
    self,
    batch: Tuple[Observations, Optional[Rewards]],
    batch_idx: int,
    environment: Environment = None,
    dataloader_idx: int = None,
) -> ForwardPass:
    """Validation step: delegates to `shared_step` with the "val" phase.

    Falls back to the validation environment of the setting when no
    environment is given.
    """
    val_env = environment or self.setting.val_env
    step_kwargs = dict(
        batch_idx=batch_idx,
        environment=val_env,
        phase="val",
        dataloader_idx=dataloader_idx,
    )
    return self.shared_step(batch, **step_kwargs)
def test_step(
    self,
    batch: Tuple[Observations, Optional[Rewards]],
    batch_idx: int,
    environment: Environment = None,
    dataloader_idx: int = None,
) -> ForwardPass:
    """Test step: delegates to `shared_step` with the "test" phase.

    Falls back to the test environment of the setting when no environment is
    given.
    """
    test_env = environment or self.setting.test_env
    step_kwargs = dict(
        batch_idx=batch_idx,
        environment=test_env,
        phase="test",
        dataloader_idx=dataloader_idx,
    )
    return self.shared_step(batch, **step_kwargs)
def shared_step(
    self,
    batch: Tuple[Observations, Optional[Rewards]],
    batch_idx: int,
    environment: Environment,
    phase: str,
    dataloader_idx: int = None,
    optimizer_idx: int = None,
) -> ForwardPass:
    """Main logic of the "forward pass".

    This is used as part of `training_step`, `validation_step` and `test_step`.
    See the PL docs for `training_step` for more info.

    The batch is either an (observations, rewards) tuple, or just the
    observations. The forward pass that is returned contains the rewards only
    when they were part of the batch.

    NOTE: The prediction / environment interaction / loss calculation has been
    moved into the `shared_step_end` method for DP to also work.
    """
    # Split the batch into observations and (maybe) rewards.
    observations, rewards = batch, None
    if isinstance(batch, tuple) and len(batch) == 2:
        observations, rewards = batch
    else:
        assert isinstance(batch, self.Observations), batch

    # The result holds the processed observations, their representations, and
    # the actions (predictions).
    result: ForwardPass = self(observations)
    if rewards is None:
        return result
    return dataclasses.replace(result, rewards=rewards)
def training_step_end(
    self, step_outputs: Union[ForwardPass, List[ForwardPass]]
) -> Optional[Dict[str, Any]]:
    """Computes the training loss and, in manual optimization, updates the model.

    Args:
        step_outputs: The output(s) of `training_step`, forwarded as-is to
            `shared_step_end`.

    Returns:
        `None` when there is no differentiable loss at this step, otherwise a
        dict with the "loss" tensor and a "hidden" entry (taken from the
        tensors of the Loss object, `None` when absent).
    """
    loss_object: Loss = self.shared_step_end(
        step_outputs=step_outputs, phase="train", environment=self.setting.train_env
    )
    loss = loss_object.loss
    if not isinstance(loss, Tensor) or not loss.requires_grad:
        # NOTE: There might be no loss at some steps, because for instance
        # we haven't reached the end of an episode in an RL setting.
        return None
    # NOTE In RL, we can only update the model's weights on steps where the output
    # head has a loss, because the output head has buffers of tensors whose grads
    # would become invalidated if we performed the optimizer step.
    if loss.requires_grad and not self.automatic_optimization:
        output_head_loss = loss_object.losses.get(self.output_head.name)
        update_model = output_head_loss is not None and output_head_loss.requires_grad
        optimizer = self.optimizers()
        # The graph is retained on the steps where the weights aren't updated.
        self.manual_backward(loss, optimizer, retain_graph=not update_model)
        if update_model:
            optimizer.step()
            optimizer.zero_grad()
    # BUG: Need to return this dict, otherwise the optimizer closure in the DP
    # accelerator fails (it only expects to get `dict` or `Tensor` values for
    # `training_step_output` in `_process_training_step_output`)
    # return loss
    # NOTE: the 'hidden' key isn't currently used, but it could be in the future if
    # we added support for BPTT, i.e. recurrent policies or output heads, etc.
    return {"loss": loss, "hidden": loss_object.tensors.get("hidden")}
def validation_step_end(self, step_outputs: Union[ForwardPass, List[ForwardPass]]) -> Loss:
    """Validation counterpart of `training_step_end`.

    Returns the Loss produced by `shared_step_end` for the "val" phase, using
    the validation environment of the setting.
    """
    val_env = self.setting.val_env
    return self.shared_step_end(step_outputs=step_outputs, phase="val", environment=val_env)
def test_step_end(self, step_outputs: Union[ForwardPass, List[ForwardPass]]) -> Loss:
    """Test counterpart of `training_step_end`.

    Returns the Loss produced by `shared_step_end` for the "test" phase, using
    the test environment of the setting.
    """
    test_env = self.setting.test_env
    return self.shared_step_end(step_outputs=step_outputs, phase="test", environment=test_env)
def shared_step_end(
    self,
    step_outputs: Union[ForwardPass, List[ForwardPass]],
    phase: str,
    environment: Environment,
) -> Loss:
    """Called with the outputs of each replica's `[train/validation/test]_step`:

    - Merges the forward passes, when a list of them is received;
    - Sends the Actions to the environment to obtain the rewards, when the
      forward pass doesn't already contain them;
    - Calculates the loss, given the merged forward pass and the rewards/labels;
    - Logs the relevant values and returns the resulting Loss object. Nothing
      is logged when the loss tensor is equal to zero.
    """
    merged: ForwardPass = (
        ForwardPass.concatenate(step_outputs) if isinstance(step_outputs, list) else step_outputs
    )
    actions = merged.actions
    rewards: Optional[Rewards] = merged.rewards

    if rewards is None:
        # The rewards have to come from the environment (the dataloader).
        config = self.config
        if config.debug and config.render:
            environment.render("human")
        assert isinstance(actions, Actions), actions
        rewards = environment.send(actions)
        assert rewards is not None
    # BUG: Rewards is array of [None]s in TraditionalSL and MultiTask SL!
    assert isinstance(rewards, Rewards), rewards

    # Now that we have the rewards, we can calculate the loss.
    loss: Loss = self.get_loss(merged, rewards, loss_name=phase)
    if loss.loss == 0.0:
        return loss

    # Values displayed in the progress bar (only when debugging).
    for name, entry in loss.to_pbar_message().items():
        assert not isinstance(entry, dict), "shouldn't be nested at this point!"
        self.log(name, entry, prog_bar=self.config.debug, logger=False)
        logger.debug(f"{name}: {entry}")

    # Values sent to the logger.
    for name, entry in loss.to_log_dict(verbose=self.config.verbose).items():
        assert not isinstance(entry, dict), "shouldn't be nested at this point!"
        self.log(name, entry, prog_bar=False, logger=True)
    return loss
def split_batch(self, batch: Any) -> Tuple[Observations, Optional[Rewards]]:
    """Splits the batch into the observations and the rewards.

    The batch is either an instance of the `Observations` type of the setting
    (copied to `self.Observations`), in which case there are no rewards, or a
    tuple / list with the observations and the rewards.

    Both items are moved to the device of the model with their `torch` method
    (the rewards only when they aren't `None`).

    TODO: This is slightly confusing, should probably get rid of this.
    """
    obs_type = self.Observations
    if not isinstance(batch, obs_type):
        assert isinstance(batch, (tuple, list)) and len(batch) == 2
        obs, rew = batch
    else:
        obs, rew = batch, None
    assert isinstance(obs, obs_type), (
        obs,
        type(obs),
        obs_type,
    )

    # Move everything to the right device, converting numpy arrays to tensors.
    target_device = self.device
    obs = obs.torch(device=target_device)
    rew = None if rew is None else rew.torch(device=target_device)
    return obs, rew
def get_loss(
    self, forward_pass: ForwardPass, rewards: Rewards = None, loss_name: str = ""
) -> Loss:
    """Gets a Loss given the results of the forward pass and the reward.

    Args:
        forward_pass (ForwardPass): Results of the forward pass.
        rewards (Rewards, optional): The rewards that resulted from the actions
            chosen in the forward pass. Defaults to None.
        loss_name (str, optional): The name for the resulting Loss. Must not
            be empty.

    Returns:
        Loss: a Loss object with the given name. It contains the loss of the
        output head only when `rewards` is truthy.
    """
    assert loss_name
    # Start from an 'empty' Loss object with the given name, so that a Loss is
    # always returned, even when there are no rewards and we can't get the loss
    # from the output head.
    result = Loss(name=loss_name)
    if not rewards:
        return result

    assert rewards.y is not None
    # TODO: If the forward pass object is re-organized to also contain the
    # predictions of the self-supervised tasks, 'actions' could become a dict
    # indexed with the name of each output head. For now, there is only one
    # "prediction" in the actions, and only the 'y' from the rewards is used
    # in the output head.
    head_loss = self.output_head_loss(
        forward_pass, actions=forward_pass.actions, rewards=rewards
    )
    result += head_loss
    return result
def output_head_loss(
    self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards
) -> Loss:
    """Gets the Loss of the (current) output head.

    The actions are required to be on the device of the model.
    """
    # TODO: The rewards can still contain just numpy arrays, keeping it so for
    # now (which is why their device isn't checked here).
    assert actions.device == self.device
    head = self.output_head
    return head.get_loss(forward_pass, actions=actions, rewards=rewards)
def preprocess_observations(self, observations: Observations) -> Observations:
    """Returns the observations, converted with `torch` onto the model's device.

    TODO: Make sure this also works in the supervised setting, and in settings
    without task labels (None in numpy arrays).
    """
    assert isinstance(observations, self.Observations)
    return observations.torch(device=self.device)
def preprocess_rewards(self, reward: Rewards) -> Rewards:
    """Preprocessing hook for the rewards: gives them back untouched."""
    return reward
def configure_optimizers(self):
    """Creates the optimizer for all the parameters of the model.

    The optimizer class, the learning rate and the weight decay all come from
    the hyper-parameters (`self.hp`).

    Returns:
        An instance of the chosen optimizer class.
    """
    optimizer_class: Type[Optimizer] = self.hp.optimizer
    options = {
        "lr": self.hp.learning_rate,
        "weight_decay": self.hp.weight_decay,
    }
    return optimizer_class(self.parameters(), **options)
@property
def batch_size(self) -> int:
    """The batch size, which is stored in the hyper-parameters."""
    hparams = self.hp
    return hparams.batch_size

@batch_size.setter
def batch_size(self, value: int) -> None:
    """Stores the new batch size in the hyper-parameters."""
    hparams = self.hp
    hparams.batch_size = value
@property
def learning_rate(self) -> float:
    """The learning rate, which is stored in the hyper-parameters."""
    hparams = self.hp
    return hparams.learning_rate

@learning_rate.setter
def learning_rate(self, value: float) -> None:
    """Stores the new learning rate in the hyper-parameters."""
    hparams = self.hp
    hparams.learning_rate = value
def on_task_switch(self, task_id: Optional[int]) -> None:
    """Hook called when switching between tasks. Does nothing in this class.

    Args:
        task_id (Optional[int]): the id of the task.
    """
def shared_modules(self) -> Dict[str, nn.Module]:
    """Returns any trainable modules in `self` that are shared across tasks.

    By giving this information, these weights can then be used in
    regularization-based auxiliary tasks like EWC, for example.

    Returns
    -------
    Dict[str, nn.Module]:
        A `nn.ModuleDict` with the "encoder" and the "output_head" of the
        model. A module which is falsy is left out.
    """
    shared: Dict[str, nn.Module] = nn.ModuleDict()
    for attribute_name in ("encoder", "output_head"):
        module = getattr(self, attribute_name)
        if module:
            shared[attribute_name] = module
    return shared
# def summarize(self, mode: str = ModelSummary.MODE_DEFAULT) -> ModelSummary:
# model_summary = ModelSummary(self, mode=mode)
# log.debug("\n" + str(model_summary))
# return model_summary
def _are_batched(self, observations: IncrementalAssumption.Observations) -> bool:
    """Returns whether these observations are batched.

    Only the `x` of the observations is inspected: it counts as batched when
    it has 4 dimensions for image-like spaces, or one more dimension than the
    'x' space of the model otherwise.

    Raises:
        NotImplementedError: when the 'x' space is neither an Image, a space
            with a 4-dimensional shape, nor a Box.
    """
    assert isinstance(self.observation_space, spaces.Dict)
    x_space: spaces.Box = self.observation_space["x"]
    n_dims = observations.x.ndim if isinstance(x_space, Image) or len(x_space.shape) == 4 else None
    if n_dims is not None:
        return n_dims == 4

    if isinstance(x_space, spaces.Box):
        # self.observation_space *should* usually reflect the shapes of individual
        # (non-batched) observations.
        unbatched_ndim = len(x_space.shape)
        return observations.x.ndim == unbatched_ndim + 1

    raise NotImplementedError(
        f"Don't know how to tell if obs space {x_space} is batched, only "
        f"support Box spaces for the observation's 'x' for now."
    )
# Register a handler for decoding the type of output head to use (a field in the
# hparams) from a dictionary. The handler is the identity: the value is given
# back unchanged.
register_decoding_fn(Type[OutputHead], lambda v: v)
================================================
FILE: sequoia/methods/models/base_model/multihead_model.py
================================================
from dataclasses import dataclass, replace
from typing import Dict, List, Optional, Sequence, Tuple, TypeVar, Union
import numpy as np
import torch
import torch.nn.functional as F
from torch import Tensor, nn
from sequoia.common import Batch, Config, Loss
from sequoia.settings import Actions, Environment, Observations, Rewards
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.utils.generic_functions import concatenate, get_slice, stack
from sequoia.utils.logging_utils import get_logger
from ..forward_pass import ForwardPass
from ..output_heads import OutputHead
from .model import Model, SettingType
logger = get_logger(__name__)
class MultiHeadModel(Model[SettingType]):
"""Mixin that adds multi-head prediction to the Model when task labels are
available.
"""
@dataclass
class HParams(Model.HParams):
    """Hyperparameters specific to a multi-head model."""

    # Whether to create one output head per task. When falsy (including the
    # default of `None`), the single output head is used for every task.
    multihead: Optional[bool] = None
def __init__(self, setting: SettingType, hparams: HParams, config: Config):
    """Creates the model and registers its default output head.

    The output head created by the base class is stored in `self.output_heads`
    under the key "0" when the setting gives task labels at train time, and
    under the key "None" otherwise.
    """
    super().__init__(setting=setting, hparams=hparams, config=config)
    self.hp: MultiHeadModel.HParams
    self.setting: SettingType

    # Task-tracking state.
    self.previous_task: Optional[int] = None
    self.current_task: Optional[int] = None
    self.previous_task_labels: Optional[Sequence[int]] = None
    # TODO: Add an optional task inference mechanism
    # See https://github.com/lebrice/Sequoia/issues/49
    self.task_inference_module: Optional[nn.Module] = None

    # Dictionary of output heads, indexed with the task id as a string.
    self.output_heads: Dict[str, OutputHead] = nn.ModuleDict()
    # NOTE: Not sure if using `setting.current_task_id` instead of 0 could cause
    # an issue when setting is a SettingProxy.
    first_task_id = 0 if setting.task_labels_at_train_time else None
    self.output_heads[str(first_task_id)] = self.output_head
def output_head_loss(
    self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards
) -> Loss:
    """Gets the loss of the output head(s), using the task labels of the batch.

    When the model isn't multi-headed, this is just the loss of the (only)
    output head. Otherwise, the batch is split by task label, the output head
    of each task gives the loss for its portion of the batch, and the sum of
    these losses is returned.

    Raises:
        NotImplementedError: when there are no task labels and no task inference
            module, or when an episode ended on a task switch with a batch size
            other than `None` or 1.

    TODO: Need to then re-split stuff (undo the work we did in forward) to get a
    loss per output head?
    """
    # Asks each output head for its contribution to the loss.
    observations: IncrementalAssumption.Observations = forward_pass.observations
    task_labels = observations.task_labels
    if isinstance(task_labels, Tensor):
        task_labels = task_labels.cpu().numpy()
    batch_size = forward_pass.batch_size
    assert batch_size is not None
    if task_labels is None:
        if self.task_inference_module:
            # TODO: Predict the task ids using some kind of task
            # inference mechanism.
            task_labels = self.task_inference_module(forward_pass)
        else:
            raise NotImplementedError(
                "Multihead model doesn't have access to task labels and "
                "doesn't have a task inference module!"
            )
        # TODO: Maybe use the last trained output head, by default?
    # TODO: Check if this is still necessary
    if self.previous_task_labels is None:
        self.previous_task_labels = task_labels
    # Default behaviour: use the (only) output head.
    if not self.hp.multihead:
        return self.output_head.get_loss(
            forward_pass,
            actions=actions,
            rewards=rewards,
        )
    # The sum of all the losses from all the output heads.
    total_loss = Loss(self.output_head.name)
    # Compares the task labels with those seen during the previous call.
    task_switched_in_env = task_labels != self.previous_task_labels
    # This `done` attribute isn't added in supervised settings.
    episode_ended = getattr(observations, "done", np.zeros(batch_size, dtype=bool))
    # TODO: Remove all this useless conversion from Tensors to ndarrays
    if isinstance(episode_ended, Tensor):
        episode_ended = episode_ended.cpu().numpy()
    # logger.debug(f"Task labels: {task_labels}, task switched in env: {task_switched_in_env}, episode ended: {episode_ended}")
    # Indices where `done` is temporarily set to False below (restored before
    # returning).
    done_set_to_false_temporarily_indices = []
    if any(episode_ended & task_switched_in_env):
        # In the environments where there was a task switch to a different task and
        # where some episodes ended, we need to first get the corresponding output
        # head losses from these environments first.
        if self.batch_size in {None, 1}:
            # If the batch size is 1, this is a little bit simpler to deal with.
            previous_task: int = self.previous_task_labels[0].item()
            from sequoia.methods.models.output_heads.rl import PolicyHead
            previous_output_head = self.output_heads[str(previous_task)]
            assert isinstance(
                previous_output_head, PolicyHead
            ), "todo: assuming that this only happends in RL currently."
            # We want the loss from that output head, but we don't want to
            # re-compute it below!
            env_index_in_previous_batch = 0
            logger.debug(
                f"Getting a loss from the output head for task {previous_task}, that was used for the last task."
            )
            env_episode_loss = previous_output_head.get_episode_loss(
                env_index_in_previous_batch, done=True
            )
            # logger.debug(f"Loss from that output head: {env_episode_loss}")
            # Add this end-of-episode loss to the total loss.
            # BUG: This can sometimes (rarely) be None! Need to better understand
            # why this is happening.
            if env_episode_loss is None:
                logger.warning(
                    RuntimeWarning(
                        f"BUG: Env {env_index_in_previous_batch} gave back a loss "
                        f"of `None`, when we expected a loss from that output head "
                        f"for task id {previous_task}."
                    )
                )
            else:
                total_loss += env_episode_loss
            # We call on_episode_end so the output head can clear the relevant
            # buffers. Note that get_episode_loss(env_index, done=True) doesn't
            # clear the buffers, it just calculates a loss.
            previous_output_head.on_episode_end(env_index_in_previous_batch)
            # Set `done` to `False` for that env, to prevent the output head for the
            # new task from seeing the first observation in the episode as the last.
            observations.done[env_index_in_previous_batch] = False
            # FIXME: If we modify that entry in-place, then even after this method
            # returns, the change will persist.. Therefore we just save the indices
            # that we altered, and reset them before returning.
            done_set_to_false_temporarily_indices.append(env_index_in_previous_batch)
        else:
            raise NotImplementedError(
                "TODO: The BaseModel doesn't yet support having multiple "
                "different tasks within the same batch in RL. "
            )
            # IDEA: Need to somehow pass the indices of which env to take care of to
            # each output head, so they can create / clear buffers only when needed.
    assert task_labels is not None
    all_task_indices: Dict[int, Tensor] = get_task_indices(task_labels)
    # Get the loss from each output head:
    if len(all_task_indices) == 1:
        # If everything is in the same task (only one key), no need to split/merge
        # stuff, so it's a bit easier:
        task_id: int = task_labels[0].item()
        self.setup_for_task(task_id)
        # task_output_head = self.output_heads[str(task_id)]
        total_loss += super().output_head_loss(forward_pass, actions=actions, rewards=rewards)
        # total_loss += self.output_head.get_loss(
        # forward_pass, actions=actions, rewards=rewards,
        # )
    else:
        # Get the loss of each task from the corresponding slice of the batch.
        # (could be done in parallel but whatever.)
        # TODO: Also, not sure if this will play well with DP, DDP, etc.
        for task_id, task_indices in all_task_indices.items():
            logger.debug(
                f"Getting output head loss for "
                f"{len(task_indices)/batch_size:.0%} of the batch which "
                f"has task_id of '{task_id}'."
            )
            self.setup_for_task(task_id)
            task_loss = super().output_head_loss(
                forward_pass=get_slice(forward_pass, task_indices),
                actions=get_slice(actions, task_indices),
                rewards=get_slice(rewards, task_indices),
            )
            # NOTE: useful for debugging, but shouldn't be enabled normally.
            # task_loss.name += f"(task {task_id})"
            logger.debug(f"Task {task_id} loss: {task_loss}")
            total_loss += task_loss
    # Remember these task labels, to detect task switches on the next call.
    self.previous_task_labels = task_labels
    # FIXME: Reset the 'done' to True, if we manually set it to False.
    for index in done_set_to_false_temporarily_indices:
        observations.done[index] = True
    return total_loss
def on_before_zero_grad(self, optimizer):
    """Detaches the buffers of every `PolicyHead` among the output heads.

    Runs after the `on_before_zero_grad` hook of the parent class.
    """
    super().on_before_zero_grad(optimizer)
    from sequoia.methods.models.output_heads.rl import PolicyHead

    # Only the heads themselves are needed here, not their task ids.
    for head in self.output_heads.values():
        if isinstance(head, PolicyHead):
            head.detach_all_buffers()
def shared_step(
    self,
    batch: Tuple[Observations, Optional[Rewards]],
    batch_idx: int,
    environment: Environment,
    phase: str,
    dataloader_idx: int = None,
    optimizer_idx: int = None,
) -> Dict:
    """Same as the `shared_step` of the parent class, ignoring `dataloader_idx`.

    The dataloader index is never forwarded: the parent always receives
    `None` instead, since the task labels should be given for each example
    anyway.
    """
    assert phase
    was_given_dataloader_idx = dataloader_idx is not None
    if was_given_dataloader_idx:
        logger.debug(
            "TODO: We were indirectly given a task id with the "
            "dataloader_idx. Ignoring for now, as we're trying to avoid "
            "this (the task labels should be given for each example "
            "anyway). "
        )
    return super().shared_step(
        batch=batch,
        batch_idx=batch_idx,
        environment=environment,
        phase=phase,
        dataloader_idx=None,
        optimizer_idx=optimizer_idx,
    )
def on_task_switch(self, task_id: Optional[int]):
    """Called when switching between tasks.

    Keeps track of the previous and current task ids and, in a multi-head
    model, switches to the output head of the new task (creating it if needed).

    Args:
        task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.

    NOTE: You can check whether this task switch is occurring at train or test
    time using `self.training`.
    """
    logger.info(f"Switching from task {self.current_task} -> {task_id}.")
    # TODO: Move these to the base model perhaps? (In case there is ever a
    # re-ordering of the mixins that make up the BaseModel)
    super().on_task_switch(task_id)
    self.previous_task, self.current_task = self.current_task, task_id

    if task_id is None or not self.hp.multihead:
        return
    # Switch the output head to use.
    self.output_head = self.get_or_create_output_head(task_id)
def shared_modules(self) -> Dict[str, nn.Module]:
    """Returns any trainable modules in `self` that are shared across tasks.

    By giving this information, these weights can then be used in
    regularization-based auxiliary tasks like EWC, for example.

    Starts from the shared modules of the parent class. When using multiple
    output heads (i.e. when `self.hp.multihead` is truthy), the "output_head"
    entry is removed, since the output head is then specific to a task.

    Returns
    -------
    Dict[str, nn.Module]:
        Dictionary mapping from name to the shared modules, if any.
    """
    modules = super().shared_modules()
    if not self.hp.multihead:
        return modules
    modules.pop("output_head")
    return modules
def load_state_dict(
    self,
    state_dict: Union[Dict[str, Tensor], Dict[str, Tensor]],
    strict: bool = True,
):
    """Load `state_dict` non-strictly and try to rebuild per-task output heads.

    Parameters
    ----------
    state_dict :
        Mapping from parameter name to tensor.
    strict : bool
        NOTE(review): this value is never forwarded; the parent's
        `load_state_dict` is always called with `strict=False` below.

    Returns
    -------
    The `(missing_keys, unexpected_keys)` pair returned by the parent's
    `load_state_dict`.
    """
    if self.hp.multihead:
        # TODO: Figure out exactly where/when/how pytorch-lightning is
        # trying to load the model from, because there are some keys
        # missing (['output_heads.1.output.weight', 'output_heads.1.output.bias'])
        # For now, we're just gonna pretend it's not a problem, I guess?
        strict = False
    # NOTE(review): the literal `strict=False` is passed here, so the local
    # `strict` variable (and the caller's argument) has no effect.
    missing_keys, unexpected_keys = super().load_state_dict(state_dict=state_dict, strict=False)
    # TODO: Double-check that this makes sense and works properly.
    if self.hp.multihead and unexpected_keys:
        # Unexpected keys are taken as a sign that the checkpoint contains output
        # heads that don't exist yet on this model, so one head is (re)created
        # for every task of the setting.
        for i in range(self.setting.nb_tasks):
            # Try to load the output head weights
            logger.info(f"Creating a new output head for task {i}")
            new_output_head = self.create_output_head(self.setting, task_id=i)
            # FIXME: TODO: This is wrong. We should create all the
            # output heads if they aren't already created, and then try to
            # load the state_dict again.
            # NOTE(review): the keys given to the new head are the model-level
            # names from `unexpected_keys`; with `strict=False`, keys that the
            # head doesn't recognize are ignored rather than raising.
            new_output_head.load_state_dict(
                {k: state_dict[k] for k in unexpected_keys},
                strict=False,
            )
            key = str(i)
            self.output_heads[key] = new_output_head.to(self.device)
    if missing_keys or unexpected_keys:
        logger.debug(f"Missing keys: {missing_keys}, unexpected keys: {unexpected_keys}")
    return missing_keys, unexpected_keys
def get_or_create_output_head(self, task_id: int) -> nn.Module:
    """Return the output head for task `task_id`, creating it on first use.

    Heads are stored in `self.output_heads` under the string form of the task
    index. A newly created head is registered there before being returned.

    Parameters
    ----------
    task_id : int
        Index of the task whose output head is wanted.

    Returns
    -------
    nn.Module
        The (possibly newly created) output head for that task.

    Raises
    ------
    AssertionError
        If the model is not multi-headed (`self.hp.multihead` is falsy).
    """
    # This method only makes sense for multi-headed models.
    assert self.hp.multihead, "This shouldn't get called when the model isn't multi-headed!"
    key = str(task_id)
    if key in self.output_heads:
        return self.output_heads[key]

    logger.info(f"Creating a new output head for task {task_id}.")
    # NOTE: According to the original author, `create_output_head` also takes
    # care of adding the new head's parameters to the optimizer (not visible
    # from here).
    task_output_head: nn.Module = self.create_output_head(task_id=task_id)
    self.output_heads[key] = task_output_head
    return task_output_head
def forward(self, observations: IncrementalAssumption.Observations) -> ForwardPass:
    """Smart forward pass with multi-head predictions and task inference.

    This forward pass can handle three different scenarios, depending on the
    contents of `observations.task_labels`:

    1.  Base case: task labels are present, and all examples are from the same task.
        - Perform the 'usual' forward pass (e.g. `super().forward(observations)`).
    2.  Task labels are present, and the batch contains a mix of samples from
        different tasks:
        - Create slices of the batch for each task, where all items in each
          'sub-batch' come from the same task.
        - Perform a forward pass for each task, by calling `forward` recursively
          with the sub-batch for each task as an argument (Case 1).
    3.  Task labels are *not* present (`None`, or an object array containing only
        `None`). Perform some type of task inference, using the
        `task_inference_forward_pass` method. Check its docstring for more info.

    Parameters
    ----------
    observations : Observations
        Observations from an environment. This method reads its `task_labels`
        attribute (task labels when available, else None); the remaining fields
        are consumed by the methods this one delegates to.

    Returns
    -------
    ForwardPass
        The result of the forward pass. All three cases above produce the same
        kind of output.
    """
    # TODO: Shouldn't have to do this here, since we have the @auto_move_data dec...
    # observations = observations.to(self.device)
    task_ids: Optional[Tensor] = observations.task_labels

    # Object arrays show up when task labels may be missing (`None` entries).
    # NOTE: `np.object` was removed in NumPy 1.24; the builtin `object` denotes
    # the same dtype on every NumPy version.
    if isinstance(task_ids, np.ndarray) and task_ids.dtype == object:
        task_ids = task_ids.tolist()
        if len(task_ids) == 1:
            task_ids = task_ids[0]
        elif task_ids and all(label is None for label in task_ids):
            # No label is known for any item: same situation as `task_labels=None`
            # (this used to crash in `torch.as_tensor` below).
            task_ids = None

    if task_ids is None:
        # Case 3: Run the forward pass with task inference turned on.
        return self.task_inference_forward_pass(observations)

    task_ids = torch.as_tensor(task_ids, device=self.device, dtype=int)
    task_ids_present_in_batch = torch.unique(task_ids)

    if len(task_ids_present_in_batch) > 1:
        # Case 2: The batch contains data from more than one task.
        return self.split_forward_pass(observations)

    # Base case: "Normal" forward pass, where all items come from the same task.
    # - Setup the model for this task, however you want, and then do a forward pass,
    #   as you normally would.
    # NOTE: If you want to reuse this cool multi-headed forward pass in your
    # own model, these lines here are what you'd want to change.
    task_id: int = task_ids_present_in_batch.item()

    if task_id != self.current_task and self.hp.multihead:
        # Setup the model for this task. For now we just switch the output head.
        self.output_head = self.get_or_create_output_head(task_id)

    return super().forward(observations)
def setup_for_task(self, task_id: int) -> None:
    """Prepare the model for `task_id` by selecting the matching output head.

    Does nothing when `task_id` is None or when the model is not multi-headed.
    """
    if task_id is None or not self.hp.multihead:
        return
    # For now, "setting up" a task only means switching the output head.
    self.output_head = self.get_or_create_output_head(task_id)
def split_forward_pass(self, observations: Observations) -> ForwardPass:
    """Forward pass for a batch whose items come from several different tasks.

    `forward` calls this when it finds more than one unique task label in the
    batch. The batch is cut into one sub-batch per task, `forward` is called on
    each sub-batch, and the per-item results are put back in their original
    positions.

    NOTE: This cannot recurse indefinitely: each nested `forward` call receives a
    batch whose items all come from a single task, so it never ends up calling
    `split_forward_pass` again.

    Parameters
    ----------
    observations : Observations
        Observations, in which the task labels might not all be the same.

    Returns
    -------
    ForwardPass
        The outputs from each task, re-assembled into a single batch, with the
        item ordering of `observations` preserved.
    """
    assert observations.task_labels is not None
    assert self.hp.multihead, "Can only use split forward pass with multiple heads."

    # We have task labels.
    labels = observations.task_labels
    if isinstance(labels, Tensor):
        labels = labels.cpu().numpy()

    # Map each task id to the positions of its items in the batch.
    indices_per_task: Dict[int, np.ndarray] = get_task_indices(labels)

    if len(indices_per_task) == 1:
        # Single task in the batch: nothing to split.
        only_task: int = labels[0].item()
        self.setup_for_task(only_task)
        return self.forward(observations)

    # One slot per item in the batch; filled below and then merged.
    outputs_per_item: List[Batch] = [None] * len(labels)

    for task_indices in indices_per_task.values():
        # Sub-batch in which all items come from this task.
        sub_batch = get_slice(observations, task_indices)
        # "Normal" forward pass (base case of `forward`).
        sub_output = self.forward(sub_batch)
        # Put each item's output back at its position in the full batch.
        for position, batch_index in enumerate(task_indices):
            outputs_per_item[batch_index] = get_slice(sub_output, position)

    assert all(item is not None for item in outputs_per_item)
    return concatenate(outputs_per_item)
def task_inference_forward_pass(self, observations: Observations) -> ForwardPass:
    """Forward pass with a simple form of task inference.

    Used when no task labels are available. A forward pass is performed with the
    output head of every known task; for each item in the batch, the result of
    the head that produced the single highest class probability is kept.

    Parameters
    ----------
    observations : Observations
        Batched observations without task labels (`task_labels` is None or only
        contains None).

    Returns
    -------
    ForwardPass
        For each item, the forward pass produced by the selected output head.
    """
    # We don't have access to task labels (`task_labels` is None).
    # --> Perform a simple kind of task inference:
    # 1. Perform a forward pass with each task's output head;
    # 2. Merge these predictions into a single prediction somehow.
    assert observations.task_labels is None or all(observations.task_labels == None)
    # NOTE: This assumes that the observations are batched.
    # These are used below to indicate the shape of the different tensors.
    B = observations.x.shape[0]
    T = n_known_tasks = len(self.output_heads)
    N = self.action_space.n
    # Tasks encountered previously and for which we have an output head.
    # NOTE(review): this assumes the known heads are exactly tasks 0..T-1.
    known_task_ids: list[int] = list(range(n_known_tasks))
    assert known_task_ids
    # Placeholder for the predictions from each output head for each item in the
    # batch
    task_outputs = [None for _ in known_task_ids]  # [T, B, N]
    # Get the forward pass for each task.
    for task_id in known_task_ids:
        # Create 'fake' Observations for this forward pass, with 'fake' task labels.
        # NOTE: We do this so we can call `self.forward` and not get an infinite
        # recursion.
        task_labels = torch.full([B], task_id, device=self.device, dtype=int)
        task_observations = replace(observations, task_labels=task_labels)
        # Setup the model for task `task_id`, and then do a forward pass.
        task_forward_pass = self.forward(task_observations)
        task_outputs[task_id] = task_forward_pass
    # 'Merge' the predictions from each output head using some kind of task
    # inference.
    assert all(item is not None for item in task_outputs)
    # Stack the predictions (logits) from each output head.
    stacked_forward_pass: ForwardPass = stack(task_outputs, dim=1)
    logits_from_each_head = stacked_forward_pass.actions.logits
    assert logits_from_each_head.shape == (B, T, N), (logits_from_each_head.shape, (B, T, N))
    # Normalize the logits from each output head with softmax.
    # Example with batch size of 1, output heads = 2, and classes = 4
    # (the numbers are illustrative only):
    # logits from each head: [[[123, 456, 123, 123], [1, 1, 2, 1]]]
    # 'probs' from each head: [[[0.1, 0.6, 0.1, 0.1], [0.2, 0.2, 0.4, 0.2]]]
    probs_from_each_head = torch.softmax(logits_from_each_head, dim=-1)
    assert probs_from_each_head.shape == (B, T, N)
    # Simple kind of task inference:
    # For each item in the batch, use the class that has the highest probability
    # across all output heads.
    max_probs_across_heads, chosen_head_per_class = probs_from_each_head.max(dim=1)
    assert max_probs_across_heads.shape == (B, N)
    assert chosen_head_per_class.shape == (B, N)
    # Example (continued):
    # max probs across heads: [[0.2, 0.6, 0.4, 0.2]]
    # chosen output heads per class: [[1, 0, 1, 1]]
    # Determine which output head has highest "confidence":
    max_prob_value, most_probable_class = max_probs_across_heads.max(dim=1)
    assert max_prob_value.shape == (B,)
    assert most_probable_class.shape == (B,)
    # Example (continued):
    # max_prob_value: [0.6]
    # max_prob_class: [1]
    # A bit of boolean trickery to get what we need, which is, for each item, the
    # index of the output head that gave the most confident prediction.
    mask = F.one_hot(most_probable_class, N).to(dtype=bool, device=self.device)
    chosen_output_head_per_item = chosen_head_per_class[mask]
    assert mask.shape == (B, N)
    assert chosen_output_head_per_item.shape == (B,)
    # Example (continued):
    # mask: [[False, True, False, False]]
    # chosen_output_head_per_item: [0]
    # Create a bool tensor to select items associated with the chosen output head.
    selected_mask = F.one_hot(chosen_output_head_per_item, T).to(dtype=bool, device=self.device)
    assert selected_mask.shape == (B, T)
    # Select the logits using the mask:
    selected_forward_pass = stacked_forward_pass[selected_mask]
    assert selected_forward_pass.actions.logits.shape == (B, N)
    return selected_forward_pass
from typing import Dict, Tuple, TypeVar
# Type variable for any subclass of `Batch`.
Dataclass = TypeVar("Dataclass", bound=Batch)
def get_task_indices(
    task_labels: Union[List[Optional[int]], np.ndarray, Tensor]
) -> Dict[Optional[int], Union[np.ndarray, Tensor]]:
    """Group the positions of a batch by task label.

    Parameters
    ----------
    task_labels : Union[List[Optional[int]], np.ndarray, Tensor]
        Task label of each item in the batch: a one-dimensional array-like, or
        any array-like holding a single element (e.g. a scalar). Entries may be
        `None` when stored in a list or an object-dtype ndarray. `None` instead of
        an array-like yields an empty dict.

    Returns
    -------
    Dict[Optional[int], Union[np.ndarray, Tensor]]
        Dictionary mapping each task label (int or None) to the positions in
        `task_labels` that carry that label. The positions are a Tensor (on the
        same device) when `task_labels` is a Tensor, and an ndarray otherwise.
        Keys are ordered by first appearance in `task_labels`.

    Raises
    ------
    AssertionError
        If `task_labels` is of an unsupported type, or has more than one
        dimension while holding more than one element.
    """
    if task_labels is None:
        return {}
    if isinstance(task_labels, (list, tuple)):
        # The signature advertises lists; treat them like ndarrays.
        task_labels = np.asarray(task_labels)
    assert isinstance(task_labels, (np.ndarray, Tensor)), task_labels

    # NOTE: `Tensor.size()` returns a `torch.Size` (never equal to 1), so the
    # number of elements has to be obtained with `numel()` for tensors.
    if isinstance(task_labels, Tensor):
        n_elements = task_labels.numel()
    else:
        n_elements = task_labels.size
    assert task_labels.ndim == 1 or n_elements == 1, task_labels
    task_labels = task_labels.reshape(-1)

    batch_size = len(task_labels)
    if isinstance(task_labels, Tensor):
        positions = torch.arange(batch_size, device=task_labels.device)
    else:
        positions = np.arange(batch_size)

    # `dict.fromkeys` de-duplicates while keeping first-appearance order, which
    # makes the ordering of the result deterministic (unlike `set`).
    unique_task_labels = dict.fromkeys(task_labels.tolist())
    # Boolean-mask indexing returns a fresh array/tensor for each task.
    return {task_id: positions[task_labels == task_id] for task_id in unique_task_labels}
# TODO: Remove this, currently unused.
def cleanup_task_labels(
    task_labels: Optional[Sequence[Optional[int]]],
) -> Optional[np.ndarray]:
    """Normalize task labels into either None or an integer numpy array.

    TODO: Not clear why we really have to do this in the first place. The point is, if
    we wanted to allow only a fraction of task labels for instance, then we have to deal
    with np.ndarrays with `object` dtypes.

    Parameters
    ----------
    task_labels : Optional[Sequence[Optional[int]]]
        None, an ndarray (possibly of `object` dtype, with None entries) or a
        Tensor of task ids.

    Returns
    -------
    Optional[np.ndarray]
        None if there are no task ids, or an integer numpy array (with at least
        one dimension) if there are.

    Raises
    ------
    NotImplementedError
        If only a portion of the task labels are available.
    """
    if isinstance(task_labels, np.ndarray) and task_labels.dtype == object:
        # Elementwise comparison: True where the label is missing.
        is_missing = task_labels == None
        if all(is_missing):
            return None
        if any(is_missing):
            # IDEA: Maybe set task_id to -1 in those cases, and return an int
            # ndarray as well?
            raise NotImplementedError("TODO: Only given a portion of task labels?")
        task_labels = task_labels.astype(int)

    if task_labels is None:
        return None

    assert isinstance(task_labels, (np.ndarray, Tensor)), task_labels
    if not task_labels.shape:
        # Scalars become arrays with a single element.
        task_labels = task_labels.reshape([1])
    if isinstance(task_labels, Tensor):
        task_labels = task_labels.cpu().numpy()
    return task_labels.astype(int)
================================================
FILE: sequoia/methods/models/base_model/multihead_model_test.py
================================================
"""Tests for the class-incremental version of the Model class.
"""
# from sequoia.conftest import config
from collections import defaultdict
from typing import Dict, List, Optional, Tuple, Type
import numpy as np
import pytest
import torch
from continuum import ClassIncremental
from continuum.datasets import MNIST
from continuum.tasks import TaskSet
from gym import spaces
from torch import Tensor, nn
from sequoia.common import Loss
from sequoia.common.config import Config
from sequoia.methods.base_method import BaseMethod
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.methods.models.output_heads.rl.episodic_a2c import EpisodicA2C
from sequoia.settings import ClassIncrementalSetting, RLSetting, TraditionalRLSetting
from sequoia.settings.rl import IncrementalRLSetting
from .base_model import BaseModel
from .multihead_model import MultiHeadModel, OutputHead, get_task_indices
@pytest.fixture()
def mixed_samples(config: Config):
    """Fixture giving the first few MNIST samples of each of 5 class-incremental tasks.

    Returns a dict mapping each task index to a tuple of tensors, as returned by
    `TaskSet.get_samples` (presumably `(x, y, t)` - see how the tests unpack it).
    """
    mnist = MNIST(config.data_dir, download=True, train=True)
    tasksets: List[TaskSet] = ClassIncremental(mnist, nb_tasks=5)
    n_samples_per_task = 10
    sample_indices = list(range(n_samples_per_task))

    samples_per_task: Dict[int, Tensor] = {}
    for task_id, taskset in enumerate(tasksets):
        samples = taskset.get_samples(sample_indices)
        samples_per_task[task_id] = tuple(map(torch.as_tensor, samples))
    return samples_per_task
class MockOutputHead(OutputHead):
    """Fake output head whose predictions reveal which head produced them.

    The `y_pred` of each item is the mean of its input multiplied by the task id
    of this head; the logits are random.
    """

    def __init__(self, *args, Actions: Type, task_id: int = -1, **kwargs):
        super().__init__(*args, **kwargs)
        self.task_id = task_id
        self.Actions = Actions
        self.name = f"task_{task_id}"

    def forward(self, observations, representations) -> Tensor:  # type: ignore
        """Create an action that depends on the observation and on the task id of
        this output head. `representations` is not used.
        """
        inputs: Tensor = observations.x
        # This head should only ever receive items from its own task.
        assert (observations.task_labels == self.task_id).all()
        actions = torch.stack([item.mean() * self.task_id for item in inputs])
        fake_logits = torch.rand([actions.shape[0], self.action_space.n])

        from sequoia.methods.models.output_heads.classification_head import ClassificationOutput

        # TODO: Ideally self.Actions would already be a subclass of ClassificationActions!
        # (then this could be `self.Actions(y_pred=actions, logits=fake_logits)`).
        return ClassificationOutput(y_pred=actions, logits=fake_logits)

    def get_loss(self, forward_pass, actions, rewards):
        """Return a zero-valued loss named after this head."""
        return Loss(self.name, 0.0)
# def mock_output_task(self: MultiHeadModel, x: Tensor, h_x: Tensor) -> Tensor:
# return self.output_head(x)
# def mock_encoder(self: MultiHeadModel, x: Tensor) -> Tensor:
# return x.new_ones(self.hp.hidden_size)
@pytest.mark.parametrize(
    "indices",
    [
        slice(0, 10),  # all the same task (0)
        slice(0, 20),  # 10 from task 0, 10 from task 1
        slice(0, 30),  # 10 from task 0, 10 from task 1, 10 from task 2
        slice(0, 50),  # 10 from each task.
    ],
)
def test_multiple_tasks_within_same_batch(
    mixed_samples: Dict[int, Tuple[Tensor, Tensor, Tensor]],
    indices: slice,
    monkeypatch,
    config: Config,
):
    """Check that a multi-headed model, given a batch mixing data from different
    tasks, uses the right output head for each item.

    Each `MockOutputHead` scales its prediction by its own task id, so the
    predictions tell which head handled which item.
    """
    # Build a mixed batch: concatenate the samples of all tasks, then keep a slice.
    xs, ys, ts = (torch.cat(parts)[indices] for parts in zip(*mixed_samples.values()))
    ts = ts.int()
    obs = ClassIncrementalSetting.Observations(x=xs, task_labels=ts)

    setting = ClassIncrementalSetting()
    model = MultiHeadModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(batch_size=30, multihead=True),
        config=config,
    )

    class MockEncoder(nn.Module):
        """Encoder stand-in that outputs a constant representation."""

        def forward(self, x: Tensor):
            return x.new_ones([x.shape[0], model.hidden_size])

    model.encoder = MockEncoder()

    # One mock head per task.
    for task_id in range(5):
        model.output_heads[str(task_id)] = MockOutputHead(
            input_space=spaces.Box(0, 1, [model.hidden_size]),
            action_space=spaces.Discrete(2),
            Actions=setting.Actions,
            task_id=task_id,
        )
    model.output_head = model.output_heads["0"]

    forward_pass = model(obs)
    y_preds = forward_pass["y_pred"]

    assert y_preds.shape == ts.shape
    # Expected prediction: task id times the mean of the flattened input.
    assert torch.all(y_preds == ts * xs.view([xs.shape[0], -1]).mean(1))
def test_multitask_rl_bug_without_PL(monkeypatch):
    """TODO: on_task_switch is called on the new observation, but we need to produce a
    loss for the output head that we were just using!

    Runs a hand-written training loop (no pytorch-lightning Trainer) on a
    two-task cartpole setting, and checks that the loss is non-zero exactly on the
    steps where an episode ended.
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at
    # each episode.
    max_episode_steps = 5
    setting = TraditionalRLSetting(
        dataset="cartpole",
        batch_size=1,
        nb_tasks=2,
        train_max_steps=100,
        max_episode_steps=max_episode_steps,
        add_done_to_observations=True,
    )
    assert setting.stationary_context
    # setting = RLSetting.load_benchmark("monsterkong")
    config = Config(debug=True, verbose=True, seed=123)
    config.seed_everything()
    model = BaseModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=config,
    )
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    # from pytorch_lightning import Trainer
    # trainer = Trainer(fast_dev_run=True)
    # trainer.fit(model, train_dataloader=setting.train_dataloader())
    # trainer.setup(model, stage="fit")
    # from pytorch_lightning import Trainer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    episodes = 0
    max_episodes = 5
    # Dict mapping from step to loss at that step.
    losses: Dict[int, Loss] = {}
    with setting.train_dataloader() as env:
        env.seed(123)
        # env = TimeLimit(env, max_episode_steps=max_episode_steps)
        # Iterate over the environment, which yields one observation at a time:
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)
            if step == 0:
                assert not any(obs.done)
            # NOTE: `start_task_label` and the two `stored_steps_in_each_head_*`
            # dicts are only useful for debugging (see the commented-out assert
            # below); nothing in this test reads them.
            start_task_label = obs["task_labels"][0]
            stored_steps_in_each_head_before = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            forward_pass: ForwardPass = model.forward(observations=obs)
            # Send the actions to the environment to get the rewards back.
            rewards = env.send(forward_pass.actions)
            loss: Loss = model.get_loss(
                forward_pass=forward_pass, rewards=rewards, loss_name="debug"
            )
            stored_steps_in_each_head_after = {
                task_key: output_head.num_stored_steps(0)
                for task_key, output_head in model.output_heads.items()
            }
            # if step == 5:
            # assert False, (loss, stored_steps_in_each_head_before, stored_steps_in_each_head_after)
            if any(obs.done):
                # An episode just ended: there should be a loss to train on.
                assert loss.loss != 0.0, step
                assert loss.loss.requires_grad
                # Backpropagate the loss, update the models, etc etc.
                loss.loss.backward()
                model.on_after_backward()
                optimizer.step()
                model.on_before_zero_grad(optimizer)
                optimizer.zero_grad()
                # TODO: Need to let the model know than an update is happening so it can clear
                # buffers etc.
                episodes += sum(obs.done)
                losses[step] = loss
            else:
                # Mid-episode: no loss is expected yet.
                assert loss.loss == 0.0
            # TODO:
            print(
                f"Step {step}, episode {episodes}: x={obs.x}, done={obs.done}, reward={rewards} task labels: {obs.task_labels}, loss: {loss.losses.keys()}: {loss.loss}"
            )
            if episodes > max_episodes:
                break
    # assert False, losses
@pytest.mark.xfail(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.")
def test_multitask_rl_bug_with_PL(monkeypatch, config: Config):
    """Same scenario as `test_multitask_rl_bug_without_PL`, but going through the
    model's `training_step` / `training_step_end` after a `fast_dev_run` fit with a
    pytorch-lightning Trainer.

    Currently expected to fail (see the `xfail` marker).
    """
    # NOTE: Tasks don't have anything to do with the task schedule. They are sampled at
    # each episode.
    cpu_config = config
    # cpu_config = Config(device="cpu", num_workers=0)
    setting = TraditionalRLSetting(
        dataset="cartpole",
        batch_size=1,
        num_workers=0,
        nb_tasks=2,
        train_max_steps=200,
        test_max_steps=200,
        max_episode_steps=5,
        add_done_to_observations=True,
        config=cpu_config,
    )
    assert setting.train_max_steps == 200
    assert setting.test_max_steps == 200
    assert setting.stationary_context
    # setting = RLSetting.load_benchmark("monsterkong")
    cpu_config.seed_everything()
    model = BaseModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(
            multihead=True,
            output_head=EpisodicA2C.HParams(accumulate_losses_before_backward=True),
        ),
        config=cpu_config,
    ).to(device=config.device)
    # TODO: Maybe add some kind of "hook" to check which losses get returned when?
    model.train()
    assert not model.automatic_optimization
    # Import this and use it to create the Trainer, rather than creating the Trainer
    # directly, so we don't get the same bug (due to with_is_last in PL) from the
    # DataConnector.
    from sequoia.methods.base_method import TrainerConfig

    # NOTE: We only do this so that the Model has a self.trainer attribute and so the
    # model.training_step below can be used:
    if config.device.type == "cuda":
        trainer_config = TrainerConfig(fast_dev_run=True)
    else:
        trainer_config = TrainerConfig(
            fast_dev_run=True,
            gpus=0,
            distributed_backend=None,
        )
    trainer = trainer_config.make_trainer(config=cpu_config)
    # Fit in 'fast_dev_run' mode, so just a single batch of train / valid / test data.
    with setting.train_dataloader() as temp_env:
        temp_env.seed(123)
        trainer.fit(model, train_dataloader=temp_env)
    # NOTE: If we don't clear the buffers, there is a bug because the things that get put
    # in buffers aren't on the same device as later.
    model.output_head.clear_all_buffers()
    # NOTE(review): `optimizer`, `episodes` and `max_episodes` are not used to
    # update anything below (`episodes` only appears in the printed message).
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    episodes = 0
    max_episodes = 5
    # Dict mapping from step to loss at that step.
    losses: Dict[int, List[Loss]] = defaultdict(list)
    with setting.train_dataloader() as env:
        env.seed(123)
        # TODO: Interesting bug/problem: Since the VectorEnvs always want to reset the
        # env at the end of the episode, they also do so on the individual envs.
        # In order to solve that, we need to NOT put any 'ActionLimit' on the inside
        # envs, but only on the outer env.
        for step, obs in enumerate(env):
            assert isinstance(obs, RLSetting.Observations)
            print(step, env.is_closed())
            forward_pass = model.training_step(batch=obs, batch_idx=step)
            step_results: Optional[Loss] = model.training_step_end([forward_pass])
            loss_tensor: Optional[Tensor] = None
            # With `max_episode_steps=5`, episodes are expected to end on every
            # fifth step.
            if step > 0 and step % 5 == 0:
                # We should get a loss at each episode end:
                assert all(obs.done), step  # Since batch_size == 1 for now.
                assert step_results is not None, (step, obs.task_labels)
                loss_tensor = step_results["loss"]
                loss: Loss = step_results["loss_object"]
                print(f"Loss at step {step}: {loss}")
                losses[step].append(loss)
            else:
                assert step_results is None
            print(
                f"Step {step}, episode {episodes}: x={obs.x}, done={obs.done}, task labels: {obs.task_labels}, loss_tensor: {loss_tensor}"
            )
            if step >= setting.train_max_steps:
                assert False, "Shouldn't the environment have closed at this point?"
    # Print a summary of the losses obtained at each step.
    for step, step_losses in losses.items():
        print(f"Losses at step {step}:")
        for loss in step_losses:
            print(f"\t{loss}")
    # assert False, losses
@pytest.mark.parametrize(
    "input, expected",
    [
        (np.array([0, 0, 0, 0]), {0: np.arange(4)}),
        (torch.as_tensor([0, 0, 0, 0]), {0: torch.arange(4)}),
        (
            torch.as_tensor([0, 0, 1, 0]),
            {0: torch.LongTensor([0, 1, 3]), 1: torch.LongTensor([2])},
        ),
        (
            np.array([0, 0, 1, None]),
            {0: np.array([0, 1]), 1: np.array([2]), None: np.array([3])},
        ),
    ],
)
def test_get_task_indices(input, expected):
    """Check that `get_task_indices` maps each task label to the right positions.

    The comparison is structural (same set of task ids, same container type and
    same indices for each task) rather than based on `str()`, so that the test
    depends neither on the ordering of the keys in the returned dict nor on how
    arrays / tensors happen to be printed.
    """
    actual = get_task_indices(input)
    assert set(actual.keys()) == set(expected.keys())
    for task_id, expected_indices in expected.items():
        actual_indices = actual[task_id]
        # ndarray labels give ndarray indices, Tensor labels give Tensor indices.
        assert isinstance(actual_indices, type(expected_indices)), task_id
        assert actual_indices.tolist() == expected_indices.tolist(), task_id
@pytest.mark.parametrize(
    "indices",
    [
        slice(0, 10),  # all the same task (0)
        slice(0, 20),  # 10 from task 0, 10 from task 1
        slice(0, 30),  # 10 from task 0, 10 from task 1, 10 from task 2
        slice(0, 50),  # 10 from each task.
    ],
)
def test_task_inference_sl(
    mixed_samples: Dict[int, Tuple[Tensor, Tensor, Tensor]],
    indices: slice,
    config: Config,
):
    """Check that a multi-headed model can do a forward pass on a batch mixing data
    from different tasks when *no* task labels are given (task inference).

    For now this only checks the shape of the predictions.
    """
    # Build a mixed batch: concatenate the samples of all tasks, then keep a slice.
    xs, ys, ts = (torch.cat(parts)[indices] for parts in zip(*mixed_samples.values()))
    ts = ts.int()
    # No task labels: the model has to infer which head to use.
    obs = ClassIncrementalSetting.Observations(x=xs, task_labels=None)

    setting = ClassIncrementalSetting()
    model = MultiHeadModel(
        setting=setting,
        hparams=MultiHeadModel.HParams(batch_size=30, multihead=True),
        config=config,
    )

    class MockEncoder(nn.Module):
        """Encoder stand-in that outputs a constant representation."""

        def forward(self, x: Tensor):
            return x.new_ones([x.shape[0], model.hidden_size])

    model.encoder = MockEncoder()

    # One mock head per task.
    for task_id in range(5):
        model.output_heads[str(task_id)] = MockOutputHead(
            input_space=spaces.Box(0, 1, [model.hidden_size]),
            action_space=spaces.Discrete(setting.action_space.n),
            Actions=setting.Actions,
            task_id=task_id,
        )
    model.output_head = model.output_heads["0"]

    forward_pass = model(obs)
    y_preds = forward_pass.actions.y_pred

    assert y_preds.shape == ts.shape
    # TODO: Check that the task inference works by changing the logits to be based on
    # the assigned task in the Mock output head.
    # assert torch.all(y_preds == ts * xs.view([xs.shape[0], -1]).mean(1))
@pytest.mark.skip(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.")
@pytest.mark.timeout(120)
def test_task_inference_rl_easy(config: Config):
    """Apply the BaseMethod to a short two-task cartpole setting with short episodes.

    Currently skipped (see the marker above).
    """
    # `BaseMethod` and `IncrementalRLSetting` are the ones imported at the top of
    # this test module.
    setting_kwargs = dict(
        dataset="cartpole",
        nb_tasks=2,
        max_episode_steps=20,
        train_max_steps=200,
        test_max_steps=200,
        config=config,
    )
    method = BaseMethod(config=config)
    setting = IncrementalRLSetting(**setting_kwargs)
    results = setting.apply(method)
    assert results
    # assert False, results.to_log_dict()
@pytest.mark.skip(reason=f"TODO: Re-enable this test once the BaseMethod works in RL again.")
@pytest.mark.timeout(120)
def test_task_inference_rl_hard(config: Config):
    """Apply the BaseMethod to a two-task cartpole setting, with more steps than the
    'easy' variant and no limit given on the episode length.

    Currently skipped (see the marker above).
    """
    setting_kwargs = dict(
        dataset="cartpole",
        nb_tasks=2,
        train_max_steps=1000,
        test_max_steps=1000,
        config=config,
    )
    method = BaseMethod(config=config)
    setting = IncrementalRLSetting(**setting_kwargs)
    results = setting.apply(method)
    assert results
    # assert False, results.to_log_dict()
from sequoia.methods.base_method import BaseMethod
from sequoia.settings.sl import TraditionalSLSetting
from sequoia.settings.sl.continual.setting import subset
@pytest.mark.timeout(30)
def test_task_inference_multi_task_sl(config: Config):
    """Apply the BaseMethod to a shortened two-task MNIST `TraditionalSLSetting` and
    check that the final performance is at least 80%.
    """
    setting = TraditionalSLSetting(dataset="mnist", nb_tasks=2, config=config)
    # TODO: Maybe add this kind of 'max_steps_per_task' argument even in supervised
    # settings:
    dataset_length = 1000

    # TODO: Shorten the train/test datasets?
    method = BaseMethod(config=config, max_epochs=1)
    setting.setup()

    # Keep only the first `dataset_length` samples of every train / val / test dataset.
    for attribute in ("train_datasets", "val_datasets", "test_datasets"):
        shortened = [
            subset(dataset, list(range(dataset_length)))
            for dataset in getattr(setting, attribute)
        ]
        setattr(setting, attribute, shortened)

    results = setting.apply(method)
    assert 0.80 <= results.average_final_performance.objective
================================================
FILE: sequoia/methods/models/base_model/self_supervised_model.py
================================================
""" Base class for a Self-Supervised model.
This is meant to be a kind of 'Mixin' that you can use and extend in order
to add self-supervised losses to your model.
"""
import warnings
from dataclasses import dataclass
from typing import Dict, Optional, TypeVar
from torch import Tensor, nn
from sequoia.common.config import Config
from sequoia.common.loss import Loss
from sequoia.methods.aux_tasks.auxiliary_task import AuxiliaryTask
from sequoia.settings import Rewards, Setting, SettingType
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import flatten_dict
from .model import Model
# from sequoia.utils.module_dict import ModuleDict
# Module-level logger.
logger = get_logger(__name__)
# Type variable for hyper-parameter classes deriving from `SelfSupervisedModel.HParams`.
HParamsType = TypeVar("HParamsType", bound="SelfSupervisedModel.HParams")
class SelfSupervisedModel(Model[SettingType]):
"""
Model 'mixin' that adds support for modular, configurable "auxiliary tasks".
These auxiliary tasks are used to get a self-supervised loss to train on
when labels aren't available.
"""
@dataclass
class HParams(Model.HParams):
    """Hyperparameters of a Self-Supervised method."""

    # No field is declared here on top of those inherited from `Model.HParams`.
    # The commented-out entries below are options of auxiliary tasks that are
    # currently disabled:
    # vae: Optional[VAEReconstructionTask.Options] = None
    # ae: Optional[AEReconstructionTask.Options] = None
def __init__(self, setting: Setting, hparams: HParams, config: Config):
    """Create the model, then create its auxiliary tasks.

    The three arguments are passed as-is to the parent constructor.
    """
    super().__init__(setting, hparams, config)
    # Annotation only (nothing is assigned here): narrows the type of `self.hp`
    # for type checkers.
    self.hp: SelfSupervisedModel.HParams
    # Dictionary of auxiliary tasks.
    self.tasks: Dict[str, AuxiliaryTask] = self.create_auxiliary_tasks()
def get_loss(
    self,
    forward_pass: Dict[str, Tensor],
    rewards: Rewards = None,
    loss_name: str = "",
) -> Loss:
    """Return the loss of the base model plus the losses of the enabled auxiliary tasks.

    Each auxiliary loss is moved to the device of the model and scaled by the
    coefficient of its task before being added to the total.
    """
    # Output task loss (the loss of the base model).
    total: Loss = super().get_loss(forward_pass, rewards=rewards, loss_name=loss_name)

    # Self-supervised losses from all the enabled auxiliary tasks.
    for task_name, aux_task in self.tasks.items():
        assert task_name, "Auxiliary tasks should have a name!"
        if not aux_task.enabled:
            continue
        # TODO: Auxiliary tasks all share the same 'y' for now, but it
        # might make more sense to organize this differently.
        y = rewards.y if rewards else None
        aux_loss: Loss = aux_task.get_loss(forward_pass, y=y)
        # Scale by the coefficient of the task before accumulating.
        total += aux_task.coefficient * aux_loss.to(self.device)
        if self.config.debug and self.config.verbose:
            logger.debug(f"{task_name} loss: {aux_loss.total_loss}")
    return total
def add_auxiliary_task(
    self, aux_task: AuxiliaryTask, key: str = None, coefficient: float = None
) -> None:
    """Register `aux_task` on the model, under `key` (default: the name of the task).

    When `coefficient` is given, it replaces the coefficient of the task. The
    task is enabled if it ends up with a non-zero coefficient; a warning is
    emitted when no coefficient is given and the task's own coefficient is zero.

    Raises
    ------
    RuntimeError
        If a task is already registered under that key.
    """
    if key is None:
        key = aux_task.name
    if key in self.tasks:
        raise RuntimeError(f"There is already an auxiliary task with name {key} in the model!")

    self.tasks[key] = aux_task.to(self.device)

    if coefficient is not None:
        aux_task.coefficient = coefficient
    elif not aux_task.coefficient:
        message = f"Adding auxiliary task with name {key}, but with coefficient of 0.!"
        warnings.warn(UserWarning(message))

    if aux_task.coefficient:
        aux_task.enable()
def create_auxiliary_tasks(self) -> Dict[str, AuxiliaryTask]:
    """Share the model's components with `AuxiliaryTask` and return an empty task dict.

    The model itself and a few of its attributes are exposed to every auxiliary
    task by setting class attributes on `AuxiliaryTask`. The returned
    `nn.ModuleDict` starts empty: the code that used to populate it from the
    hyper-parameters is commented out below.
    """
    # Share the relevant parameters with all the auxiliary tasks.
    # We do this by setting class attributes.
    # TODO: Make sure that we aren't duplicating all of the model's weights
    # by setting a class attribute.
    AuxiliaryTask._model = self
    for shared_attribute in ("hidden_size", "input_shape", "encoder", "output_head"):
        setattr(AuxiliaryTask, shared_attribute, getattr(self, shared_attribute))
    # AuxiliaryTask.preprocessing = self.preprocess_batch

    # TODO(@lebrice): Should we create the tasks even if they aren't used,
    # and then 'enable' them when they are needed? (I'm thinking that maybe
    # being enable/disable auxiliary tasks when needed might be useful
    # later?)
    # if self.hp.vae and self.hp.vae.coefficient:
    #     tasks[VAEReconstructionTask.name] = VAEReconstructionTask(options=self.hp.vae)
    # if self.hp.ae and self.hp.ae.coefficient:
    #     tasks[AEReconstructionTask.name] = AEReconstructionTask(options=self.hp.ae)
    # if self.hp.ewc and self.hp.ewc.coefficient:
    #     tasks[EWCTask.name] = EWCTask(options=self.hp.ewc)
    auxiliary_tasks: Dict[str, AuxiliaryTask] = nn.ModuleDict()
    return auxiliary_tasks
def on_task_switch(self, task_id: Optional[int]) -> None:
    """Notify the enabled auxiliary tasks, then the parent, of a task switch.

    Args:
        task_id (Optional[int]): the Id of the task, passed along unchanged.
    """
    for aux_task in self.tasks.values():
        if not aux_task.enabled:
            continue
        aux_task.on_task_switch(task_id=task_id)
    super().on_task_switch(task_id=task_id)
def shared_modules(self) -> Dict[str, nn.Module]:
    """Returns any trainable modules in `self` that are shared across tasks.

    By giving this information, these weights can then be used in
    regularization-based auxiliary tasks like EWC, for example.

    Starts from the parent's shared modules and adds those reported by every
    auxiliary task, flattened and stored under "<task name>.<module name>".

    Returns
    -------
    Dict[str, nn.Module]:
        Dictionary mapping from name to the shared modules, if any.
    """
    # TODO: What separator to use when dealing with nested dictionaries? I seem
    # to recall that ModuleDicts don't like some separators.
    sep = "."
    modules = super().shared_modules()
    for task_name, aux_task in self.tasks.items():
        flat_task_modules = flatten_dict(aux_task.shared_modules(), separator=sep)
        for module_name, module in flat_task_modules.items():
            qualified_name = f"{task_name}{sep}{module_name}"
            modules[qualified_name] = module
    return modules
================================================
FILE: sequoia/methods/models/base_model/self_supervised_model_test.py
================================================
from typing import Dict, List, Tuple, Type
import pytest
from sequoia.conftest import id_fn, parametrize, slow
from sequoia.methods.aux_tasks import AE, EWC, VAE
from sequoia.methods.base_method import BaseMethod
from sequoia.settings.base import Results, Setting
from sequoia.settings.sl import TaskIncrementalSLSetting, TraditionalSLSetting
from sequoia.settings.sl.incremental import ClassIncrementalSetting
Method = BaseMethod
# Use 'Method' as an alias for the actual Method subclass under test (since,
# at the moment, quite a few tests share some code).
# List of datasets that are currently supported for this method.
supported_datasets: List[str] = [
"mnist",
"fashion_mnist",
"cifar10",
"cifar100",
"kmnist",
]
def test_get_applicable_settings():
    """The method under test should declare itself applicable to these SL settings."""
    applicable_settings = Method.get_applicable_settings()
    for expected_setting in (
        ClassIncrementalSetting,
        TaskIncrementalSLSetting,
        TraditionalSLSetting,
    ):
        assert expected_setting in applicable_settings
@pytest.fixture(
    scope="module",
    params=[
        {},  # no aux task.
        {VAE: 1},
        {AE: 1},
        {EWC: 1},
    ],
    ids=id_fn,
)
def method_and_coefficients(request, tmp_path_factory):
    """Module-scoped fixture returning a `(method, aux_task_coefficients)` pair.

    The method is created once from command-line style arguments and reused
    across all tests below; the coefficients are the fixture parameter, i.e. a
    dict mapping auxiliary task name to its coefficient.
    """
    aux_task_coefficients = request.param
    log_dir = tmp_path_factory.mktemp("log_dir")
    args = f"""
--debug
--log_dir_root {log_dir}
--default_root_dir {log_dir}
--knn_samples 0
--seed 123
--fast_dev_run
"""
    # One `--<task>.coef <value>` flag per auxiliary task.
    coefficient_flags = "".join(
        f"--{aux_task_name}.coef {coef} " for aux_task_name, coef in aux_task_coefficients.items()
    )
    args += coefficient_flags
    method = Method.from_args(args, strict=False)
    return method, aux_task_coefficients
# @parametrize("dataset", get_dataset_params(Method, supported_datasets))
from sequoia.methods.method_test import key_fn
@slow
@parametrize("setting_type", sorted(Method.get_applicable_settings(), key=key_fn))
def test_fast_dev_run(
    method_and_coefficients: Tuple[Method, Dict[str, float]],
    setting_type: Type[Setting],
    test_dataset: str,
):
    """Performs a quick run with only one batch of train / val / test data and
    check that the 'Results' objects are ok.

    The test is skipped when `test_dataset` isn't one of the datasets available
    for `setting_type`.
    """
    method, aux_task_coefficients = method_and_coefficients
    if test_dataset not in setting_type.available_datasets:
        # The reason is passed positionally: the `msg` keyword of `pytest.skip`
        # was deprecated (pytest 7) and then removed (pytest 8) in favour of
        # `reason`, while the first positional argument works on every version.
        pytest.skip(f"dataset {test_dataset} isn't available for this setting.")

    # Instantiate the setting
    setting: Setting = setting_type(dataset=test_dataset, nb_tasks=2)
    results: Results = setting.apply(method)
    validate_results(results, aux_task_coefficients)
def validate_results(results: Results, aux_task_coefficients: Dict[str, float]):
    """Makes sure that the results make sense for the method being tested.

    Checks that every task loss has a sub-loss for each auxiliary task in
    `aux_task_coefficients`, that this sub-loss is non-negative and that it
    carries the expected coefficient.

    Args:
        results (Results): A given Results object.
        aux_task_coefficients (Dict[str, float]): Expected coefficient of each
            auxiliary task, keyed by task name.
    """
    assert results is not None
    assert results.hparams is not None
    assert results.test_loss is not None

    for task_loss in results.task_losses:
        sub_losses = task_loss.losses
        for aux_task_name, expected_coefficient in aux_task_coefficients.items():
            assert aux_task_name in sub_losses
            aux_task_loss = sub_losses[aux_task_name]
            assert aux_task_loss.loss >= 0.0
            assert aux_task_loss._coefficient == expected_coefficient
================================================
FILE: sequoia/methods/models/base_model/semi_supervised_model.py
================================================
"""
Addon that enables training on semi-supervised batches.
NOTE: Not used at the moment, but should work just fine.
"""
from dataclasses import dataclass
from typing import Dict, Optional, Sequence, Union
import numpy as np
from torch import Tensor
# from sequoia.common.callbacks import KnnCallback
from sequoia.common.loss import Loss
from sequoia.settings import Rewards, SettingType
from sequoia.utils.logging_utils import get_logger
from .model import Model
logger = get_logger(__name__)
class SemiSupervisedModel(Model[SettingType]):
    """Model addon that computes a loss on (potentially) partially labeled batches."""

    @dataclass
    class HParams(Model.HParams):
        """Hyperparameters of a Semi-Supervised model."""

        # Adds Options for a KNN classifier callback, which is used to evaluate
        # the quality of the representations on each task after each training
        # epoch.
        # TODO: Debug/test this callback to make sure it still works fine.
        # knn_callback: KnnCallback = mutable_field(KnnCallback)

    def get_loss(
        self,
        forward_pass: Dict[str, Tensor],
        rewards: Optional[Rewards] = None,
        loss_name: str = "",
    ) -> Loss:
        """Trains the model on a batch of (potentially partially labeled) data.

        Args:
            forward_pass (Dict[str, Tensor]): WIP: The results of the forward
                pass (processed input, predictions, etc.)
            rewards (Optional[Rewards]): Rewards for the batch, or None for a
                fully unlabeled batch. Their `y` attribute can either be:
                - None: fully unlabeled batch
                - Tensor: fully labeled batch
                - Sequence[Optional[Tensor]]: Partially labeled batch.
            loss_name (str, optional): Name of the resulting loss object.
                Defaults to "".

        Returns:
            Loss: for a fully labeled or fully unlabeled batch, the loss of the
            parent class. Otherwise, a loss named `loss_name` to which an
            "unsupervised" and a "supervised" loss were added.
        """
        # TODO: We could also just use '-1' instead as the 'no-label' val: this
        # would make it a bit simpler than having both numpy arrays and tensors
        # in the batch
        # `rewards` itself may be None (fully unlabeled batch): don't dereference it.
        y: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = (
            None if rewards is None else rewards.y
        )
        if y is None or all(y_i is not None for y_i in y):
            # Fully labeled/unlabeled batch
            # NOTE: Tensors can't have None items, so if we get a Tensor that
            # means that we have all task labels.
            return super().get_loss(forward_pass, rewards, loss_name=loss_name)

        is_labeled: np.ndarray = np.asarray([y_i is not None for y_i in y])

        # Batch is maybe a mix of labeled / unlabeled data.
        # NOTE(review): boolean-mask indexing assumes `y` supports it (e.g. a
        # numpy object array rather than a plain list) - confirm with callers.
        labeled_y = y[is_labeled]
        # TODO: Might have to somehow re-order the results based on the indices?
        # TODO: Join (merge) the metrics? or keep them separate?
        labeled_forward_pass = {k: v[is_labeled] for k, v in forward_pass.items()}
        unlabeled_forward_pass = {k: v[~is_labeled] for k, v in forward_pass.items()}

        labeled_ratio = len(labeled_y) / len(y)
        logger.debug(f"Labeled ratio: {labeled_ratio}")

        # Create the 'total' loss for the batch, with the required name.
        # We will then create two 'sublosses', one named 'unsupervised' and one
        # named 'supervised', each containing the respective losses and metrics.
        # TODO: Make sure that this doesn't make it harder to get the metrics
        # from the Loss object. If it does, then we could maybe just fuse the
        # labeled and unlabeled losses and metrics, but that might also cause
        # issues.
        loss = Loss(name=loss_name)
        # At least one sample is unlabeled at this point (see the check above).
        if unlabeled_forward_pass:
            # TODO: Setting a different loss name for the for this is definitely going to cause trouble!
            unsupervised_loss = super().get_loss(
                unlabeled_forward_pass,
                rewards=None,
                loss_name="unsupervised",
            )
            loss += unsupervised_loss
        # A non-empty dict is always truthy, even when it only holds empty
        # slices: also require that at least one sample is actually labeled.
        if labeled_forward_pass and is_labeled.any():
            # NOTE(review): `labeled_y` is passed as-is where the parent may
            # expect a `Rewards` object - confirm against `Model.get_loss`.
            supervised_loss = super().get_loss(
                labeled_forward_pass,
                rewards=labeled_y,
                loss_name="supervised",
            )
            loss += supervised_loss

        return loss
================================================
FILE: sequoia/methods/models/baseline_model.puml
================================================
@startuml base_model
' !include output_heads.puml
package base_model {
package model {
abstract class Model {
+ hparams: Model.HParams
+ encoder: nn.Module
+ output_head: OutputHead
+ forward(Observations): ForwardPass
+ get_loss(ForwardPass, Rewards): Loss
+ get_actions(observations: Observations, action_space: Space): Actions
}
' class Model.HParams extends BaseHParams {}
' class BaseHParams {
class Model.HParams {
{static} + available_optimizers: Dict[str, Type[Optimizer]]
{static} + available_encoders: Dict[str, Type[nn.Module]]
+ learning_rate: float = 0.001
+ weight_decay: float = 1e-6
+ optimizer: str = "adam"
+ encoder: str = "resnet18"
+ batch_size: Optional[int]
+ train_from_scratch: bool = False
+ freeze_pretrained_encoder_weights: bool = False
+ output_head: OutputHead.HParams
+ detach_output_head: bool = False
}
}
together {
package semi_supervised_model {
abstract class SemiSupervisedModel extends Model {
+ forward(Observations): ForwardPass
+ get_loss(ForwardPass, Optional[Rewards]): Loss
}
abstract class SemiSupervisedModel.HParams extends Model.HParams {
+ knn_callback: KnnCallback note (todo: unused atm)
}
}
package self_supervised_model {
abstract class SelfSupervisedModel extends Model {
+ hparams: SelfSupervisedModel.HParams
+ tasks: dict[str, AuxiliaryTask]
+ add_auxiliary_task(task AuxiliaryTask)
}
abstract class SelfSupervisedModel.HParams extends Model.HParams {
+ simclr: Optional[SimCLRTask.Options]
+ vae: Optional[VAEReconstructionTask.Options]
+ ae: Optional[AEReconstructionTask.Options]
+ ewc: Optional[EWCTask.Options]
}
}
package multihead_model {
abstract class MultiHeadModel extends Model {
+ output_heads: dict[str, OutputHead]
+ forward(Observations): ForwardPass
+ on_task_switch(task_id: Optional[int])
}
abstract class MultiHeadModel.HParams extends Model.HParams {
+ multihead: Optional[bool]
}
}
}
package base_model as base_model.base_model {
class BaseModel extends SemiSupervisedModel, SelfSupervisedModel, MultiHeadModel
{
+ hparams: BaseModel.HParams
}
class BaseModel.HParams extends SelfSupervisedModel.HParams, MultiHeadModel.HParams, SemiSupervisedModel.HParams {
}
}
Model "1" *-- "1" OutputHead
' Model *-- Model.HParams
' BaseModel *-- BaseModel.HParams
' SemiSupervisedModel *-- SemiSupervisedModel.HParams
' SelfSupervisedModel *-- SelfSupervisedModel.HParams
' MultiHeadModel *-- MultiHeadModel.HParams
SelfSupervisedModel "1" o-- "many" aux_tasks.AuxiliaryTask
' BaseMethod "1" *--> "1" BaseModel : uses
MultiHeadModel "1" *-- "many" OutputHead
' MultiHeadModel "1" *-- "1" OutputHead
}
@enduml
================================================
FILE: sequoia/methods/models/fcnet.py
================================================
""" TODO: Take out the dense network from the OutputHead. """
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Type, Union, overload
from torch import nn
from sequoia.common.hparams import HyperParameters, categorical, uniform
class FCNet(nn.Sequential):
    """Fully-connected network.

    The network is a `Flatten` layer, followed by one (optional) `Dropout`,
    `Linear` and activation per hidden layer, followed by a final `Linear`
    layer producing `out_features` values.
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of a fully-connected network."""

        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu": nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 10, default=3)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, than each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations, default=nn.Tanh)
        # Dropout probability. When set, a Dropout layer is inserted in front of
        # the Linear layer of each hidden layer.
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)

        def __post_init__(self):
            """Normalize the fields and check that they agree with each other.

            Raises:
                RuntimeError: if, once normalized, `hidden_layers` differs from
                    the number of entries in `hidden_neurons`.
            """
            super().__post_init__()
            # Activations may be given by name (case-insensitive).
            if isinstance(self.activation, str):
                self.activation = self.available_activations[self.activation.lower()]
            # A single width becomes a one-item list.
            if isinstance(self.hidden_neurons, int):
                self.hidden_neurons = [self.hidden_neurons]

            n_widths = len(self.hidden_neurons)
            if self.hidden_layers == 0:
                # no value passed to --hidden_layers
                if n_widths == 1:
                    # Default Setting: No hidden layers.
                    self.hidden_neurons = []
                elif n_widths > 1:
                    # Set the number of hidden layers to the number of passed values.
                    self.hidden_layers = n_widths
            elif self.hidden_layers > 0 and n_widths == 1:
                # Duplicate that value for each of the `hidden_layers` layers.
                self.hidden_neurons *= self.hidden_layers
            elif self.hidden_layers == 1 and not self.hidden_neurons:
                self.hidden_layers = 0

            if self.hidden_layers != len(self.hidden_neurons):
                raise RuntimeError(
                    f"Invalid values: hidden_layers ({self.hidden_layers}) != "
                    f"len(hidden_neurons) ({len(self.hidden_neurons)})."
                )

    @overload
    def __init__(self, in_features: int, out_features: int, hparams: HParams = None):
        ...

    @overload
    def __init__(
        self,
        in_features: int,
        out_features: int,
        hidden_layers: int = 1,
        hidden_neurons: List[int] = None,
        activation: Type[nn.Module] = nn.Tanh,
    ):
        ...

    def __init__(self, in_features: int, out_features: int, hparams: HParams = None, **kwargs):
        """Create the network from `hparams`, or from HParams built out of `kwargs`.

        Args:
            in_features: Input size of the first Linear layer.
            out_features: Output size of the last Linear layer.
            hparams: Hyper-parameters of the network. When falsy, they are
                created with `self.HParams(**kwargs)`.
            **kwargs: Field values for the HParams; only used when `hparams`
                is falsy.
        """
        self.in_features = in_features
        self.out_features = out_features
        self.hparams = hparams or self.HParams(**kwargs)

        widths = self.hparams.hidden_neurons
        assert isinstance(widths, list)

        layers: List[nn.Module] = []
        previous_width = in_features
        for width in widths:
            if self.hparams.dropout_prob:
                layers.append(nn.Dropout(p=self.hparams.dropout_prob))
            layers.append(nn.Linear(previous_width, width))
            layers.append(self.hparams.activation())
            # next input size is output size of prev.
            previous_width = width
        final_layer = nn.Linear(previous_width, out_features)
        super().__init__(nn.Flatten(), *layers, final_layer)
# TODO: IDEA: use @singledispatchmethod to add a `forward` implementation
# for mapping input space to output space.
# def forward(self, input: Any)
================================================
FILE: sequoia/methods/models/forward_pass.py
================================================
""" Typed object that represents the outputs of the forward pass of a model. """
from dataclasses import dataclass
from typing import Any, Optional
from simple_parsing.helpers.flatten import FlattenedAccess
from torch import Tensor
from sequoia.common import Batch
from sequoia.settings.base.objects import Actions, Observations, Rewards
@dataclass(frozen=True)
class ForwardPass(Batch, FlattenedAccess):
    """Typed version of the result of a forward pass through a model.

    FlattenedAccess is pretty cool, but potentially confusing. We can get
    any attributes in the children by getting them directly on the
    parent. So if the `observation` has an `x` attribute, we can get on this
    object directly with `self.x`, and it will fetch the attribute from the
    observation.
    """

    # Observations that were given to the model.
    observations: Observations
    # Representations of the observations (also exposed as `h_x`).
    representations: Tensor
    # Actions produced for these observations.
    actions: Actions
    # Rewards, when available.
    rewards: Optional[Rewards] = None
    # Note: Might be annoying later if there is a need for subclasses of ForwardPass,
    # since dataclass fields without a default value can't follow fields that have one.

    @property
    def h_x(self) -> Any:
        """Alias for `self.representations`."""
        return self.representations
================================================
FILE: sequoia/methods/models/output_heads/__init__.py
================================================
from .classification_head import ClassificationHead
from .output_head import OutputHead
from .regression_head import RegressionHead
from .rl import ActorCriticHead, PolicyHead
================================================
FILE: sequoia/methods/models/output_heads/classification_head.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional, Type, Union
import gym
import torch
from gym import spaces
from torch import LongTensor, Tensor, nn
from sequoia.common import ClassificationMetrics, Loss
from sequoia.common.hparams import categorical, uniform
from sequoia.settings import Actions, Observations, Rewards
from ..fcnet import FCNet
from ..forward_pass import ForwardPass
from .output_head import OutputHead
# TODO: This is based on 'Actions' which is currently basically the same for all settings
# However, there should probably have a different `Action` class on a
# IncrementalSLSetting("mnist") vs IncrementalSLSetting("some_regression_dataset")!
# IDEA: What if Settings were actually meta-classes, where the 'instances' were for a
# particular choice of dataset? (e.g. `IncrementalSLSetting("mnist")` -> )
# This would maybe look a bit like the 'fully compositional' approach as well?
@dataclass(frozen=True)
class ClassificationOutput(Actions):
    """Typed dict-like class that represents the 'forward pass'/output of a
    classification head, which correspond to the 'actions' to be sent to the
    environment, in the general formulation.
    """

    # Predicted class index for each sample.
    y_pred: Union[LongTensor, Tensor]
    # Un-normalized class scores; the last dimension indexes the classes.
    logits: Tensor

    @property
    def action(self) -> LongTensor:
        """Alias for `self.y_pred`."""
        return self.y_pred

    @property
    def y_pred_log_prob(self) -> Tensor:
        """Returns the log probabilities for the chosen actions/predictions.

        The result has the same shape as `y_pred`: one value per prediction.
        """
        return self._select_predicted(self.logits.log_softmax(-1))

    @property
    def y_pred_prob(self) -> Tensor:
        """Returns the probabilities for the chosen actions/predictions.

        The result has the same shape as `y_pred`: one value per prediction.
        """
        return self._select_predicted(self.probabilities)

    @property
    def probabilities(self) -> Tensor:
        """Returns the normalized probabilities for each class, i.e. the
        softmax-ed version of `self.logits`.
        """
        return self.logits.softmax(-1)

    def _select_predicted(self, per_class_values: Tensor) -> Tensor:
        """Picks, for each sample, the entry of its predicted class.

        Gathering along the last (class) dimension selects one value per
        sample. Indexing with `[:, y_pred]` or `[y_pred]` would instead select
        whole columns / rows of the batch.
        """
        class_index = self.y_pred.long().unsqueeze(-1)
        return per_class_values.gather(-1, class_index).squeeze(-1)
class ClassificationHead(OutputHead):
    """Output head for classification: an `FCNet` trained with a cross-entropy loss."""

    @dataclass
    class HParams(FCNet.HParams, OutputHead.HParams):
        """Hyper-parameters of the OutputHead used for classification."""

        # NOTE: These hparams were basically copied over from FCNet.HParams, just so its a
        # bit more visible.
        available_activations: ClassVar[Dict[str, Type[nn.Module]]] = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "elu": nn.ELU,  # No idea what these do, but hey, they are available!
            "gelu": nn.GELU,
            "relu6": nn.ReLU6,
        }
        # Number of hidden layers in the output head.
        hidden_layers: int = uniform(0, 3, default=0)
        # Number of neurons in each hidden layer of the output head.
        # If a single value is given, than each of the `hidden_layers` layers
        # will have that number of neurons.
        # If `n > 1` values are given, then `hidden_layers` must either be 0 or
        # `n`, otherwise a RuntimeError will be raised.
        hidden_neurons: Union[int, List[int]] = uniform(16, 512, default=64)
        activation: Type[nn.Module] = categorical(available_activations, default=nn.Tanh)
        # Dropout probability (see `FCNet` for where the Dropout layers go).
        # Set to None or 0 for no dropout.
        # TODO: Not sure if this is how it's typically used. Need to check.
        dropout_prob: Optional[float] = uniform(0, 0.8, default=0.2)

    def __init__(
        self,
        input_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space = None,
        hparams: "ClassificationHead.HParams" = None,
        name: str = "classification",
    ):
        """Create the head; `action_space` must be a `spaces.Discrete`.

        All arguments are forwarded to `OutputHead.__init__`. The dense network
        maps `self.input_size` features to `action_space.n` logits.
        """
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        self.hparams: ClassificationHead.HParams
        assert isinstance(action_space, spaces.Discrete)
        n_classes = action_space.n
        self.dense = FCNet(
            in_features=self.input_size,
            out_features=n_classes,
            hparams=self.hparams,
        )
        # if output_size == 2:
        #     # TODO: Should we be using this loss instead?
        #     self.loss_fn = nn.BCEWithLogitsLoss()
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, observations: Observations, representations: Tensor) -> ClassificationOutput:
        """Compute the logits from `representations` and predict their argmax.

        `observations` isn't used by this head.
        """
        # TODO: This should probably take in a dict and return a dict, or something like that?
        # TODO: We should maybe convert this to also return a dict instead
        # of a Tensor, just to be consistent with everything else. This could
        # also maybe help with having multiple different output heads, each
        # having a different name and giving back a dictionary of their own
        # forward pass tensors (if needed) and predictions?
        logits = self.dense(representations)
        return ClassificationOutput(
            logits=logits,
            y_pred=logits.argmax(dim=-1),
        )

    def get_loss(
        self, forward_pass: ForwardPass, actions: ClassificationOutput, rewards: Rewards
    ) -> Loss:
        """Cross-entropy loss between the logits in `actions` and the labels `rewards.y`.

        The rewards are first moved to the device of the logits. The returned
        `Loss` is named after this head and carries classification metrics
        stored under the same name.
        """
        logits: Tensor = actions.logits
        y_pred: Tensor = actions.y_pred
        rewards = rewards.to(logits.device)
        y: Tensor = rewards.y

        # Could remove these: just used for debugging.
        n_classes = logits.shape[-1]
        assert len(y.shape) == 1, y.shape
        assert not torch.is_floating_point(y), y.dtype
        assert 0 <= y.min(), y
        assert y.max() < n_classes, y

        loss_tensor = self.loss_fn(logits, y)
        assert loss_tensor.shape == ()

        metrics = ClassificationMetrics(y_pred=logits, y=y)

        assert self.name, "Output Heads should have a name!"
        return Loss(
            name=self.name,
            loss=loss_tensor,
            # NOTE: we're passing the tensors to the Loss object because we let
            # it create the Metrics for us automatically.
            metrics={self.name: metrics},
        )
================================================
FILE: sequoia/methods/models/output_heads/output_head.py
================================================
""" Abstract base class for an output head of the BaseModel. """
import dataclasses
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import ClassVar, List, Sequence, Type
import gym
import numpy as np
from gym import spaces
from gym.spaces.utils import flatdim
from torch import Tensor, nn
from torch.nn import Flatten # type: ignore
from torch.optim.optimizer import Optimizer
from sequoia.common.hparams import HyperParameters
from sequoia.common.loss import Loss
from sequoia.settings import Actions, Rewards, Setting
from sequoia.utils import Parseable, get_logger
from ..forward_pass import ForwardPass
logger = get_logger(__name__)
class OutputHead(nn.Module, ABC):
    """Abstract base class for the output head of the model.

    Subclasses implement `forward`, which produces the "actions" from the
    observations and their representations, and `get_loss`, which turns these
    actions and the corresponding rewards into a `Loss`.
    """

    # TODO: Rename this to 'output' and create some ClassificationHead,
    # RegressionHead, ValueHead, etc. subclasses with the corresponding names.
    name: ClassVar[str] = "classification"

    # Reference to the optimizer of the BaseModel.
    base_model_optimizer: ClassVar[Optimizer]

    @dataclass
    class HParams(HyperParameters, Parseable):
        """Hyperparameters of the output head."""

    def __init__(
        self,
        input_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space = None,
        hparams: "OutputHead.HParams" = None,
        name: str = "",
    ):
        """Store the spaces and hyper-parameters of the head.

        Parameters
        ----------
        input_space : gym.Space
            Space of the inputs of the head; `self.input_size` is its flat
            dimension.
        action_space : gym.Space
            Space of the actions the head produces.
        reward_space : gym.Space, optional
            Space of the rewards. When falsy, an unbounded scalar `Box` is used.
        hparams : OutputHead.HParams, optional
            Hyper-parameters. When falsy, `self.HParams()` is used. When they
            aren't an instance of `self.HParams`, they are upgraded with
            `upgrade_hparams`.
        name : str, optional
            Name of the head. When empty, the class-level `name` is used.
        """
        super().__init__()

        self.input_space = input_space
        self.action_space = action_space
        self.reward_space = reward_space or spaces.Box(-np.inf, np.inf, ())
        self.input_size = flatdim(input_space)

        self.hparams = hparams or self.HParams()
        if not isinstance(self.hparams, self.HParams):
            # Upgrade the hparams to the right type, if needed.
            self.hparams = self.upgrade_hparams()
        self.name = name or type(self).name

    def make_dense_network(
        self,
        in_features: int,
        hidden_neurons: Sequence[int],
        out_features: int,
        activation: Type[nn.Module] = nn.ReLU,
    ):
        """Create a `Flatten` layer followed by a fully-connected network.

        One `Linear` + `activation` pair is created for each entry of
        `hidden_neurons`, followed by a final `Linear` layer with
        `out_features` outputs.
        """
        hidden_layers: List[nn.Module] = []
        layer_input_size = in_features
        for neurons in hidden_neurons:
            hidden_layers.append(nn.Linear(layer_input_size, neurons))
            hidden_layers.append(activation())
            layer_input_size = neurons  # next input size is output size of prev.
        return nn.Sequential(
            nn.Flatten(), *hidden_layers, nn.Linear(layer_input_size, out_features)
        )

    @abstractmethod
    def forward(
        self, observations: Setting.Observations, representations: Tensor
    ) -> Setting.Actions:
        """Given the observations and their representations, produce "actions".

        Parameters
        ----------
        observations : Observations
            Object containing the input examples.
        representations : Any
            The results of encoding the input examples.

        Returns
        -------
        Actions
            An object containing the action to take, and which can be used to
            calculate the loss later on.
        """

    @abstractmethod
    def get_loss(self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards) -> Loss:
        """Given the forward pass (a dict-like object that includes the
        observations, representations and actions), the actions produced by this
        output head and the resulting rewards, returns a Loss to use.
        """

    def clear_all_buffers(self) -> None:
        """Optional method that gets called when using multiple output heads, to
        prevent keeping stale gradients around after the model that produced them gets
        updated during training.
        """

    def upgrade_hparams(self):
        """Upgrades the hparams at `self.hparams` to the right type for this
        output head (`type(self).HParams`), filling in any missing values by
        parsing them from the command-line.

        When `self.hparams._argv` is set, the new hparams are parsed entirely
        from it. Otherwise the current values are kept, except for the
        "missing" fields (absent, or still at a default value), which are taken
        from the freshly parsed hparams.

        Returns
        -------
        type(self).HParams
            Hparams of the type `self.HParams`, with the original values
            preserved and any new values parsed from the command-line.
        """
        # NOTE: This (getting the wrong hparams class) could happen for
        # instance when parsing a BaseMethod from the command-line, the
        # default type of hparams on the method is BaseModel.HParams,
        # whose `output_head` field doesn't have the right type exactly.
        current_hparams = self.hparams.to_dict()
        # TODO: If a value is not at its current default, keep it.
        new_defaults = self.HParams()
        # Defaults of the *current* hparams type: created once, rather than
        # once for every field that gets checked below.
        old_defaults = type(self.hparams)()
        missing_fields = [
            f.name
            for f in dataclasses.fields(self.HParams)
            if f.name not in current_hparams
            or current_hparams[f.name] == getattr(old_defaults, f.name, None)
            or current_hparams[f.name] == getattr(new_defaults, f.name)
        ]
        logger.warning(
            RuntimeWarning(
                f"Upgrading the hparams from type {type(self.hparams)} to "
                f"type {self.HParams}. This will try to fetch the values for "
                f"the missing fields {missing_fields} from the command-line. "
            )
        )
        # Get the missing values
        parsed_hparams = self.HParams.from_args(argv=self.hparams._argv, strict=False)
        if self.hparams._argv:
            return parsed_hparams
        for missing_field in missing_fields:
            current_hparams[missing_field] = getattr(parsed_hparams, missing_field)
        return self.HParams(**current_hparams)
================================================
FILE: sequoia/methods/models/output_heads/regression_head.py
================================================
from dataclasses import dataclass
from typing import List
import gym
from gym import spaces
from torch import Tensor, nn
from sequoia.common import Loss, RegressionMetrics
from sequoia.settings import Actions, Observations, Rewards
from sequoia.utils.utils import prod
from ..fcnet import FCNet
from ..forward_pass import ForwardPass
from .output_head import OutputHead
class RegressionHead(OutputHead):
    """Output head used for regression problems."""

    @dataclass
    class HParams(FCNet.HParams, OutputHead.HParams):
        """Hyper-parameters of the regression output head."""

    def __init__(
        self,
        input_space: gym.Space,
        action_space: gym.Space,
        reward_space: gym.Space = None,
        hparams: OutputHead.HParams = None,
        name: str = "regression",
    ):
        """Create the head; `action_space` must be a `spaces.Box` with at most one dimension.

        All arguments are forwarded to `OutputHead.__init__`.

        Raises:
            NotImplementedError: if the shape of `action_space` has more than
                one dimension.
        """
        assert isinstance(action_space, spaces.Box)
        if len(action_space.shape) > 1:
            raise NotImplementedError(
                f"TODO: Regression head doesn't support output shapes that are "
                f"more than 1d for atm, (output space: {action_space})."
            )
        # TODO: Add support for something like a "decoder head" (maybe as a
        # subclass of RegressionHead)?
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        output_size = prod(action_space.shape)

        # Reuse the helper of the base class instead of duplicating its loop:
        # it builds the same Flatten -> (Linear -> ReLU)* -> Linear stack.
        self.dense = self.make_dense_network(
            in_features=self.input_size,
            hidden_neurons=self.hparams.hidden_neurons,
            out_features=output_size,
            activation=nn.ReLU,
        )
        self.loss_fn = nn.MSELoss()

    def forward(self, observations: Observations, representations: Tensor) -> Actions:
        """Predict the targets from `representations`; `observations` isn't used."""
        y_pred = self.dense(representations)
        return Actions(y_pred)

    def get_loss(self, forward_pass: ForwardPass, actions: Actions, rewards: Rewards) -> Loss:
        """Mean-squared-error loss between `actions.y_pred` and `rewards.y`.

        The `actions` argument (the actions produced by this head) is used,
        rather than `forward_pass.actions`, as is done in `ClassificationHead`.
        The returned `Loss` is named after this head and carries regression
        metrics stored under the same name.
        """
        y_pred: Tensor = actions.y_pred
        y: Tensor = rewards.y

        loss_tensor = self.loss_fn(y_pred, y)
        metrics = RegressionMetrics(y_pred=y_pred, y=y)

        assert self.name, "Output Heads should have a name!"
        return Loss(
            name=self.name,
            loss=loss_tensor,
            # NOTE: we're passing the tensors to the Loss object because we let
            # it create the Metrics for us automatically.
            metrics={self.name: metrics},
        )
================================================
FILE: sequoia/methods/models/output_heads/rl/__init__.py
================================================
from .actor_critic_head import ActorCriticHead
from .policy_head import PolicyHead
================================================
FILE: sequoia/methods/models/output_heads/rl/actor_critic_head.py
================================================
""" An output head for RL based on Advantage Actor Critic.
NOTE: This is the 'online' version of an Advantage Actor Critic, based
on the following blog:
https://medium.com/deeplearningmadeeasy/advantage-actor-critic-a2c-implementation-944e98616b
"""
from dataclasses import dataclass
from typing import Optional, Tuple
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from torch import Tensor, nn
from sequoia.common import Loss
from sequoia.settings import ContinualRLSetting
from sequoia.utils import get_logger
from ...forward_pass import ForwardPass
from ..classification_head import ClassificationHead
from .policy_head import Categorical, PolicyHeadOutput
logger = get_logger(__name__)
class ActorCriticHead(ClassificationHead):
@dataclass
class HParams(ClassificationHead.HParams):
    """Hyper-parameters of the Actor-Critic head."""

    # NOTE(review): presumably the discount factor of the returns - not used in
    # the visible part of this file, confirm against `get_loss`.
    gamma: float = 0.95
    # Learning rate of the Adam optimizers of both the actor and the critic.
    learning_rate: float = 1e-3
def __init__(
self,
input_space: spaces.Space,
action_space: spaces.Discrete,
reward_space: spaces.Box,
hparams: "ActorCriticHead.HParams" = None,
name: str = "actor_critic",
):
assert isinstance(action_space, spaces.Discrete), "Only support discrete space for now."
super().__init__(
input_space=input_space,
action_space=action_space,
reward_space=reward_space,
hparams=hparams,
name=name,
)
if not isinstance(self.hparams, self.HParams):
self.hparams = self.upgrade_hparams()
action_dims = flatdim(action_space)
# Critic takes in state-action pairs? or just state?
self.critic_input_dims = self.input_size
# self.critic_input_dims = self.input_size + action_dims
self.critic_output_dims = 1
self.critic = nn.Sequential(
# Lambda(concat_obs_and_action),
nn.Flatten(),
nn.Linear(self.critic_input_dims, 32),
nn.ReLU(),
nn.Linear(32, self.critic_output_dims),
)
self.actor_input_dims = self.input_size
self.actor_output_dims = action_dims
self.actor = nn.Sequential(
nn.Flatten(),
nn.Linear(self.actor_input_dims, 32),
nn.ReLU(),
nn.Linear(32, self.actor_output_dims),
)
self._current_state: Optional[Tensor] = None
self._previous_state: Optional[Tensor] = None
self._step = 0
self.optimizer = torch.optim.Adam(self.actor.parameters(), lr=self.hparams.learning_rate)
self.optimizer_critic = torch.optim.Adam(
self.critic.parameters(), lr=self.hparams.learning_rate
)
def forward(
self, observations: ContinualRLSetting.Observations, representations: Tensor
) -> PolicyHeadOutput:
# NOTE: Here we could probably use either as the 'state':
# state = observations.x
# state = representations
representations = representations.float()
if len(representations.shape) != 2:
representations = representations.reshape([-1, self.actor_input_dims])
self._previous_state = self._current_state
self._current_state = representations
# TODO: Actually implement the actor-critic forward pass.
# predicted_reward = self.critic([state, action])
# Do we want to detach the representations? or not?
logits = self.actor(representations)
# The policy is the distribution over actions given the current state.
action_dist = Categorical(logits=logits)
if action_dist.has_rsample:
sample = action_dist.rsample()
else:
sample = action_dist.sample()
actions = PolicyHeadOutput(
y_pred=sample,
logits=logits,
action_dist=action_dist,
)
return actions
def get_loss(
self,
forward_pass: ForwardPass,
actions: PolicyHeadOutput,
rewards: ContinualRLSetting.Rewards,
) -> Loss:
action_dist: Categorical = actions.action_dist
rewards = rewards.to(device=actions.device)
env_reward = torch.as_tensor(rewards.y, device=actions.device)
observations: ContinualRLSetting.Observations = forward_pass.observations
done = observations.done
assert done is not None, "Need the end-of-episode signal!"
done = torch.as_tensor(done, device=actions.device)
assert self._current_state is not None
if self._previous_state is None:
# Only allow this once!
assert self._step == 0
self._previous_state = self._current_state
self._step += 1
# TODO: Need to detach something here, right?
advantage: Tensor = (
env_reward
+ (~done) * self.hparams.gamma * self.critic(self._current_state)
- self.critic(self._previous_state) # detach previous representations?
)
total_loss = Loss(self.name)
if self.training:
self.optimizer_critic.zero_grad()
critic_loss_tensor = (advantage**2).mean()
critic_loss = Loss("critic", loss=critic_loss_tensor)
if self.training:
critic_loss_tensor.backward()
self.optimizer_critic.step()
total_loss += critic_loss.detach()
if self.training:
self.optimizer.zero_grad()
actor_loss_tensor = -action_dist.log_prob(actions.action) * advantage.detach()
actor_loss_tensor = actor_loss_tensor.mean()
actor_loss = Loss("actor", loss=actor_loss_tensor)
if self.training:
actor_loss_tensor.backward()
self.optimizer.step()
total_loss += actor_loss.detach()
return total_loss
def concat_obs_and_action(observation_action: Tuple[Tensor, Tensor]) -> Tensor:
    """Flatten an (observation, action) pair per sample and join them.

    Both tensors are reshaped to `[batch_size, -1]`, where `batch_size` is the
    first dimension of the observation, and then concatenated along the last
    dimension.
    """
    obs, act = observation_action
    n_samples = obs.shape[0]
    flattened = [tensor.reshape([n_samples, -1]) for tensor in (obs, act)]
    return torch.cat(flattened, dim=-1)
================================================
FILE: sequoia/methods/models/output_heads/rl/episodic_a2c.py
================================================
""" TODO: IDEA: Similar to ActorCriticHead, but episodic, i.e. only gives a Loss at
the end of the episode, rather than at each step.
"""
from dataclasses import dataclass
from typing import ClassVar, Deque, List, Optional
import numpy as np
import torch
from gym import spaces
from torch import Tensor, nn
from torch.nn import functional as F
from sequoia.common import Loss
from sequoia.common.hparams import categorical, uniform
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings import ContinualRLSetting
from sequoia.settings.base import Rewards
from sequoia.utils import get_logger
from .policy_head import PolicyHead, PolicyHeadOutput, normalize
logger = get_logger(__name__)
@dataclass(frozen=True)
class A2CHeadOutput(PolicyHeadOutput):
    """Output produced by the A2C output head.

    Extends `PolicyHeadOutput` with the critic's value estimate.
    """

    # The value estimate coming from the critic.
    value: Tensor
class EpisodicA2C(PolicyHead):
    """Advantage-Actor-Critic output head that produces a loss only at end of
    episode.

    The actor is the `dense` network of the `PolicyHead`; this class adds a
    critic network that estimates the value of each state.

    TODO: This could actually produce a loss every N steps, rather than just at
    the end of the episode.
    """

    name: ClassVar[str] = "episodic_a2c"

    @dataclass
    class HParams(PolicyHead.HParams):
        """Hyper-parameters of the episodic A2C output head."""

        # Whether to normalize the advantages for each episode.
        normalize_advantages: bool = categorical(True, False, default=False)

        # Weight of the actor (policy gradient) term in the total loss.
        actor_loss_coef: float = uniform(0.1, 1, default=0.5)
        # Weight of the critic (value) term in the total loss.
        critic_loss_coef: float = uniform(0.1, 1, default=0.5)
        # Weight of the entropy term in the total loss.
        entropy_loss_coef: float = uniform(0, 1, default=0.1)

        # Maximum norm of the policy gradient.
        max_policy_grad_norm: Optional[float] = None

        # The discount factor.
        gamma: float = uniform(0.9, 0.999, default=0.99)

    def __init__(
        self,
        input_space: spaces.Box,
        action_space: spaces.Discrete,
        reward_space: spaces.Box,
        hparams: HParams = None,
        name: str = "episodic_a2c",
    ):
        """Create the head and its critic network."""
        super().__init__(
            input_space=input_space,
            action_space=action_space,
            reward_space=reward_space,
            hparams=hparams,
            name=name,
        )
        self.hparams: EpisodicA2C.HParams
        # Critic takes in state-action pairs? or just state?
        # For now the critic only sees the state.
        self.critic_input_dims = self.input_size
        # self.critic_input_dims = self.input_size + action_dims
        self.critic_output_dims = 1
        self.critic = self.make_dense_network(
            in_features=self.critic_input_dims,
            hidden_neurons=self.hparams.hidden_neurons,
            out_features=self.critic_output_dims,
            activation=self.hparams.activation,
        )
        self.actions: List[Deque[A2CHeadOutput]]
        self._current_state: Optional[Tensor] = None
        self._previous_state: Optional[Tensor] = None
        self._step = 0

    @property
    def actor(self) -> nn.Module:
        """The policy network (an alias for `self.dense`)."""
        return self.dense

    def forward(
        self, observations: ContinualRLSetting.Observations, representations: Tensor
    ) -> A2CHeadOutput:
        """Return the `PolicyHead` output, augmented with the critic's value estimate."""
        actions: PolicyHeadOutput = super().forward(observations, representations)
        # TODO: Shouldn't the critic also take the actor's action as an input?
        value = self.critic(representations)
        # We just need to add the value to the actions of the PolicyHead.
        # This works, because `self.actor` :== `self.dense`, which is what's used by
        # the PolicyHead.
        actions = A2CHeadOutput(
            y_pred=actions.y_pred,
            logits=actions.logits,
            action_dist=actions.action_dist,
            value=value,
        )
        return actions

    def num_stored_steps(self, env_index: int) -> Optional[int]:
        """Returns the number of steps stored in the buffer for the given
        environment index.

        If there are no buffers for the given env, returns None
        """
        if not self.actions or env_index >= len(self.actions):
            return None
        return len(self.actions[env_index])

    def get_episode_loss(self, env_index: int, done: bool) -> Optional[Loss]:
        """Compute the A2C loss for the episode stored for environment `env_index`.

        Returns None when the episode is not over (`done` is falsy) or when
        fewer than 5 steps are stored for that environment. Otherwise returns
        a `Loss` made of weighted 'actor', 'critic' and 'entropy' terms, with
        episode metrics and gradient-usage metrics attached.
        """
        # IDEA: Actually, now that I think about it, instead of detaching the
        # tensors, we could instead use the critic's 'value' estimate and get a
        # loss for that incomplete episode using the tensors in the buffer,
        # rather than detaching them!
        if not done:
            return None

        # TODO: Add something like a 'num_steps_since_update' for each env? (it
        # would actually be a num_steps_since_backward)
        # if self.num_steps_since_update?

        # `num_stored_steps` returns None when there is no buffer for this
        # env; treat that like an empty buffer instead of comparing None < 5.
        n_stored_steps = self.num_stored_steps(env_index) or 0
        if n_stored_steps < 5:
            # For now, we only give back a loss at the end of the episode.
            # TODO: Test if giving back a loss at each step or every few steps
            # would work better!
            logger.warning(
                RuntimeWarning(
                    f"Returning None as the episode loss, because only have "
                    f"{n_stored_steps} steps stored for that environment."
                )
            )
            return None

        inputs: Tensor
        actions: A2CHeadOutput
        rewards: Rewards
        inputs, actions, rewards = self.stack_buffers(env_index)
        action_log_probs: Tensor = actions.action_log_prob
        values: Tensor = actions.value
        assert rewards.y is not None
        episode_rewards: Tensor = rewards.y

        returns = self.get_returns(episode_rewards, gamma=self.hparams.gamma).type_as(values)
        # The critic's values may carry a trailing dimension that the returns
        # lack (the value loss below already reshapes for that). Align them
        # before subtracting, so the difference is taken step by step rather
        # than broadcast into a matrix.
        advantages = returns - values.reshape(returns.shape)

        # Normalize advantage (not present in the original implementation)
        if self.hparams.normalize_advantages:
            advantages = normalize(advantages)

        # Create the Loss to be returned.
        loss = Loss(self.name)

        # Policy gradient loss (actor loss)
        # NOTE(review): assumes `action_log_probs` has the same shape as the returns.
        policy_gradient_loss = -(advantages.detach() * action_log_probs).mean()
        actor_loss = Loss("actor", policy_gradient_loss)
        loss += self.hparams.actor_loss_coef * actor_loss

        # Value loss: Try to get the critic's values close to the actual return,
        # which means the advantages should be close to zero.
        value_loss_tensor = F.mse_loss(values, returns.reshape(values.shape))
        critic_loss = Loss("critic", value_loss_tensor)
        loss += self.hparams.critic_loss_coef * critic_loss

        # Entropy loss, to "favor exploration".
        entropy_loss_tensor = -actions.action_dist.entropy().mean()
        entropy_loss = Loss("entropy", entropy_loss_tensor)
        loss += self.hparams.entropy_loss_coef * entropy_loss

        if done:
            episode_rewards_array = episode_rewards.reshape([-1])
            loss.metric = EpisodeMetrics(
                n_samples=1,
                mean_episode_reward=float(episode_rewards_array.sum()),
                mean_episode_length=len(episode_rewards_array),
            )
        loss.metrics["gradient_usage"] = self.get_gradient_usage_metrics(env_index)
        return loss

    def optimizer_step(self):
        """Optionally clip the actor's gradient norm, then run the parent's optimizer step."""
        # Clip grad norm if desired.
        if self.hparams.max_policy_grad_norm is not None:
            original_norm: Tensor = torch.nn.utils.clip_grad_norm_(
                self.actor.parameters(),
                self.hparams.max_policy_grad_norm,
            )
            self.loss.metrics["policy_gradient_norm"] = original_norm.item()
        super().optimizer_step()

    def compute_returns_and_advantage(self, last_values: Tensor, dones: np.ndarray) -> None:
        """
        TODO: Adapting this snippet from SB3's common/buffers.py RolloutBuffer.

        Post-processing step: compute the returns (sum of discounted rewards)
        and GAE advantage.
        Adapted from Stable-Baselines PPO2.

        Uses Generalized Advantage Estimation (https://arxiv.org/abs/1506.02438)
        to compute the advantage. To obtain vanilla advantage (A(s) = R - V(S))
        where R is the discounted reward with value bootstrap,
        set ``gae_lambda=1.0`` during initialization.

        The results are stored in `self.advantages` and `self.returns`.

        NOTE(review): this reads `self.buffer_size`, `self.dones`,
        `self.rewards` and `self.values` as flat rollout arrays; nothing in
        this class sets them up that way yet, so confirm before relying on it.

        :param last_values: Value estimates for the state following the last stored step.
        :param dones: Whether that following state ended an episode.
        """
        buffer_size: int = self.buffer_size
        # Kept under a separate name so the `dones` argument (which describes
        # the step *after* the buffer) is not overwritten by the stored flags.
        buffer_dones: np.ndarray = self.dones
        rewards: np.ndarray = self.rewards
        values: np.ndarray = self.values
        gamma: float = self.hparams.gamma
        gae_lambda: float = 1.0

        # convert to numpy
        last_values = last_values.clone().cpu().numpy().flatten()
        advantages = np.zeros_like(rewards)
        last_gae_lam = 0
        for step in reversed(range(buffer_size)):
            if step == buffer_size - 1:
                next_non_terminal = 1.0 - dones
                next_values = last_values
            else:
                next_non_terminal = 1.0 - buffer_dones[step + 1]
                next_values = values[step + 1]
            delta = rewards[step] + gamma * next_values * next_non_terminal - values[step]
            last_gae_lam = delta + gamma * gae_lambda * next_non_terminal * last_gae_lam
            advantages[step] = last_gae_lam
        self.advantages = advantages
        self.returns = advantages + values
================================================
FILE: sequoia/methods/models/output_heads/rl/episodic_a2c_test.py
================================================
from functools import partial
from typing import Callable, Optional, Sequence
import gym
import numpy as np
import pytest
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from gym.vector import SyncVectorEnv
from gym.vector.utils import batch_space
from torch import Tensor, nn
from sequoia.common.gym_wrappers import AddDoneToObservation, ConvertToFromTensors, EnvDataset
from sequoia.common.loss import Loss
from sequoia.conftest import DummyEnvironment
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.settings.rl.continual import ContinualRLSetting
from .episodic_a2c import EpisodicA2C
from .policy_head import PolicyHead
class FakeEnvironment(SyncVectorEnv):
    """Vectorized environment stub with scripted episode lengths.

    `step` ignores the actions: it returns a sampled observation, a reward of
    one for every env, and a `done` flag that is raised exactly when an env's
    remaining-step counter reaches zero. The counter of a finished env is then
    refilled using `new_episode_length(env_index)`.
    """

    def __init__(
        self,
        env_fn: Callable[[], gym.Env],
        batch_size: int,
        new_episode_length: Callable[[int], int],
        episode_lengths: Sequence[int] = None,
    ):
        env_fns = [env_fn] * batch_size
        super().__init__(env_fns)
        self.new_episode_length = new_episode_length
        self.batch_size = batch_size
        # Fall back on `new_episode_length` when no initial lengths are given.
        if not episode_lengths:
            episode_lengths = [new_episode_length(i) for i in range(self.num_envs)]
        self.episode_lengths = np.array(episode_lengths)
        self.steps_left_in_episode = self.episode_lengths.copy()

        single_reward_space = spaces.Box(*self.reward_range, shape=())
        self.single_reward_space = single_reward_space
        self.reward_space = batch_space(single_reward_space, batch_size)

    def step(self, actions):
        """Advance every env by one (fake) step and return `(obs, reward, done, info)`."""
        self.steps_left_in_episode -= 1

        # obs, reward, done, info = super().step(actions)
        obs = self.observation_space.sample()
        reward = np.ones(self.batch_size)

        assert not (self.steps_left_in_episode < 0).any()
        done = self.steps_left_in_episode == 0

        info = np.array([{} for _ in range(self.batch_size)])

        # Start a new episode in each env that just finished.
        for finished_index in np.flatnonzero(done):
            env_index = int(finished_index)
            fresh_length = self.new_episode_length(env_index)
            self.episode_lengths[env_index] = fresh_length
            self.steps_left_in_episode[env_index] = fresh_length

        return obs, reward, done, info
@pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)")
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_with_controllable_episode_lengths(batch_size: int, monkeypatch):
    """TODO: Test out the EpisodicA2C output head in a very controlled environment,
    where we know exactly the lengths of each episode.

    The first env has a first episode of 5 steps; every other episode (in any
    env) lasts 10 steps. The rewards are always one, and the expected losses
    below were copied from the PolicyHead test (hence the xfail marker).
    """
    env = FakeEnvironment(
        partial(gym.make, "CartPole-v0"),
        batch_size=batch_size,
        episode_lengths=[5, *(10 for _ in range(batch_size - 1))],
        new_episode_length=lambda env_index: 10,
    )
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    obs_space = env.single_observation_space
    x_dim = flatdim(obs_space["x"])
    # Create some dummy encoder.
    encoder = nn.Linear(x_dim, x_dim)
    representation_space = obs_space["x"]

    output_head = EpisodicA2C(
        input_space=representation_space,
        action_space=env.single_action_space,
        reward_space=env.single_reward_space,
        hparams=PolicyHead.HParams(
            max_episode_window_length=100,
            min_episodes_before_update=1,
            accumulate_losses_before_backward=False,
        ),
    )
    # TODO: Simplify the loss function somehow using monkeypatch so we know exactly what
    # the loss should be at each step.

    batch_size = env.batch_size

    obs = env.reset()
    # NOTE: `np.bool` was only an alias of the builtin `bool` and has been
    # removed from recent NumPy releases, so the builtin is used directly.
    step_done = np.zeros(batch_size, dtype=bool)

    for step in range(200):
        x, obs_done = obs

        # The done from the obs should always be the same as the 'done' from the 'step' function.
        assert np.array_equal(obs_done, step_done)

        representations = encoder(x)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=obs_done,
        )

        actions_obj = output_head(observations, representations)
        actions = actions_obj.y_pred

        # TODO: kinda useless to wrap a single tensor in an object..
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )
        obs, rewards, step_done, info = env.step(actions)

        rewards_obj = ContinualRLSetting.Rewards(y=rewards)
        loss = output_head.get_loss(
            forward_pass=forward_pass,
            actions=actions_obj,
            rewards=rewards_obj,
        )
        print(f"Step {step}")
        print(f"num episodes since update: {output_head.num_episodes_since_update}")
        print(f"steps left in episode: {env.steps_left_in_episode}")
        print(f"Loss for that step: {loss}")

        if any(obs_done):
            assert loss != 0.0

        if step == 5:
            # Env 0 first episode from steps 0 -> 5
            assert loss.loss == 5.0
            assert loss.metrics["gradient_usage"].used_gradients == 5.0
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 10:
            # Envs[1:batch_size], first episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 15:
            # Env 0 second episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6
        elif step == 20:
            # Envs[1:batch_size]: second episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)
        elif step == 25:
            # Env 0 third episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6
        elif step > 0 and step % 10 == 0:
            # Same pattern as step 20 above
            assert loss.loss == 10.0 * (batch_size - 1), step
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)
        elif step > 0 and step % 5 == 0:
            # Same pattern as step 25 above
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6
        else:
            assert loss.loss == 0.0, step
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end(batch_size: int):
    """Check that the head gives a non-zero loss exactly when an episode ends.

    Steps through a vectorized CartPole env for 100 steps. On steps where the
    observation carries a raised `done` flag for some env, the loss must be
    non-zero; on every other step it must be None or zero. At least one
    non-zero loss has to be observed overall.
    """
    # A throwaway single env, only used to read off the un-batched spaces.
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)
        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        fallback_reward_space = spaces.Box(*temp_env.reward_range, shape=())
        reward_space = getattr(temp_env, "reward_space", fallback_reward_space)

    env = EnvDataset(
        ConvertToFromTensors(
            AddDoneToObservation(
                gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
            )
        )
    )

    head = EpisodicA2C(
        input_space=obs_space["x"],
        action_space=action_space,
        reward_space=reward_space,
        hparams=EpisodicA2C.HParams(accumulate_losses_before_backward=False),
    )
    head.train()

    env.seed(123)
    obs = env.reset()

    # obs = torch.as_tensor(obs, dtype=torch.float32)
    done = torch.zeros(batch_size, dtype=bool)
    non_zero_losses = 0

    encoder = nn.Linear(4, 4)
    encoder.train()

    for i in range(100):
        x = obs["x"]
        representations = encoder(x)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=done,
            # info=info,
        )
        head_output = head.forward(observations, representations=representations)

        # actions = np.zeros(batch_size, dtype=int).tolist()
        obs, rewards, done, info = env.step(head_output.actions.numpy().tolist())
        done = torch.as_tensor(done, dtype=bool)
        rewards = ContinualRLSetting.Rewards(rewards)
        assert len(info) == batch_size

        print(f"Step {i}, obs: {obs}, done: {done}, info: {info}")

        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        assert observations.done is not None
        # Indices of the envs whose episode ended; only the first one is reported.
        ended_envs = [index for index, flag in enumerate(observations.done) if flag]
        if ended_envs:
            env_index = ended_envs[0]
            print(f"Episode ended for env {env_index} at step {i}")
            assert loss.loss != 0.0
            non_zero_losses += 1
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss is None or loss.loss == 0.0

    assert non_zero_losses > 0
@pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)")
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end_iterate(batch_size: int):
    """Test that when *iterating* through the env (active-dataloader style),
    when the episode ends, a non-zero loss is returned by the output head.

    Currently marked as an expected failure (see the xfail reason above).
    """
    # A throwaway single env, only used to read off the un-batched spaces.
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)
        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    head = EpisodicA2C(
        # observation_space=obs_space,
        input_space=obs_space["x"],
        action_space=action_space,
        reward_space=reward_space,
        hparams=EpisodicA2C.HParams(accumulate_losses_before_backward=False),
    )

    env.seed(123)
    non_zero_losses = 0

    # Iterate over the env for at most 100 steps: observations come from the
    # iterator, actions are passed back with `env.send`, which returns the rewards.
    for i, obs in zip(range(100), env):
        print(i, obs)
        x = obs["x"]
        done = obs[1]
        # No encoder is used in this test: the representations are just `x`.
        representations = x
        assert isinstance(x, Tensor)
        assert isinstance(done, Tensor)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=done,
            # info=info,
        )
        head_output = head.forward(observations, representations=representations)

        actions = head_output.actions.numpy().tolist()
        # actions = np.zeros(batch_size, dtype=int).tolist()

        rewards = env.send(actions)

        # print(f"Step {i}, obs: {obs}, done: {done}")
        assert isinstance(representations, Tensor)
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        rewards = ContinualRLSetting.Rewards(rewards)
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        # The `else` branch only runs when no env had its `done` flag raised.
        for env_index, env_is_done in enumerate(observations.done):
            if env_is_done:
                print(f"Episode ended for env {env_index} at step {i}")
                assert loss.total_loss != 0.0
                non_zero_losses += 1
                break
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss.total_loss == 0.0

    assert non_zero_losses > 0
@pytest.mark.xfail(reason="TODO: Adapt this test for EpisodicA2C (copied form policy_head_test.py)")
@pytest.mark.xfail(reason="TODO: Fix this test")
def test_buffers_are_stacked_correctly(monkeypatch):
    """TODO: Test that when "de-synced" episodes, when fed to the output head,
    get passed, re-stacked correctly, to the get_episode_loss function.

    Uses 5 `DummyEnvironment`s with starting values 0..4 and a common target
    of 10. Currently marked as an expected failure.
    """
    batch_size = 5

    starting_values = [i for i in range(batch_size)]
    targets = [10 for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    obs = env.reset()
    assert obs.tolist() == list(range(batch_size))

    reward_space = spaces.Box(*env.reward_range, shape=())
    output_head = PolicyHead(  # observation_space=spaces.Tuple([env.observation_space,
        #                          spaces.Box(False, True, [batch_size], np.bool)]),
        input_space=spaces.Box(0, 1, (1,)),
        action_space=env.single_action_space,
        reward_space=reward_space,
    )
    # Set the max window length, for testing.
    output_head.hparams.max_episode_window_length = 100

    obs = initial_obs = env.reset()
    done = np.zeros(batch_size, dtype=bool)

    obs = torch.from_numpy(obs)
    done = torch.from_numpy(done)

    # Replacement for `PolicyHead.get_episode_loss` which checks the contents
    # of the inputs it receives rather than computing a loss.
    def mock_get_episode_loss(
        self: PolicyHead,
        env_index: int,
        inputs: Tensor,
        actions: ContinualRLSetting.Observations,
        rewards: ContinualRLSetting.Rewards,
        done: bool,
    ) -> Optional[Loss]:
        print(f"Environment at index {env_index}, episode ended: {done}")
        if done:
            print(f"Full episode: {inputs}")
        else:
            print(f"Episode so far: {inputs}")

        n_observations = len(inputs)

        # The inputs for env `i` are expected to be `i, i + 1, i + 2, ...`.
        assert inputs.flatten().tolist() == (env_index + np.arange(n_observations)).tolist()
        if done:
            # Unfortunately, we don't get the final state, because of how
            # VectorEnv works atm.
            assert inputs[-1] == targets[env_index] - 1

    monkeypatch.setattr(PolicyHead, "get_episode_loss", mock_get_episode_loss)

    # perform 10 iterations, incrementing each DummyEnvironment's counter at
    # each step (action of 1).
    # Therefore, at first, the counters should be [0, 1, 2, ... batch-size-1].
    info = [{} for _ in range(batch_size)]

    for step in range(10):
        print(f"Step {step}.")
        # Wrap up the obs to pretend that this is the data coming from a
        # ContinualRLSetting.
        observations = ContinualRLSetting.Observations(x=obs, done=done)  # , info=info)
        # We don't use an encoder for testing, so the representations is just x.
        representations = obs.reshape([batch_size, 1])
        assert observations.task_labels is None

        actions = output_head(observations.float(), representations.float())

        # Wrap things up to pretend like the output head is being used in the
        # BaseModel:
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )

        action_np = actions.actions_np

        obs, rewards, done, info = env.step(action_np)

        obs = torch.from_numpy(obs)
        rewards = torch.from_numpy(rewards)
        done = torch.from_numpy(done)

        rewards = ContinualRLSetting.Rewards(y=rewards)
        loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)

        # Check the contents of the episode buffers.

        assert len(output_head.representations) == batch_size
        for env_index in range(batch_size):
            # obs_buffer = output_head.observations[env_index]
            representations_buffer = output_head.representations[env_index]
            action_buffer = output_head.actions[env_index]
            reward_buffer = output_head.rewards[env_index]

            if step >= batch_size:
                if step + env_index == targets[env_index]:
                    assert len(representations_buffer) == 1 and output_head.done[env_index] == False
                # if env_index == step - batch_size:
                # From this step on, the remaining per-env checks are skipped.
                continue
            assert len(representations_buffer) == step + 1
            # Check to see that the last entry in the episode buffer for this
            # environment corresponds to the slice of the most recent
            # observations/actions/rewards at the index corresponding to this
            # environment.
            # observation_tuple = input_buffer[-1]
            step_action = action_buffer[-1]
            step_reward = reward_buffer[-1]
            # assert observation_tuple.x == observations.x[env_index]
            # assert observation_tuple.task_labels is None
            # assert observation_tuple.done == observations.done[env_index]

            # The last element in the buffer should be the slice in the batch
            # for that environment.
            assert step_action.y_pred == actions.y_pred[env_index]
            assert step_reward.y == rewards.y[env_index]

        if step < batch_size:
            assert obs.tolist() == (np.arange(batch_size) + step + 1).tolist()
        # if step >= batch_size:
        #     if step + env_index == targets[env_index]:
        #         assert done

    # assert False, (obs, rewards, done, info)
    # loss: Loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)
================================================
FILE: sequoia/methods/models/output_heads/rl/policy_head.py
================================================
""" Defines a (hopefully general enough) Output Head class to be used by the
BaseMethod when applied on an RL setting.
NOTE: The training procedure is fundamentally on-policy atm, i.e. the
observation is a single state, not a rollout, and the reward is the
immediate reward at the current step.
Therefore, what we do here is to first split things up and push the
observations/actions/rewards into a per-environment buffer, of max
length `self.hparams.max_episode_window_length`. These buffers get
cleared when starting a new episode in their corresponding environment.
The contents of this buffer are then rearranged and presented to the
`get_episode_loss` method in order to get a loss for the given episode.
The `get_episode_loss` method is also given the environment index, and
is passed a boolean `done` that indicates whether the last
items in the sequences it received mark the end of the episode.
TODO: My hope is that this will allow us to implement RL methods that
need a complete episode in order to give a loss to train with, as well
as methods (like A2C, I think) which can give a Loss even when the
episode isn't over yet.
Also, standard supervised learning could be recovered by setting the
maximum length of the 'episode buffer' to 1, and consider all
observations as final, i.e., when episode length == 1
"""
from collections import deque
from dataclasses import dataclass
from typing import ClassVar, Deque, List, Optional, Sequence, Tuple, TypeVar, Union
import numpy as np
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from simple_parsing import list_field
from torch import Tensor
from sequoia.common import Loss
from sequoia.common.metrics.rl_metrics import EpisodeMetrics, GradientUsageMetric
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.utils.categorical import Categorical
from sequoia.utils.generic_functions import stack
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import flag
from ..classification_head import ClassificationHead, ClassificationOutput
logger = get_logger(__name__)
T = TypeVar("T")
@dataclass(frozen=True)
class PolicyHeadOutput(ClassificationOutput):
    """WIP: Adds the action pdf to ClassificationOutput."""

    # The distribution over the actions, either as a single
    # (batched) distribution or as a list of distributions, one for each
    # environment in the batch.
    action_dist: Categorical

    @property
    def y_pred_prob(self) -> Tensor:
        """returns the probabilities for the chosen actions/predictions."""
        # Derived from the log-probability: on torch's `Categorical`, `probs`
        # is a tensor attribute (the full probability table), not a method
        # that can be called with the chosen actions.
        return self.y_pred_log_prob.exp()

    @property
    def y_pred_log_prob(self) -> Tensor:
        """returns the log probabilities for the chosen actions/predictions."""
        return self.action_dist.log_prob(self.y_pred)

    @property
    def action_log_prob(self) -> Tensor:
        """Alias for `y_pred_log_prob`."""
        return self.y_pred_log_prob

    @property
    def action_prob(self) -> Tensor:
        """Alias for `y_pred_prob` (the probability, not the log-probability)."""
        return self.y_pred_prob
## NOTE: Since the gym VectorEnvs actually auto-reset the individual
## environments (and also discard the final state, for some weird
## reason), I added a way to save it into the 'info' dict at the key
## 'final_state'. Assuming that the env this output head gets applied
## on adds the info dict to the observations (using the
## AddInfoToObservations wrapper, for instance), then the 'final'
## observation would be stored in the dict for this environment in
## the Observations object, while the 'observation' you get from step
## is the 'initial' observation of the new episode.
class PolicyHead(ClassificationHead):
"""[WIP] Output head for RL settings.
Uses the REINFORCE algorithm to calculate its loss.
TODOs/issues:
- Only currently works with batch_size == 1
- The buffers are common to training/validation/testing atm..
"""
name: ClassVar[str] = "policy"
@dataclass
class HParams(ClassificationHead.HParams):
    """Hyper-parameters of the policy output head."""

    # Number of hidden layers of the output head.
    hidden_layers: int = 0
    # Number of neurons in each hidden layer.
    hidden_neurons: List[int] = list_field()

    # The discount factor for the Return term.
    gamma: float = 0.99

    # The maximum length of the buffer that will hold the most recent
    # states/actions/rewards of the current episode.
    max_episode_window_length: int = 1000

    # Minimum number of episodes that need to be completed in each env
    # before we update the parameters of the output head.
    min_episodes_before_update: int = 1

    # TODO: Add this mechanism, so that this method could work even when
    # episodes are very long.
    max_steps_between_updates: Optional[int] = None

    # NOTE: Here we have two options:
    # 1- `True`: sum up all the losses and do one larger backward pass,
    #    and have `retain_graph=False`, or
    # 2- `False`: Perform multiple little backward passes, one for each
    #    end-of-episode in a single env, w/ `retain_graph=True`.
    # Option 1 is maybe more performant, as it might only require
    # unrolling the graph once, but would use more memory to store all the
    # intermediate graphs.
    accumulate_losses_before_backward: bool = flag(True)
def __init__(
    self,
    input_space: spaces.Space,
    action_space: spaces.Discrete,
    reward_space: spaces.Box,
    hparams: "PolicyHead.HParams" = None,
    name: str = "policy",
):
    """Create the policy head and its (initially empty) episode state.

    Parameters
    ----------
    input_space : must be a `spaces.Box` (tensor inputs only).
    action_space : must be a `spaces.Discrete`.
    reward_space : must be a `spaces.Box` (scalar rewards).
    hparams : hyper-parameters, forwarded to the parent constructor.
    name : name of the head, forwarded to the parent constructor.
    """
    # Validate the spaces up-front; only this combination is supported.
    assert isinstance(
        input_space, spaces.Box
    ), f"Only support Tensor (box) input space. (got {input_space})."
    assert isinstance(
        action_space, spaces.Discrete
    ), f"Only support discrete action space (got {action_space})."
    assert isinstance(
        reward_space, spaces.Box
    ), f"Reward space should be a Box (scalar rewards) (got {reward_space})."

    super().__init__(
        input_space=input_space,
        action_space=action_space,
        reward_space=reward_space,
        hparams=hparams,
        name=name,
    )
    hparams_json = self.hparams.dumps_json(indent="\t")
    logger.debug("New Output head with hparams: " + hparams_json)

    # Annotation-only statements: narrow the attribute types for readers
    # and type checkers.
    self.hparams: PolicyHead.HParams
    self.input_space: spaces.Box
    self.action_space: spaces.Discrete
    self.reward_space: spaces.Box

    # One buffer (deque) per environment, for each kind of item.
    # TODO: Won't use the 'observations' anymore, will only use the
    # representations from the encoder, so renaming 'representations' to
    # 'observations' in this case.
    # (Should probably come up with another name so this isn't ambiguous).
    # TODO: Perhaps we should register these as buffers so they get
    # persisted correctly? But then we also need to make sure that the grad
    # stuff would work the same way..
    self.representations: List[Deque[Tensor]] = []
    self.actions: List[Deque[PolicyHeadOutput]] = []
    self.rewards: List[Deque[ContinualRLSetting.Rewards]] = []

    # The actual "internal" loss we use for training.
    self.loss: Loss = Loss(self.name)

    # The batch size is unknown (0) until the first forward pass.
    self.batch_size: int = 0
    self.num_episodes_since_update: np.ndarray = np.zeros(1)
    self.num_steps_in_episode: np.ndarray = np.zeros(1)

    self._training: bool = True
    self.device: Optional[Union[str, torch.device]] = None
def create_buffers(self):
    """(Re)create one empty buffer per env for each kind of item.

    Also resets the per-env step and episode counters to arrays of zeros
    of length `self.batch_size`.
    """
    n_envs = self.batch_size
    logger.debug(f"Creating buffers (batch size={n_envs})")
    logger.debug(f"Maximum buffer length: {self.hparams.max_episode_window_length}")

    # Three independent lists of per-env deques.
    self.representations, self.actions, self.rewards = (
        self._make_buffers() for _ in range(3)
    )

    self.num_steps_in_episode = np.zeros(n_envs, dtype=int)
    self.num_episodes_since_update = np.zeros(n_envs, dtype=int)
def forward(
    self, observations: ContinualRLSetting.Observations, representations: Tensor
) -> PolicyHeadOutput:
    """Sample one action per row of `representations` from the policy.

    The first call also records the batch size and creates the per-env
    buffers.

    TODO: Do we actually need the observations here? It is here so we have
    access to the 'done' from the env, but do we really need it here? or
    would there be another (cleaner) way to do this?
    """
    if representations.ndim < 2:
        # Un-batched input: reshape to [-1, flat input dim].
        flat_dim = flatdim(self.input_space)
        representations = representations.reshape([-1, flat_dim])

    if not self.batch_size:
        # First pass (or first pass since the buffers were cleared): set up
        # the buffers holding the most recent representations, actions and
        # rewards of the current episode of each environment.
        self.batch_size = representations.shape[0]
        self.create_buffers()

    logits = self.dense(representations.float())
    # The policy is the distribution over actions given the current state.
    policy = Categorical(logits=logits)
    return PolicyHeadOutput(
        y_pred=policy.sample(),
        logits=logits,
        action_dist=policy,
    )
T = TypeVar("T")

def to(self: T, device: Optional[Union[int, torch.device]] = None, **kwargs) -> T:
    """Delegate to the parent `to`, remembering the target device when given.

    When `device` is not None, it is stored (as a `torch.device`) on the
    `device` attribute of the returned object.
    """
    moved = super().to(device=device, **kwargs)
    if device is None:
        return moved
    moved.device = torch.device(device)
    return moved
def get_loss(
    self,
    forward_pass: ForwardPass,
    actions: PolicyHeadOutput,
    rewards: ContinualRLSetting.Rewards,
) -> Loss:
    """Record the current step in the per-env buffers and return a Loss.

    Given the forward pass, the actions produced by this output head and
    the corresponding rewards for the current step, get a Loss to use for
    training.

    TODO: Replace the `forward_pass` argument with just `observations` and
    `representations` and provide the right (augmented) observations to the
    aux tasks. (Need to design that part later).

    NOTE: If an end of episode was reached in a given environment, we always
    calculate the losses and clear the buffers before adding in the new
    observation.

    Returns
    -------
    Loss
        - Once every env has completed at least `min_episodes_before_update`
          episodes since the last update: the loss gathered so far (the
          internal loss is then reset, the buffers detached and the episode
          counters zeroed).
        - Otherwise, when accumulating losses: an empty Loss.
        - Otherwise, when not accumulating: if the loss of this step requires
          grad, `backward(retain_graph=True)` is called on it and a detached
          copy is returned; else the loss of this step is returned as-is.

    Raises
    ------
    NotImplementedError
        If the batch size of `forward_pass` differs from the one the buffers
        were created for.
    """
    observations: ContinualRLSetting.Observations = forward_pass.observations
    representations: Tensor = forward_pass.representations

    assert self.batch_size, "forward() should have been called before this."
    assert observations.done is not None, "need the end-of-episode signal"

    # Fail before touching any state: the buffers are indexed by env, so a
    # batch of a different size cannot be handled.
    if self.batch_size != forward_pass.batch_size:
        # IDEA: Need to get access to the 'original' env indices (before
        # slicing), so that even when one more environment is in this task,
        # the other environment's buffers remain at the same index..
        # Something like a remapping of env indices?
        raise NotImplementedError(
            "TODO: The batch size changed, because the batch contains different "
            "tasks. The BaseModel isn't yet applicable in the setup where "
            "there are multiple different tasks in the same batch in RL. "
        )

    accumulate = self.hparams.accumulate_losses_before_backward
    if not accumulate:
        # Reset the loss for the current step, if we're not accumulating it.
        self.loss = Loss(self.name)

    # Calculate the loss for each environment, using what is in the buffers
    # *before* the current step gets added.
    for env_index, done in enumerate(observations.done):
        env_loss = self.get_episode_loss(env_index, done=done)
        if env_loss is not None:
            self.loss += env_loss
        if done:
            # End of episode reached in that env!
            # NOTE: `env_loss` can be None here (this was observed during
            # testing), so its presence is not asserted.
            self.on_episode_end(env_index)

    # Store the items of the current step in the buffer of each env.
    # NOTE: There seems to be an unresolved issue where the items in the
    # buffers are not all on the same device; nothing is moved here.
    for env_index in range(self.batch_size):
        # Take a slice across the first dimension.
        self.representations[env_index].append(representations[env_index])
        self.actions[env_index].append(actions.slice(env_index))
        self.rewards[env_index].append(rewards.slice(env_index))

    self.num_steps_in_episode += 1

    min_episodes = self.hparams.min_episodes_before_update
    if all(self.num_episodes_since_update >= min_episodes):
        # Every environment has seen the required number of episodes.
        # Return the loss (with gradients), so that the model can do the
        # backward pass and update the weights.
        returned_loss = self.loss
        self.loss = Loss(self.name)
        self.detach_all_buffers()
        self.num_episodes_since_update[:] = 0
        return returned_loss

    if accumulate:
        # Keep accumulating in `self.loss`; nothing to train on yet.
        return Loss(self.name)

    if self.loss.requires_grad:
        # Not all environments are done, but we have a Loss from one of them:
        # do the 'small' backward pass now and return a detached loss.
        # (`self.loss` is reset at the start of the next call.)
        self.loss.backward(retain_graph=True)
        return self.loss.detach()

    # Either no env produced a loss on this step, or the loss doesn't require
    # grad (which should only happen when the model isn't in training mode).
    return self.loss
def on_episode_end(self, env_index: int) -> None:
    """Update the bookkeeping for the env whose episode just ended.

    Empties its buffers, resets its step counter and counts one more
    finished episode since the last update.
    """
    self.clear_buffers(env_index)
    self.num_steps_in_episode[env_index] = 0
    self.num_episodes_since_update[env_index] += 1
def get_episode_loss(self, env_index: int, done: bool) -> Optional[Loss]:
    """Return the REINFORCE loss for the episode buffered for one env.

    Uses the (up to `max_episode_window_length`) most recent
    representations/actions/rewards stored for the environment at
    `env_index`.

    Returns None when the episode isn't done yet, when the buffer is empty,
    or when the stacked episode has a length of 1 or less.

    NOTE: While the batch Actions/Rewards objects usually contain the
    "batches" of data coming from the N different environments, here they
    hold a sequence of items coming from this single environment (see
    `stack_buffers`).
    """
    if not done:
        # This particular algorithm (REINFORCE) can't give a loss until the
        # end of the episode is reached.
        return None

    if not self.actions[env_index]:
        logger.error("Weird, asked to get episode loss, but there is nothing in the buffer?")
        return None

    stacked_inputs: Tensor
    stacked_actions: PolicyHeadOutput
    stacked_rewards: ContinualRLSetting.Rewards
    stacked_inputs, stacked_actions, stacked_rewards = self.stack_buffers(env_index)

    n_steps = stacked_actions.batch_size
    assert len(stacked_inputs) == len(stacked_actions.y_pred) == len(stacked_rewards.y)

    if n_steps <= 1:
        # TODO: If the episode has len of 1, we can't really get a loss!
        logger.error("Episode is too short!")
        return None

    log_probabilities = stacked_actions.y_pred_log_prob
    reward_values = stacked_rewards.y

    episode_loss = Loss(
        self.name,
        self.policy_gradient(
            rewards=reward_values,
            log_probs=log_probabilities,
            gamma=self.hparams.gamma,
        ),
    )
    episode_loss.metric = EpisodeMetrics(
        n_samples=1,
        mean_episode_reward=float(reward_values.sum()),
        mean_episode_length=len(reward_values),
    )
    # TODO: add something like `add_metric(self, metric: Metrics, name: str=None)`
    # to `Loss`.
    episode_loss.metrics["gradient_usage"] = self.get_gradient_usage_metrics(env_index)
    return episode_loss
def get_gradient_usage_metrics(self, env_index: int) -> GradientUsageMetric:
    """Count the stored actions of an env that still have their graph.

    Inspects `self.actions[env_index]`: items whose logits require grad are
    counted as "used" gradients, all the others (e.g. items that were
    detached after the last model update) as "wasted".
    """
    stored_actions = self.actions[env_index]
    n_with_grad = sum(action.logits.requires_grad for action in stored_actions)
    n_without_grad = len(stored_actions) - n_with_grad
    return GradientUsageMetric(
        used_gradients=n_with_grad,
        wasted_gradients=n_without_grad,
    )
@staticmethod
def get_returns(rewards: Union[Tensor, List[Tensor]], gamma: float) -> Tensor:
    """Return the discounted sum of future rewards at each step.

    Thin wrapper around `discounted_sum_of_future_rewards`.
    """
    returns = discounted_sum_of_future_rewards(rewards, gamma=gamma)
    return returns
@staticmethod
def policy_gradient(
    rewards: List[float], log_probs: Union[Tensor, List[Tensor]], gamma: float = 0.95
):
    """Implementation of the REINFORCE algorithm.

    Thin wrapper around `vanilla_policy_gradient`.
    Adapted from https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63

    Parameters
    ----------
    - rewards : List[float]
        The rewards at each step in an episode.
    - log_probs : Union[Tensor, List[Tensor]]
        The log probabilities associated with the actions that were taken at
        each step.
    - gamma : float
        The discount factor.

    Returns
    -------
    Tensor
        The "vanilla policy gradient" / REINFORCE loss resulting from
        that episode.
    """
    episode_loss = vanilla_policy_gradient(rewards, log_probs, gamma=gamma)
    return episode_loss
@property
def training(self) -> bool:
    """Whether the head is in training mode (backed by `_training`)."""
    return self._training

@training.setter
def training(self, value: bool) -> None:
    """Set the mode; switching modes discards all the buffered episode data."""
    # The `hasattr` guard covers assignments made before `_training` exists
    # (presumably during the base-class initialisation).
    mode_changed = hasattr(self, "_training") and value != self._training
    if mode_changed:
        before, after = (
            "train" if flag else "test" for flag in (self._training, value)
        )
        logger.debug(
            f"Clearing buffers, since we're transitioning between from {before}->{after}"
        )
        self.clear_all_buffers()
        self.batch_size = None
        self.num_episodes_since_update[:] = 0
    self._training = value
def clear_all_buffers(self) -> None:
    """Empty and drop the buffers of every env, and forget the batch size."""
    buffer_lists = (self.rewards, self.representations, self.actions)

    if self.batch_size is None:
        # Already cleared: there must be nothing left to remove.
        for per_env_buffers in buffer_lists:
            assert not per_env_buffers
        return

    for env_id in range(self.batch_size):
        self.clear_buffers(env_id)
    for per_env_buffers in buffer_lists:
        per_env_buffers.clear()
    self.batch_size = None
def clear_buffers(self, env_index: int) -> None:
    """Empty the three buffers of the environment at `env_index`."""
    for per_env_buffers in (self.representations, self.actions, self.rewards):
        per_env_buffers[env_index].clear()
def detach_all_buffers(self):
    """Detach the buffered items of every environment."""
    if self.batch_size:
        for env_index in range(self.batch_size):
            self.detach_buffers(env_index)
    else:
        # No buffers to detach!
        assert not self.actions
def detach_buffers(self, env_index: int) -> None:
    """Replace the buffers of one env with detached copies of their items.

    We have to do this when we update the model while an episode in one of
    the environments isn't done.
    """
    for per_env_buffers in (self.representations, self.actions, self.rewards):
        per_env_buffers[env_index] = self._detach_buffer(per_env_buffers[env_index])
def _detach_buffer(self, old_buffer: Sequence[Tensor]) -> deque:
    """Return a new buffer holding `item.detach()` for each item of `old_buffer`."""
    detached_items = self._make_buffer()
    detached_items.extend(item.detach() for item in old_buffer)
    return detached_items
def _make_buffer(self, elements: Sequence[T] = None) -> Deque[T]:
    """Create a deque bounded by `max_episode_window_length`.

    The deque is filled with `elements` when they are given (and truthy).
    """
    max_length = self.hparams.max_episode_window_length
    if not elements:
        return deque(maxlen=max_length)
    return deque(elements, maxlen=max_length)
def _make_buffers(self) -> List[deque]:
    """Create one empty buffer per environment (`self.batch_size` of them)."""
    buffers: List[deque] = []
    for _ in range(self.batch_size):
        buffers.append(self._make_buffer())
    return buffers
def stack_buffers(self, env_index: int):
    """Stack the buffered items of one env and return them.

    Returns the tuple (stacked representations, stacked actions, stacked
    rewards). Each of the three buffers must be non-empty.
    """
    episode_items = tuple(
        tuple(per_env_buffers[env_index])
        for per_env_buffers in (self.representations, self.actions, self.rewards)
    )
    for items in episode_items:
        assert len(items)

    # BUG: Need to make sure that all tensors are on the same device; the
    # items are stacked as-is, nothing is moved here.
    stacked_inputs, stacked_actions, stacked_rewards = (
        stack(items) for items in episode_items
    )
    return stacked_inputs, stacked_actions, stacked_rewards
def discounted_sum_of_future_rewards(rewards: Union[Tensor, List[Tensor]], gamma: float) -> Tensor:
    """Compute the return at each step of a trajectory.

    The return at step `t` is the sum over `k >= t` of
    `gamma ** (k - t) * rewards[k]`.
    """
    n_steps = len(rewards)
    reward_tensor = rewards if isinstance(rewards, Tensor) else torch.as_tensor(rewards)

    # Row `t` holds the rewards from step `t` onwards; previous rewards are
    # masked out (zeros below the diagonal).
    future_rewards = reward_tensor.expand([n_steps, n_steps]).triu()
    # Upper-triangular matrix of discount factors, see `make_gamma_matrix`.
    discounts = make_gamma_matrix(gamma, n_steps, device=future_rewards.device)

    # Discount the rewards, then sum over time to get the return of each step.
    return (future_rewards * discounts).sum(-1)
def vanilla_policy_gradient(
    rewards: Sequence[float], log_probs: Union[Tensor, List[Tensor]], gamma: float = 0.95
):
    """Implementation of the REINFORCE algorithm.

    Adapted from https://medium.com/@thechrisyoon/deriving-policy-gradients-and-implementing-reinforce-f887949bd63

    Parameters
    ----------
    - rewards : Sequence[float]
        The rewards at each step in an episode.
    - log_probs : Union[Tensor, List[Tensor]]
        The log probabilities associated with the actions that were taken at
        each step.
    - gamma : float
        The discount factor used to compute the returns.

    Returns
    -------
    Tensor
        The "vanilla policy gradient" / REINFORCE loss resulting from
        that episode: minus the dot product of the log-probabilities with
        the returns.
    """
    action_log_probs = log_probs if isinstance(log_probs, Tensor) else torch.stack(log_probs)
    # Give the rewards the same dtype/device as the log-probabilities.
    reward_tensor = torch.as_tensor(rewards).type_as(action_log_probs)
    returns = PolicyHead.get_returns(reward_tensor, gamma=gamma)
    # Need both tensors to be 1-dimensional for the dot-product below.
    flat_log_probs = action_log_probs.reshape(returns.shape)
    return -flat_log_probs.dot(returns)
# @torch.jit.script
# @lru_cache()
def make_gamma_matrix(gamma: float, T: int, device=None) -> Tensor:
    """Create the upper-triangular [T, T] matrix of discount factors.

    Entry `(i, j)` is `gamma ** (j - i)` for `j >= i` (so 1.0 on the
    diagonal, decreasing exponentially towards the right when
    `0 < gamma < 1`) and 0 below the diagonal.

    Parameters
    ----------
    gamma : the discount factor.
    T : the number of steps (size of the matrix).
    device : optional device to move the result to. Any value other than
        None is honoured, including the integer device index 0.

    Returns
    -------
    Tensor
        The [T, T] matrix of discount factors.
    """
    # Start from zeros, so that the lower triangle never depends on
    # uninitialised memory.
    gamma_matrix = torch.zeros([T, T])
    # Neat indexing trick to fill up the upper triangle of the matrix:
    rows, cols = torch.triu_indices(T, T)
    # Precompute all the powers of gamma that can appear: gamma ** [0, T).
    all_gammas = gamma ** torch.arange(T)
    # Put the right value at each entry in the upper triangular matrix.
    gamma_matrix[rows, cols] = all_gammas[cols - rows]
    # `device is not None` rather than truthiness: device index 0 is falsy.
    return gamma_matrix if device is None else gamma_matrix.to(device)
def normalize(x: Tensor):
    """Center `x` on its mean and divide by its standard deviation (+ 1e-9)."""
    centered = x - x.mean()
    scale = x.std() + 1e-9
    return centered / scale
T = TypeVar("T")


def tuple_of_lists(list_of_tuples: List[Tuple[T, ...]]) -> Tuple[List[T], ...]:
    """Transpose a list of tuples into a tuple of lists (one list per field)."""
    return tuple(list(column) for column in zip(*list_of_tuples))


def list_of_tuples(tuple_of_lists: Tuple[List[T], ...]) -> List[Tuple[T, ...]]:
    """Transpose a tuple of lists into a list of tuples (one tuple per index)."""
    return [row for row in zip(*tuple_of_lists)]
================================================
FILE: sequoia/methods/models/output_heads/rl/policy_head_test.py
================================================
from functools import partial
from typing import Callable, Optional, Sequence
import gym
import numpy as np
import pytest
import torch
from gym import spaces
from gym.spaces.utils import flatdim
from gym.vector import SyncVectorEnv
from gym.vector.utils import batch_space
from torch import Tensor, nn
from sequoia.common.gym_wrappers import (
AddDoneToObservation,
ConvertToFromTensors,
EnvDataset,
PixelObservationWrapper,
)
from sequoia.common.loss import Loss
from sequoia.conftest import DummyEnvironment
from sequoia.methods.models.forward_pass import ForwardPass
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.settings.rl.continual.make_env import make_batched_env
from .policy_head import PolicyHead
class FakeEnvironment(SyncVectorEnv):
    """Vectorized env whose episode lengths are fully controlled by the test.

    `step` never steps the wrapped envs: it returns sampled observations and
    rewards of 1, and `done` is True for an env exactly when its countdown
    of remaining steps reaches zero.
    """

    def __init__(
        self,
        env_fn: Callable[[], gym.Env],
        batch_size: int,
        new_episode_length: Callable[[int], int],
        episode_lengths: Sequence[int] = None,
    ):
        super().__init__([env_fn] * batch_size)
        self.new_episode_length = new_episode_length
        self.batch_size = batch_size
        if not episode_lengths:
            # No initial lengths given: ask the callback for each env.
            episode_lengths = [new_episode_length(i) for i in range(self.num_envs)]
        self.episode_lengths = np.array(episode_lengths)
        self.steps_left_in_episode = self.episode_lengths.copy()

        self.single_reward_space = spaces.Box(*self.reward_range, shape=())
        self.reward_space = batch_space(self.single_reward_space, batch_size)

    def step(self, actions):
        """Advance every countdown by one step; `actions` are ignored."""
        self.steps_left_in_episode[:] -= 1

        obs = self.observation_space.sample()
        reward = np.ones(self.batch_size)

        assert not any(self.steps_left_in_episode < 0)
        done = self.steps_left_in_episode == 0

        info = np.array([{} for _ in range(self.batch_size)])

        for env_index, env_done in enumerate(done):
            if not env_done:
                continue
            # Episode over in this env: start the countdown of the next one.
            next_length = self.new_episode_length(env_index)
            self.episode_lengths[env_index] = next_length
            self.steps_left_in_episode[env_index] = next_length

        return obs, reward, done, info
@pytest.mark.parametrize("batch_size", [2, 5])
def test_with_controllable_episode_lengths(batch_size: int, monkeypatch):
    """TODO: Test out the PolicyHead in a very controlled environment, where we
    know exactly the lengths of each episode.

    The first episode of env 0 lasts 5 steps; every other episode, in every
    env, lasts 10 steps. The policy gradient is replaced by a mock that
    returns the length of the episode, so the expected loss at each step is
    known exactly.
    """
    env = FakeEnvironment(
        partial(gym.make, "CartPole-v0"),
        batch_size=batch_size,
        episode_lengths=[5, *(10 for _ in range(batch_size - 1))],
        new_episode_length=lambda env_index: 10,
    )
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)

    obs_space = env.single_observation_space
    x_dim = flatdim(obs_space["x"])
    # Create some dummy encoder.
    encoder = nn.Linear(x_dim, x_dim)
    representation_space = obs_space["x"]

    output_head = PolicyHead(
        input_space=representation_space,
        action_space=env.single_action_space,
        reward_space=env.single_reward_space,
        hparams=PolicyHead.HParams(
            max_episode_window_length=100,
            min_episodes_before_update=1,
            accumulate_losses_before_backward=False,
        ),
    )
    # TODO: Simulating as if the output head were attached to a BaseModel.
    PolicyHead.base_model_optimizer = torch.optim.Adam(output_head.parameters(), lr=1e-3)

    # Simplify the loss function so we know exactly what the loss should be at
    # each step.
    def mock_policy_gradient(
        rewards: Sequence[float], log_probs: Tensor, gamma: float = 0.95
    ) -> Tensor:
        # All ones, but still attached to the graph of `log_probs`.
        log_probs = (log_probs - log_probs.clone()) + 1
        # Return the length of the episode, but with a "gradient" flowing back into log_probs.
        return len(rewards) * log_probs.mean()

    monkeypatch.setattr(output_head, "policy_gradient", mock_policy_gradient)

    batch_size = env.batch_size

    obs = env.reset()
    # `bool` rather than the `np.bool` alias, which is deprecated and is
    # missing from some NumPy releases.
    step_done = np.zeros(batch_size, dtype=bool)

    for step in range(200):
        x, obs_done = obs["x"], obs["done"]

        # The done from the obs should always be the same as the 'done' from the 'step' function.
        assert np.array_equal(obs_done, step_done)

        representations = encoder(x)
        observations = ContinualRLSetting.Observations(
            x=x,
            done=obs_done,
        )

        actions_obj = output_head(observations, representations)
        actions = actions_obj.y_pred

        # TODO: kinda useless to wrap a single tensor in an object..
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )
        obs, rewards, step_done, info = env.step(actions)

        rewards_obj = ContinualRLSetting.Rewards(y=rewards)
        loss = output_head.get_loss(
            forward_pass=forward_pass,
            actions=actions_obj,
            rewards=rewards_obj,
        )
        print(f"Step {step}")
        print(f"num episodes since update: {output_head.num_episodes_since_update}")
        print(f"steps left in episode: {env.steps_left_in_episode}")
        print(f"Loss for that step: {loss}")

        if any(obs_done):
            assert loss != 0.0

        if step == 5:
            # Env 0 first episode from steps 0 -> 5
            assert loss.loss == 5.0
            assert loss.metrics["gradient_usage"].used_gradients == 5.0
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 10:
            # Envs[1:batch_size], first episode, from steps 0 -> 10
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 0.0
        elif step == 15:
            # Env 0 second episode from steps 5 -> 15
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6
        elif step == 20:
            # Envs[1:batch_size]: second episode, from steps 10 -> 20
            # NOTE: At this point, both envs have reached the required number of episodes.
            # This means that the gradient usage on the next time any env reaches
            # an end-of-episode will be one less than the total number of items.
            assert loss.loss == 10.0 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)
        elif step == 25:
            # Env 0 third episode from steps 15 -> 25
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6
        elif step > 0 and step % 10 == 0:
            # Same pattern as step 20 above
            assert loss.loss == 10.0 * (batch_size - 1), step
            assert loss.metrics["gradient_usage"].used_gradients == 9 * (batch_size - 1)
            assert loss.metrics["gradient_usage"].wasted_gradients == 1 * (batch_size - 1)
        elif step > 0 and step % 5 == 0:
            # Same pattern as step 25 above
            assert loss.loss == 10.0
            assert loss.metrics["gradient_usage"].used_gradients == 4
            assert loss.metrics["gradient_usage"].wasted_gradients == 6
        else:
            assert loss.loss == 0.0, step
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end(batch_size: int):
    """Step through the env and check the loss returned by the output head.

    The loss must be non-zero on the steps where the observations signal the
    end of an episode in at least one env, and zero on all the other steps.
    """
    # Temporary (non-vectorized) env, only used to get the spaces.
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)
        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = EnvDataset(
        ConvertToFromTensors(
            AddDoneToObservation(
                gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
            )
        )
    )

    head = PolicyHead(
        input_space=obs_space.x,
        action_space=action_space,
        reward_space=reward_space,
        hparams=PolicyHead.HParams(accumulate_losses_before_backward=False),
    )
    # TODO: Simulating as if the output head were attached to a BaseModel.
    PolicyHead.base_model_optimizer = torch.optim.Adam(head.parameters(), lr=1e-3)
    head.train()

    env.seed(123)
    obs = env.reset()

    done = torch.zeros(batch_size, dtype=bool)
    info = np.array([{} for _ in range(batch_size)])
    loss = None

    non_zero_losses = 0

    encoder = nn.Linear(4, 4)
    encoder.train()

    for i in range(100):
        x = obs["x"]
        representations = encoder(x)

        observations = ContinualRLSetting.Observations(
            x=x,
            done=done,
        )
        head_output = head.forward(observations, representations=representations)
        env_actions = head_output.actions.numpy().tolist()

        obs, rewards, done, info = env.step(env_actions)
        done = torch.as_tensor(done, dtype=bool)
        rewards = ContinualRLSetting.Rewards(rewards)
        assert len(info) == batch_size

        print(f"Step {i}, obs: {obs}, done: {done}, info: {info}")

        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        assert observations.done is not None
        # Indices of the envs whose episode ended, according to the
        # observations that were fed to the head on this step.
        ended_envs = [
            env_index
            for env_index, env_is_done in enumerate(observations.done)
            if env_is_done
        ]
        if ended_envs:
            print(f"Episode ended for env {ended_envs[0]} at step {i}")
            assert loss.loss != 0.0
            non_zero_losses += 1
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss is None or loss.loss == 0.0

    assert non_zero_losses > 0
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_done_is_sometimes_True_when_iterating_through_env(batch_size: int):
    """Test that when *iterating* through the env, done is sometimes 'True'.

    Iterates for at most 100 steps, sending a random action at each step,
    and fails if no observation ever has `done` set for any env.
    """
    env = gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=True)
    env = AddDoneToObservation(env)
    env = ConvertToFromTensors(env)
    env = EnvDataset(env)
    for i, obs in zip(range(100), env):
        print(i, obs)
        _ = env.send(env.action_space.sample())
        if any(obs["done"]):
            break
    else:
        # The message is passed positionally: the keyword is not named the
        # same in every pytest version.
        pytest.fail("Never encountered done=True!")
@pytest.mark.parametrize("batch_size", [1, 2, 5])
def test_loss_is_nonzero_at_episode_end_iterate(batch_size: int):
    """Iterate through the env (active-dataloader style) and check the loss.

    The loss returned by the output head must be non-zero on the steps where
    the observations signal the end of an episode in at least one env, and
    zero on all the other steps.
    """
    # Temporary (non-vectorized) env, only used to get the spaces.
    with gym.make("CartPole-v0") as temp_env:
        temp_env = AddDoneToObservation(temp_env)
        obs_space = temp_env.observation_space
        action_space = temp_env.action_space
        reward_space = getattr(
            temp_env, "reward_space", spaces.Box(*temp_env.reward_range, shape=())
        )

    env = EnvDataset(
        ConvertToFromTensors(
            AddDoneToObservation(
                gym.vector.make("CartPole-v0", num_envs=batch_size, asynchronous=False)
            )
        )
    )

    head = PolicyHead(
        input_space=obs_space["x"],
        action_space=action_space,
        reward_space=reward_space,
        hparams=PolicyHead.HParams(accumulate_losses_before_backward=False),
    )

    env.seed(123)
    non_zero_losses = 0

    for i, obs in zip(range(100), env):
        print(i, obs)
        x, done = obs["x"], obs["done"]
        # No encoder here: the representations are the observations themselves.
        representations = x
        assert isinstance(x, Tensor)
        assert isinstance(done, Tensor)

        observations = ContinualRLSetting.Observations(
            x=x,
            done=done,
        )
        head_output = head.forward(observations, representations=representations)
        env_actions = head_output.actions.numpy().tolist()

        rewards = env.send(env_actions)

        assert isinstance(representations, Tensor)
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=head_output,
        )
        rewards = ContinualRLSetting.Rewards(rewards)
        loss = head.get_loss(forward_pass, actions=head_output, rewards=rewards)
        print("loss:", loss)

        # Indices of the envs whose episode ended, according to the
        # observations that were fed to the head on this step.
        ended_envs = [
            env_index
            for env_index, env_is_done in enumerate(observations.done)
            if env_is_done
        ]
        if ended_envs:
            print(f"Episode ended for env {ended_envs[0]} at step {i}")
            assert loss.total_loss != 0.0
            non_zero_losses += 1
        else:
            print(f"No episode ended on step {i}, expecting no loss.")
            assert loss.total_loss == 0.0

    assert non_zero_losses > 0
@pytest.mark.xfail(reason="TODO: Fix this test")
def test_buffers_are_stacked_correctly(monkeypatch):
    """Check the per-environment episode buffers of a `PolicyHead`.

    TODO: Test that "de-synced" episodes, when fed to the output head, get
    passed, re-stacked correctly, to the `get_episode_loss` function.

    The test steps a `SyncVectorEnv` of `DummyEnvironment`s that start at
    different values (0, 1, ..., batch_size - 1) and share the same target, so
    that their episodes end on different steps. `PolicyHead.get_episode_loss`
    is replaced (through `monkeypatch`) with a function that checks the inputs
    it receives for each environment.

    NOTE: This test is currently marked as an expected failure.
    """
    batch_size = 5

    # Each env starts at a different value but has the same target.
    starting_values = [i for i in range(batch_size)]
    targets = [10 for i in range(batch_size)]

    env = SyncVectorEnv(
        [
            partial(DummyEnvironment, start=start, target=target, max_value=10 * 2)
            for start, target in zip(starting_values, targets)
        ]
    )
    obs = env.reset()
    assert obs.tolist() == list(range(batch_size))

    reward_space = spaces.Box(*env.reward_range, shape=())
    output_head = PolicyHead(  # observation_space=spaces.Tuple([env.observation_space,
        #                            spaces.Box(False, True, [batch_size], np.bool)]),
        input_space=spaces.Box(0, 1, (1,)),
        action_space=env.single_action_space,
        reward_space=reward_space,
    )
    # Set the max window length, for testing.
    output_head.hparams.max_episode_window_length = 100

    obs = env.reset()
    done = np.zeros(batch_size, dtype=bool)

    obs = torch.from_numpy(obs)
    done = torch.from_numpy(done)

    def mock_get_episode_loss(
        self: PolicyHead,
        env_index: int,
        inputs: Tensor,
        actions: ContinualRLSetting.Observations,
        rewards: ContinualRLSetting.Rewards,
        done: bool,
    ) -> Optional[Loss]:
        # Stand-in for `PolicyHead.get_episode_loss`: instead of computing a
        # loss, it asserts that the inputs for environment `env_index` are the
        # consecutive counter values starting at `env_index`.
        # It has no `return` statement, so it always returns None.
        print(f"Environment at index {env_index}, episode ended: {done}")
        if done:
            print(f"Full episode: {inputs}")
        else:
            print(f"Episode so far: {inputs}")

        n_observations = len(inputs)

        assert inputs.flatten().tolist() == (env_index + np.arange(n_observations)).tolist()
        if done:
            # Unfortunately, we don't get the final state, because of how
            # VectorEnv works atm.
            assert inputs[-1] == targets[env_index] - 1

    monkeypatch.setattr(PolicyHead, "get_episode_loss", mock_get_episode_loss)

    # perform 10 iterations, incrementing each DummyEnvironment's counter at
    # each step (action of 1).
    # Therefore, at first, the counters should be [0, 1, 2, ... batch-size-1].
    info = [{} for _ in range(batch_size)]

    for step in range(10):
        print(f"Step {step}.")
        # Wrap up the obs to pretend that this is the data coming from a
        # ContinualRLSetting.
        observations = ContinualRLSetting.Observations(x=obs, done=done)  # , info=info)
        # We don't use an encoder for testing, so the representations is just x.
        representations = obs.reshape([batch_size, 1])
        assert observations.task_labels is None

        actions = output_head(observations.float(), representations.float())

        # Wrap things up to pretend like the output head is being used in the
        # BaseModel:
        forward_pass = ForwardPass(
            observations=observations,
            representations=representations,
            actions=actions,
        )

        action_np = actions.actions_np

        obs, rewards, done, info = env.step(action_np)

        obs = torch.from_numpy(obs)
        rewards = torch.from_numpy(rewards)
        done = torch.from_numpy(done)

        rewards = ContinualRLSetting.Rewards(y=rewards)
        # The returned loss is ignored: only the side effects on the episode
        # buffers of the output head are checked below.
        _ = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)

        # Check the contents of the episode buffers.

        assert len(output_head.representations) == batch_size
        for env_index in range(batch_size):
            # obs_buffer = output_head.observations[env_index]
            representations_buffer = output_head.representations[env_index]
            action_buffer = output_head.actions[env_index]
            reward_buffer = output_head.rewards[env_index]

            if step >= batch_size:
                # From this step on, the buffers are no longer expected to all
                # have the same length: only the env whose counter just reached
                # its target is checked, the others are skipped.
                if step + env_index == targets[env_index]:
                    assert len(representations_buffer) == 1 and not output_head.done[env_index]
                # if env_index == step - batch_size:
                continue
            assert len(representations_buffer) == step + 1
            # Check to see that the last entry in the episode buffer for this
            # environment corresponds to the slice of the most recent
            # observations/actions/rewards at the index corresponding to this
            # environment.
            # observation_tuple = input_buffer[-1]
            step_action = action_buffer[-1]
            step_reward = reward_buffer[-1]
            # assert observation_tuple.x == observations.x[env_index]
            # assert observation_tuple.task_labels is None
            # assert observation_tuple.done == observations.done[env_index]

            # The last element in the buffer should be the slice in the batch
            # for that environment.
            assert step_action.y_pred == actions.y_pred[env_index]
            assert step_reward.y == rewards.y[env_index]

        if step < batch_size:
            # While no episode has ended yet, every counter was incremented
            # `step + 1` times since the reset.
            assert obs.tolist() == (np.arange(batch_size) + step + 1).tolist()
        # if step >= batch_size:
        #     if step + env_index == targets[env_index]:
        #         assert done

    # assert False, (obs, rewards, done, info)
    # loss: Loss = output_head.get_loss(forward_pass, actions=actions, rewards=rewards)
@pytest.mark.no_xvfb
def test_sanity_check_cartpole_done_vector():
    """Sanity check: a batched CartPole env must report `done=True` at some point.

    Also checks, at every step, that the `done` entry which the
    `AddDoneToObservation` wrapper adds to the observations matches the `done`
    vector returned by `env.step`.
    """
    batched_env = AddDoneToObservation(
        make_batched_env("CartPole-v0", batch_size=5, wrappers=[PixelObservationWrapper])
    )
    batched_env.reset()

    episode_ended = False
    for step_index in range(100):
        random_actions = batched_env.action_space.sample()
        observation, _, dones, _ = batched_env.step(random_actions)
        # The 'done' stored in the observation must agree with the step output.
        assert all(observation["done"] == dones), step_index
        if any(dones):
            episode_ended = True
            break

    assert episode_ended, "Should have had at least one done=True, over the 100 steps!"
================================================
FILE: sequoia/methods/models/output_heads/rl/wasted_steps_calc.py
================================================
from typing import Callable, List
import numpy as np
import tqdm as tqdm
def get_fraction_of_observations_with_grad(
    n_envs: int,
    new_episode_length: Callable[[], int],
    n_updates: int = 10,
    min_episodes_before_update: int = 1,
):
    """Simulate how many environment steps are 'used' vs 'wasted' by model updates.

    `n_envs` environments are stepped in lockstep. When an episode ends in an
    environment, all the steps taken in that environment since the last model
    update are counted as *used* (a "backward" pass is performed for them). A
    model update happens once every environment has finished at least
    `min_episodes_before_update` episodes since the previous update; the steps
    of the episodes that are still unfinished at that point are counted as
    *wasted*.

    Parameters
    ----------
    n_envs : int
        Number of environments stepped in parallel.
    new_episode_length : Callable[[], int]
        Called (without arguments) to sample the length of each new episode.
        A non-positive length is treated as an episode that ends immediately.
    n_updates : int, optional
        Number of model updates to simulate. Defaults to 10.
    min_episodes_before_update : int, optional
        Number of episodes each environment has to finish between two
        consecutive model updates. Defaults to 1.

    Returns
    -------
    Tuple
        `(n_used_steps, n_wasted_steps)`: the total number of used and wasted
        steps. Despite the name of this function, these are raw counts and not
        ratios; `n_used_steps / (n_used_steps + n_wasted_steps)` gives the
        fraction of used steps when at least one step was simulated.
    """
    n_used_steps = 0
    n_wasted_steps = 0

    # Number of steps remaining in the current episode of each env.
    steps_left_in_episode = np.array([new_episode_length() for _ in range(n_envs)])

    for _ in tqdm.tqdm(range(n_updates), leave=False):
        steps_since_last_update = np.zeros(n_envs)
        finished_episodes_since_last_update = np.zeros(n_envs)

        # Step all the envs until each of them has finished enough episodes
        # (i.e. has produced enough losses) since the last update.
        while not all(finished_episodes_since_last_update >= min_episodes_before_update):
            for env in range(n_envs):
                # NOTE: `<=` rather than `==`, so that a negative sampled length
                # cannot make the counter skip over zero and loop forever.
                if steps_left_in_episode[env] <= 0:
                    # Perform the "backward()" for that env.
                    # This uses all steps since the last update (with grads).
                    n_used_steps += steps_since_last_update[env]
                    steps_since_last_update[env] = 0
                    finished_episodes_since_last_update[env] += 1
                    # Sample the length of the next episode.
                    steps_left_in_episode[env] = new_episode_length()
                else:
                    steps_left_in_episode[env] -= 1
                    steps_since_last_update[env] += 1

        # Perform the "optimizer step" for the model.
        # This 'wastes' all the prediction tensors (actions) in unfinished
        # episodes, because it would detach them.
        n_wasted_steps += int(steps_since_last_update.sum())

    # NOTE: No ratio is computed here: the total can be zero (e.g. with
    # `n_updates=0` or `n_envs=0`), which would be a division by zero.
    return n_used_steps, n_wasted_steps
# Script entry point: for several standard deviations of the (normally
# distributed) episode length, run the simulation above for every number of
# environments in `x`, and plot the results in two side-by-side scatter plots.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    fig: plt.Figure
    axes: List[plt.Axes]
    n_updates_per_run: int = 20

    fig, axes = plt.subplots(1, 2)
    import textwrap

    # Numbers of environments ("batch sizes") to simulate: 1, 2, ..., 63.
    # x: np.ndarray = np.random.randint(1, 32, size=100)
    x: np.ndarray = np.arange(63, dtype=int) + 1
    min_episodes_before_update = 3
    # min_episodes_before_updates = [1, 3, 5]

    # NOTE: These bounds (and the uniform distribution label below) are only
    # used by the commented-out uniform sampling: the label is overwritten below.
    min_episode_length: int = 5
    max_episode_length: int = 100
    episode_len_dist = f"U[{min_episode_length},{max_episode_length}]"

    # Normally distributed episode lengths:
    # episode_length_mean = (max_episode_length + min_episode_length) / 2
    episode_length_mean = 50
    # episode_length_std = np.sqrt(max_episode_length - episode_length_mean)
    # episode_len_dist = f"N({episode_length_mean:.1f}, {episode_length_std:.1f})"
    episode_length_stds = [1.0, 3.0, 5.0, 10.0]
    episode_len_dist = f"N({episode_length_mean:.1f}, {episode_length_stds})"

    # Plural suffix for the word "episode" in the figure title.
    s = "s" if min_episodes_before_update > 1 else ""
    fig.suptitle(
        textwrap.dedent(
            f"""\
            Episode length ~ {episode_len_dist},
            Updating model when all envs have finished at least {min_episodes_before_update} episode{s},
            {n_updates_per_run} total updates per run.
            """
        )
    )
    # for min_episodes_before_update in min_episodes_before_updates:
    for episode_length_std in episode_length_stds:
        label = f"episode_length_std={episode_length_std:.1f}"
        # label = f"min_episodes_before_update={min_episodes_before_update}"
        # new_episode_length = lambda: np.random.randint(min_episode_length, max_episode_length)
        # The lambda reads `episode_length_std` when called; it is only called
        # during this iteration of the loop, so it sees the current value.
        new_episode_length = lambda: int(np.random.normal(episode_length_mean, episode_length_std))
        # x.sort()
        used_ = []
        wasted_ = []
        for n_envs in tqdm.tqdm(x, desc="n_envs"):
            used, wasted = get_fraction_of_observations_with_grad(
                n_envs=n_envs,
                new_episode_length=new_episode_length,
                min_episodes_before_update=min_episodes_before_update,
                n_updates=n_updates_per_run,
            )
            used_.append(used)
            wasted_.append(wasted)

        y_used = np.array(used_)
        y_wasted = np.array(wasted_)
        used_ratio = y_used / (y_used + y_wasted)
        wasted_ratio = 1 - used_ratio

        # Left plot: fraction of used steps for each number of environments.
        axes[0].set_title(f"Percentage of used vs 'wasted' gradients w.r.t. batch size")
        axes[0].scatter(x, used_ratio, label=label)
        axes[0].set_ylim(0.0, 1.0)

        # Right plot: used steps, averaged over the envs and over the updates.
        used_per_env = y_used / x / n_updates_per_run
        axes[1].scatter(x, used_per_env)

    fig.legend()
    # xs, ys = x, used_ratio
    # # zip joins x and y coordinates in pairs
    # for x_i, y_i in zip(xs, ys):
    #     label = f"({int(x_i)}, {y_i:.2f})"
    #     axes[0].annotate(label, # this is the text
    #                 (x_i, y_i), # this is the point to label
    #                 textcoords="offset points", # how to position the text
    #                 xytext=(0,10), # distance from text to points (x,y)
    #                 ha='center') # horizontal alignment can be left, right or center
    axes[0].set_ylabel("% of used gradients")
    axes[0].set_xlabel("batch size (number of environments)")
    axes[1].set_title(f"''Data efficiency'': Average number of used steps per update per env")
    axes[1].set_xlabel(f"# of environments")
    axes[1].set_ylabel(f"# of used steps per env")
    plt.show()
================================================
FILE: sequoia/methods/models/output_heads.puml
================================================
@startuml output_heads
' Class diagram of the output heads: the OutputHead base class, its
' supervised (classification / regression) and RL subclasses, together with
' their hyper-parameter and output classes.

package output_heads {
    package output_head {
        abstract class OutputHead {
            + hparams: OutputHead.HParams
            {abstract} + forward(observations: Observations, representations: Tensor): Actions
            {abstract} + get_loss(ForwardPass, Actions, Rewards): Loss
        }
        abstract class OutputHead.HParams {
            + {static} available_activations: ClassVar[Dict[str, Type[nn.Module]]]
            + hidden_layers: int
            + hidden_neurons: List[int]
            + activation: Type[nn.Module] = "tanh"
        }
    }
    package classification {
        class ClassificationHead implements OutputHead {
            + forward(Observations, representations: Tensor): ClassificationHeadOutput
            + get_loss(ForwardPass, ClassificationOutput, Rewards): Loss
        }
        class ClassificationHead.HParams extends OutputHead.HParams {}

        class ClassificationHeadOutput extends settings.base.Actions {
            + y_pred: Tensor
            + logits: Tensor
        }
    }

    package regression {
        class RegressionHead implements OutputHead {}
    }

    package rl {
        package policy_head {
            class PolicyHead extends ClassificationHead {
                + forward(observations: Observations, representations: Tensor): PolicyHeadOutput
                + hparams: PolicyHead.HParams
            }
            ' NOTE(review): a `forward` member on a hyper-parameter class looks
            ' like a copy-paste from PolicyHead above - confirm against the
            ' code and replace it with the actual fields of PolicyHead.HParams.
            class PolicyHead.HParams extends ClassificationHead.HParams {
                + forward(observations: Observations, representations: Tensor): PolicyHeadOutput
            }
            class PolicyHeadOutput extends ClassificationHeadOutput {
                action_dist: Distribution
            }
        }
        package episodic_a2c {
            class EpisodicA2C extends PolicyHead {
                + actor: nn.Module
                + critic: nn.Module
                + get_episode_loss(Observations, Actions, Rewards, done: bool): Loss
            }
            class EpisodicA2C.HParams extends PolicyHead.HParams {
                + normalize_advantages: bool = False
                + actor_loss_coef: float = 0.5
                + critic_loss_coef: float = 0.5
                + entropy_loss_coef: float = 0.1
                + max_policy_grad_norm: Optional[float] = None
                + gamma: float = 0.99
                + learning_rate: float = 1e-2
            }
            class A2CHeadOutput extends PolicyHeadOutput {
                + value: Tensor
            }
        }
        package actor_critic_head {
            class ActorCriticHead extends ClassificationHead {
                + hparams: ActorCriticHead.HParams
                + actor: nn.Module
                + critic: nn.Module
            }
            class ActorCriticHead.HParams extends ClassificationHead.HParams {
                + gamma: float = 0.95
                + learning_rate: float = 1e-3
            }
        }
    }

    ' Composition / "outputs" relations (currently disabled):
    ' OutputHead *-- OutputHead.HParams
    ' ClassificationHead *-- ClassificationHead.HParams
    ' PolicyHead *-- PolicyHead.HParams
    ' ActorCriticHead *-- ActorCriticHead.HParams
    ' EpisodicA2C *-- EpisodicA2C.HParams
    ' OutputHead *-- Actions : outputs
    ' ClassificationHead *-- ClassificationHeadOutput : outputs
    ' PolicyHead *-- PolicyHeadOutput : outputs
    ' EpisodicA2C *-- A2CHeadOutput : outputs
}
@enduml
================================================
FILE: sequoia/methods/models/simple_convnet.py
================================================
from torch import Tensor, nn
class SimpleConvNet(nn.Module):
    """Small convolutional classifier: a conv feature extractor and an MLP head.

    The feature extractor (`self.features`) ends with an adaptive pooling to
    8x8 followed by two un-padded 3x3 convolutions, and produces flat vectors
    of 512 features (32 channels x 4 x 4). The classifier (`self.fc`) maps
    those to `n_classes` outputs.
    """

    def __init__(self, in_channels: int = 3, n_classes: int = 10):
        super().__init__()

        def conv_bn(n_in: int, n_out: int, kernel_size: int, padding: int):
            # Bias-free convolution (stride 1), followed by a batch-norm layer.
            conv = nn.Conv2d(
                n_in, n_out, kernel_size=kernel_size, stride=1, padding=padding, bias=False
            )
            return [conv, nn.BatchNorm2d(n_out)]

        feature_layers = [
            *conv_bn(in_channels, 6, kernel_size=5, padding=1),
            nn.ReLU(inplace=True),
            *conv_bn(6, 16, kernel_size=5, padding=1),
            nn.ReLU(inplace=True),
            *conv_bn(16, 16, kernel_size=3, padding=1),
            nn.AdaptiveAvgPool2d(output_size=(8, 8)),  # [16, 8, 8]
            *conv_bn(16, 32, kernel_size=3, padding=0),  # [32, 6, 6]
            nn.ReLU(inplace=True),
            *conv_bn(32, 32, kernel_size=3, padding=0),  # [32, 4, 4]
            nn.Flatten(),
        ]
        self.features = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Flatten(),
            # NOTE: This '512' is what gets used as the hidden size of the
            # encoder.
            nn.Linear(512, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, n_classes),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, x: Tensor) -> Tensor:
        """Extract the features of `x`, then classify them."""
        flat_features = self.features(x)
        return self.fc(flat_features)
================================================
FILE: sequoia/methods/models.puml
================================================
@startuml models
' Class diagram of the `models` package: the ForwardPass batch object, plus
' the output heads and base model diagrams, which are pulled in from their
' own files.

package models {
    ' Groups what goes into and comes out of a forward pass of a model.
    class ForwardPass extends Batch {
        + observations: Observations
        + representations: Tensor
        + actions: Actions
    }
    ' TODO: Idk why, but this doesn't work if placed inside the 'models' package
    ' above.
    ' NOTE(review): as written, these includes are located before the closing
    ' brace of the 'models' package - confirm which placement was intended.
    !include ./models/output_heads.puml
    !include ./models/base_model.puml
}
@enduml
================================================
FILE: sequoia/methods/packnet_method.py
================================================
from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Type, Union
import torch
from pytorch_lightning import Callback, LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping
from simple_parsing.helpers import mutable_field
from simple_parsing.helpers.hparams import HyperParameters, uniform
from torch import Tensor, nn
from sequoia.common.config import Config
from sequoia.methods.base_method import BaseMethod, BaseModel
from sequoia.methods.trainer import TrainerConfig
from sequoia.settings import Setting
from sequoia.settings.assumptions import IncrementalAssumption as IncrementalSetting
from sequoia.settings.sl import IncrementalSLSetting, TaskIncrementalSLSetting
class PackNet(Callback, nn.Module):
    """PyTorch-Lightning Callback that implements the PackNet algorithm for CL.

    TODO: Add a citation for the PackNet paper.

    Training on each task is split in two phases (see `epoch_split`):

    1. "train": the gradients of the weights assigned to previous tasks are
       zeroed after each backward pass (`training_mask`).
    2. "fine_tune": once the training epochs are completed, the free weights
       with the smallest magnitude are zeroed-out (`prune`), the remaining ones
       are assigned to the current task, and only those are fine-tuned
       (`fine_tune_mask`).

    One dictionary of boolean masks (mapping from parameter name to mask) is
    stored in `self.masks` for each task.
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of the Packnet callback."""

        # Quantile of the free weights to prune after training on each task.
        # A single float is used for every task (see `config_instructions`); a
        # list must have one entry per task, except the last.
        prune_instructions: Union[float, List[float]] = uniform(0.1, 0.9, default=0.5)
        # Number of training epochs per task, before pruning.
        train_epochs: int = uniform(1, 5, default=1)
        # Number of fine-tuning epochs per task, after pruning.
        fine_tune_epochs: int = uniform(0, 5, default=1)

    def __init__(
        self,
        n_tasks: int,
        hparams: Optional["PackNet.HParams"] = None,
        prunable_types: Sequence[Type[nn.Module]] = (nn.Conv2d, nn.Linear),
        ignore_modules: Optional[Sequence[str]] = None,
        ignore_parameters: Sequence[str] = ("bias",),
    ):
        """Create the PackNet callback.

        Parameters
        ----------
        n_tasks : int
            Number of tasks.
        hparams : PackNet.HParams
            Configuration options (hyper-parameters) of the PackNet algorithm.
        prunable_types : Sequence[Type[nn.Module]], optional
            The types of nn.Modules to consider for pruning. By default, only considers
            layers of types `nn.Conv2d` and `nn.Linear`.
        ignore_modules : Sequence[str], optional
            List of flags for module names that should be ignored by PackNet.
            When one of these values is found within the name of a module, it is
            ignored. Doesn't ignore any modules by default.
        ignore_parameters : Sequence[str], optional
            List of flags for parameter names that should be ignored by PackNet.
            When one of these values is found within the name of a parameter, it is
            ignored. Defaults to ["bias"].
        """
        super().__init__()
        hparams = hparams or self.HParams()
        self.n_tasks = n_tasks
        self.prune_instructions = hparams.prune_instructions
        # NOTE: Always stored as a tuple, because `isinstance` (used in
        # `filtered_parameter_iterator`) doesn't accept a list of types.
        self.prunable_types = tuple(prunable_types) if prunable_types else (nn.Conv2d, nn.Linear)
        self.ignore_modules = list(ignore_modules or [])
        self.ignore_parameters = list(ignore_parameters or [])

        # Set up an array of quantiles for pruning procedure
        if n_tasks:
            self.config_instructions()

        # Path of the file where the final weights are saved (set by
        # `save_final_state`).
        self.PATH = None
        self.epoch_split = (hparams.train_epochs, hparams.fine_tune_epochs)
        self.current_task = 0
        # 3-dimensions: task, layer, parameter mask
        self.masks: List[Dict[str, Tensor]] = []
        # Either "train" or "fine_tune" (None until `on_init_end` is called).
        self.mode: str = None
        self.params_dict: dict = None

    def filtered_parameter_iterator(self, module: nn.Module) -> Iterable[Tuple[str, nn.Parameter]]:
        """Iterator that, given a module, yields tuples with the full name of the
        parameters that will be modified by the PackNet callback, as well as the
        parameters themselves.

        This is used to remove a bit of boilerplate code in the for loops below.

        Parameters
        ----------
        module : nn.Module
            The module to iterate over.

        Returns
        -------
        Iterable[Tuple[str, nn.Parameter]]
            An Iterator of tuples containing parameter names ('{mod_name}.{param_name}')
            and parameters.
        """
        for mod_name, mod in module.named_modules():
            if not isinstance(mod, self.prunable_types):
                continue
            if any(ignored in mod_name for ignored in self.ignore_modules):
                continue
            for param_name, param in mod.named_parameters():
                if any(ignored in param_name for ignored in self.ignore_parameters):
                    continue
                yield f"{mod_name}.{param_name}", param

    def _assigned_mask(
        self,
        param_full_name: str,
        param: Tensor,
        task_masks: Optional[Sequence[Dict[str, Tensor]]] = None,
    ) -> Tensor:
        """Return the union of the masks of the given tasks for one parameter.

        Parameters
        ----------
        param_full_name : str
            Name of the parameter, as given by `filtered_parameter_iterator`.
        param : Tensor
            The parameter. Used for the shape and the device of the result.
        task_masks : Sequence[Dict[str, Tensor]], optional
            The per-task mask dictionaries to combine. Uses all the masks in
            `self.masks` by default.

        Returns
        -------
        Tensor
            Boolean mask, True for the weights assigned to one of the tasks.
            Tasks without a mask for this parameter don't contribute to it.
        """
        if task_masks is None:
            task_masks = self.masks
        assigned = torch.zeros_like(param, dtype=torch.bool)
        for masks in task_masks:
            mask = masks.get(param_full_name)
            if mask is not None:
                assigned |= mask
        return assigned

    @torch.no_grad()
    def prune(self, model: nn.Module, prune_quantile: float) -> Dict[str, Tensor]:
        """Create task-specific mask and prune least relevant weights

        The cutoff is the `prune_quantile` quantile of the absolute values of
        all the free weights (those not assigned to a previous task). The free
        weights below the cutoff are set to zero, in-place, and the others are
        assigned to the new task.

        Parameters
        ----------
        model : nn.Module
            The model to be pruned.
        prune_quantile : float
            The percentage of weights to prune as a decimal.

        Returns
        -------
        Dict[str, Tensor]
            The masks to use to prune the layers of the given model. Empty when
            the model doesn't have any prunable parameter.
        """
        named_params = list(self.filtered_parameter_iterator(model))
        if not named_params:
            # Nothing to prune (`torch.cat` would fail on an empty list).
            return {}

        # Calculate Quantile, using only the weights that are still free.
        free_weights = torch.cat(
            [
                param.masked_select(~self._assigned_mask(name, param))
                for name, param in named_params
            ],
            -1,
        )
        # NOTE: When every weight is already assigned to a previous task, there
        # is no quantile to compute: the new task gets empty masks.
        cutoff: Optional[Tensor] = None
        if free_weights.numel() > 0:
            cutoff = torch.quantile(torch.abs(free_weights), q=prune_quantile)

        masks = {}  # create mask for this task
        for param_full_name, param_layer in named_params:
            prev_mask = self._assigned_mask(param_full_name, param_layer)  # p
            if cutoff is None:
                curr_mask = torch.zeros_like(param_layer, dtype=torch.bool)
            else:
                curr_mask = torch.abs(param_layer).ge(cutoff)  # q
                curr_mask &= ~prev_mask  # (q & ~p)

            # Zero non masked weights
            param_layer *= curr_mask | prev_mask
            masks[param_full_name] = curr_mask

        return masks

    def fine_tune_mask(self, model: nn.Module):
        """
        Zero the gradient of pruned weights this task as well as previously fixed weights
        Apply this mask before each optimizer step during fine-tuning
        """
        assert len(self.masks) > self.current_task
        current_masks = self.masks[self.current_task]
        for param_full_name, param in self.filtered_parameter_iterator(model):
            if param.grad is None:
                # This parameter didn't receive a gradient in this backward
                # pass (e.g. frozen or unused): there is nothing to mask.
                continue
            param.grad *= current_masks[param_full_name]

    def training_mask(self, model: nn.Module):
        """
        Zero the gradient of only fixed weights for previous tasks
        Apply this mask after .backward() and before
        optimizer.step() at every batch of training a new task
        """
        if len(self.masks) == 0:
            return

        for param_full_name, param in self.filtered_parameter_iterator(model):
            if param.grad is None:
                # No gradient for this parameter: nothing to mask.
                continue
            # get mask of weights from previous tasks
            prev_mask = self._assigned_mask(param_full_name, param)
            # zero grad of previous fixed weights
            # param.grad[prev_mask] = 0.  # (NOTE: Equivalent)
            param.grad *= ~prev_mask

    def fix_biases(self, model: nn.Module):
        """
        Fix the gradient of prunable bias parameters

        Sets `requires_grad = False` on the parameters with "bias" in their
        name, in the prunable modules that aren't ignored.
        """
        for mod_name, mod in model.named_modules():
            if not isinstance(mod, self.prunable_types):
                continue
            if any(ignore in mod_name for ignore in self.ignore_modules):
                continue
            for name, param_layer in mod.named_parameters():
                if "bias" in name:
                    param_layer.requires_grad = False

    def fix_batch_norm(self, model: nn.Module):
        """
        Fix batch norm gain and bias

        Sets the `affine` flag of all `nn.BatchNorm2d` modules to False, and
        disables the gradients of their parameters.

        NOTE: This doesn't change the train/eval mode of the modules or their
        `track_running_stats` flag.
        """
        for mod in model.modules():
            if isinstance(mod, nn.BatchNorm2d):
                mod.affine = False
                for param_layer in mod.parameters():
                    param_layer.requires_grad = False

    def set_params_dict(self, model: nn.Module):
        """
        Set a dictionary containing all prunable parameters
        useful for fixing all layers, but may be wasted memory
        """
        # NOTE: This dict doesn't copy the parameters, it saves references.
        self.params_dict = dict(self.filtered_parameter_iterator(model))

    def fix_all_layers(self, model: nn.Module):
        """
        Fix grad of all parameters outside of params_dict

        Every parameter of the model that isn't one of the prunable parameters
        gets `requires_grad = False`.
        """
        self.set_params_dict(model)  # Not necessary for fixed model

        # NOTE: The parameters are compared by identity rather than by name:
        # names built from `named_modules()` x `named_parameters()` (which is
        # recursive) don't match the keys of `params_dict` in general, which
        # would freeze the prunable parameters as well.
        prunable_ids = {id(param) for param in self.params_dict.values()}

        # Fix grad of all non-prunable parameters.
        for param in model.parameters():
            if id(param) not in prunable_ids:
                param.requires_grad = False

    @torch.no_grad()
    def apply_eval_mask(self, model: nn.Module, task_idx: int):
        """
        Revert to final trained network state and apply mask for given task
        :param model: the model to apply the eval mask to
        :param task_idx: the task id to be evaluated (0 - > n_tasks)
        """
        assert len(self.masks) > task_idx

        masks_up_to_task = self.masks[: task_idx + 1]
        for param_full_name, param in self.filtered_parameter_iterator(model):
            # get indices of all weights from this task and the previous ones
            prev_mask = self._assigned_mask(param_full_name, param, masks_up_to_task)

            # zero out all weights that are not in the mask for this task
            # param[~prev_mask] = 0.  (NOTE: Equivalent)
            param *= prev_mask

    def mask_remaining_params(self, model: nn.Module) -> Dict[str, Tensor]:
        """
        Create mask for remaining parameters

        :return: for each prunable parameter, a mask of the weights that aren't
            assigned to any of the tasks in `self.masks`.
        """
        return {
            param_full_name: ~self._assigned_mask(param_full_name, param)
            for param_full_name, param in self.filtered_parameter_iterator(model)
        }

    def total_epochs(self) -> int:
        """Total number of epochs per task (training + fine-tuning)."""
        return sum(self.epoch_split)

    def config_instructions(self):
        """
        Create pruning instructions for this task split

        A single float is expanded into a list with one entry for each task
        except the last one.
        :return: None
        """
        assert self.n_tasks is not None

        if not isinstance(self.prune_instructions, list):  # if a float is passed in
            assert 0 < self.prune_instructions < 1
            self.prune_instructions = [self.prune_instructions] * (self.n_tasks - 1)
        assert (
            len(self.prune_instructions) == self.n_tasks - 1
        ), "Must give prune instructions for every task"

    def save_final_state(self, model, PATH="model_weights.pth"):
        """
        Save the final weights of the model after training
        :param model: pl_module
        :param PATH: The path to weights file
        """
        self.PATH = PATH
        torch.save(model.state_dict(), PATH)

    def load_final_state(self, model):
        """
        Load the final state of the model

        The weights are loaded in-place from the file written by
        `save_final_state`, onto the device of the model.
        :raises RuntimeError: if `save_final_state` wasn't called before.
        """
        if self.PATH is None:
            raise RuntimeError(
                "No saved weights to load: `save_final_state` has to be called first."
            )
        device = model.device
        # NOTE: `map_location` makes the loading independent of the device the
        # weights were on when they were saved.
        model.load_state_dict(torch.load(self.PATH, map_location=device))
        model.to(device)

    def on_init_end(self, trainer: Trainer):
        """Start in the "train" mode when the Trainer is created."""
        self.mode = "train"

    def on_after_backward(self, trainer: Trainer, pl_module: LightningModule):
        """Mask the gradients according to the current mode."""
        if self.mode == "train":
            self.training_mask(pl_module)

        elif self.mode == "fine_tune":
            self.fine_tune_mask(pl_module)

    def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule, *args, **kwargs):
        """Create the masks of the current task once its training epochs are done,
        and switch to the "fine_tune" mode.
        """
        super().on_train_epoch_end(trainer, pl_module)
        if pl_module.current_epoch == self.epoch_split[0] - 1:  # Train epochs completed
            self.mode = "fine_tune"
            new_masks: Dict[str, Tensor]
            if self.current_task == self.n_tasks - 1:
                # Last task: it gets all the remaining weights, without pruning.
                new_masks = self.mask_remaining_params(pl_module)
            else:
                new_masks = self.prune(
                    model=pl_module,
                    prune_quantile=self.prune_instructions[self.current_task],
                )
            self.masks.append(new_masks)

    def on_fit_end(self, trainer: Trainer, pl_module: LightningModule):
        """Freeze the biases and batch-norm parameters, save the weights, and go
        back to the "train" mode for the next task.
        """
        self.fix_biases(pl_module)  # Fix biases after first task
        self.fix_batch_norm(pl_module)  # Fix batch norm mean, var, and params
        # TODO: This may cause issues with output heads
        # self.fix_all_layers(pl_module)  # Fix all other layers -> may not be necessary?
        self.save_final_state(pl_module)
        self.mode = "train"
# TODO: Reset this to IncrementalAssumption after the fixes are made to BaseMethod in RL.
@dataclass
class PackNetMethod(BaseMethod, target_setting=IncrementalSLSetting):
    """Method that applies the `PackNet` callback to the model of the BaseMethod."""

    # NOTE: these two fields are also used to create the command-line arguments.
    # HyperParameters of the method.
    hparams: BaseModel.HParams = mutable_field(BaseModel.HParams)
    # Configuration options.
    config: Config = mutable_field(Config)
    # Options for the Trainer object.
    trainer_options: TrainerConfig = mutable_field(TrainerConfig)
    # Hyper-Parameters of the PackNet callback
    packnet_hparams: PackNet.HParams = mutable_field(PackNet.HParams)

    def __init__(
        self,
        hparams: BaseModel.HParams = None,
        config: Config = None,
        trainer_options: TrainerConfig = None,
        packnet_hparams: PackNet.HParams = None,
        **kwargs,
    ):
        """Create the Method.

        Parameters
        ----------
        hparams : BaseModel.HParams, optional
            Hyper-parameters of the model. Passed to the BaseMethod.
        config : Config, optional
            Configuration options. Passed to the BaseMethod.
        trainer_options : TrainerConfig, optional
            Options for the Trainer. Passed to the BaseMethod.
        packnet_hparams : PackNet.HParams, optional
            Hyper-parameters of the PackNet callback. Uses the default values
            when not given.
        **kwargs
            Accepted but not used.
        """
        super().__init__(hparams=hparams, config=config, trainer_options=trainer_options)
        self.packnet_hparams = packnet_hparams or PackNet.HParams()
        self.p_net: PackNet  # This gets set in configure

    def configure(self, setting: Setting):
        """Create the PackNet callback for the given setting, then configure the
        BaseMethod.
        """
        # NOTE: super().configure creates the Trainer and calls `configure_callbacks()`,
        # so we have to create `self.p_net` before calling `super().configure`.

        # Ignore all the modules that are task-specific when the setting gives task ids:
        # NOTE: Always ignore the `output_heads` dict, as it contains output heads for
        # each task.
        # NOTE: `model.output_heads[]` is the same as `model.output_head`.
        ignored_modules: List[str] = ["output_heads"]
        if setting.task_labels_at_test_time:
            # Also ignore the main output_head.
            ignored_modules.append("output_head")

        self.p_net = PackNet(
            n_tasks=setting.nb_tasks,
            hparams=self.packnet_hparams,
            ignore_modules=ignored_modules,
        )
        # No task yet: the current task gets set in `on_task_switch`.
        self.p_net.current_task = -1
        self.p_net.config_instructions()
        super().configure(setting)

    def fit(self, train_env, valid_env):
        """Train on the given environments (same as in the BaseMethod)."""
        super().fit(train_env=train_env, valid_env=valid_env)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching between tasks.

        When masks already exist for the new task, the final weights are
        reloaded and the masks of that task are applied to the model.

        Args:
            task_id (int, optional): the id of the new task. When None, we are
            basically being informed that there is a task boundary, but without
            knowing what task we're switching to.
        """
        super().on_task_switch(task_id=task_id)

        if task_id is not None and len(self.p_net.masks) > task_id:
            self.p_net.load_final_state(model=self.model)
            self.p_net.apply_eval_mask(task_idx=task_id, model=self.model)

        self.p_net.current_task = task_id

    def configure_callbacks(self, setting: TaskIncrementalSLSetting = None) -> List[Callback]:
        """Create the PyTorch-Lightning Callbacks for this Setting.

        These callbacks will get added to the Trainer in `create_trainer`.
        The `EarlyStopping` callbacks of the BaseMethod are removed, and the
        PackNet callback is added when the context isn't stationary.

        Parameters
        ----------
        setting : SettingType
            The `Setting` on which this Method is going to be applied.

        Returns
        -------
        List[Callback]
            A List of `Callback` objects to use during training.
        """
        callbacks = super().configure_callbacks(setting=setting)
        assert self.p_net not in callbacks

        # Remove the early stopping callbacks.
        # NOTE: This filters the list in one go (in-place) instead of popping
        # items while looping over the indices of the original list, which
        # skips items and raises an IndexError as soon as an item is removed.
        callbacks[:] = [
            callback for callback in callbacks if not isinstance(callback, EarlyStopping)
        ]

        if not setting.stationary_context:
            callbacks.append(self.p_net)
        return callbacks

    def create_trainer(self, setting) -> Trainer:
        """Creates a Trainer object from pytorch-lightning for the given setting.

        The maximum number of epochs is set to the number of training epochs
        plus the number of fine-tuning epochs of PackNet.

        Returns:
            Trainer: the Trainer object.
        """
        self.trainer_options.max_epochs = (
            self.packnet_hparams.train_epochs + self.packnet_hparams.fine_tune_epochs
        )
        return super().create_trainer(setting)

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Split the hyper-parameters of PackNet (nested under the
        # 'packnet_hparams' key, see `get_search_space`) from those of the
        # model, so they aren't passed as a field of the model hparams.
        # NOTE: Works on a copy, so the dict of the caller isn't modified.
        model_hparams = dict(new_hparams)
        new_packnet_hparams = model_hparams.pop("packnet_hparams", None)

        self.hparams = self.hparams.replace(**model_hparams)
        if new_packnet_hparams:
            self.packnet_hparams = self.packnet_hparams.replace(**new_packnet_hparams)

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        hparam_priors: Dict = super().get_search_space(setting=setting)
        hparam_priors["packnet_hparams"] = self.packnet_hparams.get_orion_space_dict()
        return hparam_priors
================================================
FILE: sequoia/methods/packnet_method_test.py
================================================
from typing import ClassVar, Type
from sequoia.methods.base_method_test import TestBaseMethod as BaseMethodTests
from sequoia.methods.packnet_method import PackNetMethod
class TestPackNetMethod(BaseMethodTests):
    """Runs the tests of the BaseMethod (`BaseMethodTests`) with the PackNetMethod."""

    # The type of Method under test.
    Method: ClassVar[Type[PackNetMethod]] = PackNetMethod

    def validate_results(self, setting, method, results):
        """Called at the end of each test run to check that the results make sense for
        the given setting and method.

        For now, this only performs the checks of the base class.
        """
        super().validate_results(setting, method, results)
        # TODO: Add checks to make sure that the packnet callback's state makes sense
        # for the given setting.
================================================
FILE: sequoia/methods/pl_bolts_methods/__init__.py
================================================
"""TODO: Add some of the PyTorch Lightning Bolts models (and the like) as Methods
targeting the IID Setting.
TODO: Also figure out a way to treat LightningDataModules that aren't Settings
as 'IID' settings, so that we can get all the methods, models and datamodules
from pl_bolts for free.
"""
================================================
FILE: sequoia/methods/pl_dqn.py
================================================
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Deep Reinforcement Learning: Deep Q-network (DQN)
The template illustrates using Lightning for Reinforcement Learning. The example builds a basic DQN using the
classic CartPole environment.
To run the template, just run:
`python template/methods/rl/dqn_pl.py`
After ~1500 steps, you will see the total_reward hitting the max score of 475+.
Open up TensorBoard to see the metrics:
`tensorboard --logdir default`
References
----------
[1] https://github.com/PacktPublishing/Deep-Reinforcement-Learning-Hands-On-
Second-Edition/blob/master/Chapter06/02_dqn_pong.py
"""
import dataclasses
from collections import defaultdict, deque
from dataclasses import dataclass
from typing import (
Any,
Callable,
Container,
Deque,
Generic,
Iterator,
List,
Optional,
Sequence,
SupportsFloat,
SupportsInt,
Tuple,
Type,
TypeVar,
Union,
)
import gym
import numpy as np
import pytorch_lightning as pl
import simple_parsing
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
from gym.spaces import Discrete
from sequoia.common.spaces.typed_dict import TypedDictSpace
from simple_parsing import ArgumentParser, Serializable
from torch import Tensor
from torch.nn import functional as F
from torch.optim.optimizer import Optimizer
from torch.utils.data import DataLoader
from torch.utils.data.dataset import IterableDataset
class DQN(nn.Module):
    """Small fully-connected Q-network: Linear -> ReLU -> Linear."""

    def __init__(self, obs_size: int, n_actions: int, hidden_size: int = 128):
        """Build the network.

        Args:
            obs_size: observation/state size of the environment
            n_actions: number of discrete actions available in the environment
            hidden_size: size of the hidden layer
        """
        super().__init__()
        layers = [
            nn.Linear(obs_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, n_actions),
        ]
        # Stored under `net` so the parameter names are `net.0.*` and `net.2.*`.
        self.net = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return one output per action for `x`, after casting `x` to a float32 tensor."""
        as_float = torch.as_tensor(x, dtype=torch.float32)
        return self.net(as_float)
# Array type held by an experience / batch of experiences: numpy arrays or torch tensors.
T = TypeVar("T", np.ndarray, Tensor)
# Second array type variable, used for the result type when a batch is mapped from one
# array type to another (see `ExperienceBatch._map`).
V = TypeVar("V", np.ndarray, Tensor)
@dataclass
class Experience(Generic[T]):
    """Experience for one step: a single (state, action, reward, done, new_state) transition."""

    # State the action was taken from.
    state: T
    # Action that was taken.
    action: SupportsInt
    # Reward received for that action.
    reward: SupportsFloat
    # Whether the step was reported as the end of the episode.
    done: bool
    # State that resulted from the action.
    new_state: T
@dataclass
class ExperienceBatch(Generic[T]):
    """Experience for more than one step, stored field-by-field as stacked arrays.

    Note: neighbouring indices can be independent, i.e. this isn't necessarily a
    sequence of consecutive steps in an env.
    """

    states: T
    actions: T
    rewards: T
    dones: T
    new_states: T

    def __len__(self) -> int:
        """Number of experiences in the batch (length of `dones`)."""
        return len(self.dones)

    def __getitem__(self, index: Union[int, slice]) -> Union[Experience[T], "ExperienceBatch[T]"]:
        """Return a single `Experience` for an integer index, or a sub-batch otherwise.

        Iterating over a batch goes through this method with 0, 1, 2, ... until the
        underlying arrays raise `IndexError`.
        """
        # NumPy integer scalars (e.g. entries of an index array) are not `int` instances,
        # but they select a single experience just like a Python int does.
        if isinstance(index, (int, np.integer)):
            return Experience(  # type: ignore
                state=self.states[index],
                action=self.actions[index],
                reward=self.rewards[index],
                done=bool(self.dones[index]),
                new_state=self.new_states[index],
            )
        return ExperienceBatch(
            states=self.states[index],
            actions=self.actions[index],
            rewards=self.rewards[index],
            dones=self.dones[index],
            new_states=self.new_states[index],
        )

    @classmethod
    def stack(cls, items: Sequence["Experience[T]"]) -> "ExperienceBatch[T]":
        """Stack single experiences into a batch.

        Each field `<name>` of the items becomes the field `<name>s` of the batch.
        NumPy stacking is used when the first item's state is a numpy array, torch
        stacking otherwise.

        Args:
            items: non-empty sequence of experiences (dataclass instances with the
                same fields).

        Raises:
            IndexError: if `items` is empty.
        """
        first = items[0]
        field_names = [f.name for f in dataclasses.fields(first)]

        if isinstance(first.state, np.ndarray):
            stack_fn = np.stack
        else:

            def stack_fn(values):
                # `torch.stack` only accepts tensors, while actions / rewards / dones are
                # usually plain Python scalars: convert each value first.
                return torch.stack([torch.as_tensor(v) for v in values])

        return cls(  # type: ignore
            **{
                name + "s": stack_fn([getattr(item, name) for item in items])
                for name in field_names
            }
        )

    def _map(self, fn: Callable[[T], V]) -> "ExperienceBatch[V]":
        """Return a new batch with `fn` applied to every field."""
        return type(self)(  # type: ignore
            **{f.name: fn(getattr(self, f.name)) for f in dataclasses.fields(self)}
        )

    def numpy(self) -> "ExperienceBatch[np.ndarray]":
        """Return a copy of the batch whose fields are numpy arrays."""

        def _numpy(v) -> np.ndarray:
            return v.detach().cpu().numpy() if isinstance(v, Tensor) else np.array(v)

        return self._map(_numpy)

    def to(self, device: torch.device = None, **kwargs) -> "ExperienceBatch[Tensor]":
        """Return a copy of the batch whose fields are tensors on `device`.

        Extra keyword arguments are forwarded to `torch.as_tensor`.
        """
        return self._map(lambda v: torch.as_tensor(v, device=device, **kwargs))
# Type variable for `Experience` subclasses.
# NOTE(review): no use of `E` is visible in this part of the file - confirm before removing.
E = TypeVar("E", bound=Experience)
class ReplayBuffer(Generic[T]):
    """Bounded store of past experiences that the agent can learn from.

    Once `capacity` experiences are stored, appending a new one drops the oldest.

    >>> buffer = ReplayBuffer(5)
    """

    def __init__(self, capacity: int) -> None:
        """
        Args:
            capacity: size of the buffer
        """
        self.buffer: Deque[Experience[T]] = deque(maxlen=capacity)

    def __len__(self) -> int:
        """Number of experiences currently stored."""
        return len(self.buffer)

    def append(self, experience: Experience[T]) -> None:
        """Store one more experience.

        Args:
            experience: the (state, action, reward, done, new_state) transition to store
        """
        self.buffer.append(experience)

    def sample(
        self,
        batch_size: int,
    ) -> ExperienceBatch[T]:
        """Draw `batch_size` distinct stored experiences at random, stacked into a batch."""
        n_stored = len(self.buffer)
        positions = np.random.choice(n_stored, batch_size, replace=False)
        drawn: List[Experience[T]] = []
        for position in positions:
            drawn.append(self.buffer[position])
        return ExperienceBatch.stack(drawn)
class RLDataset(IterableDataset[ExperienceBatch[T]]):
    """Iterable dataset backed by a replay buffer that keeps receiving new experiences
    during training.

    >>> dataset = RLDataset(ReplayBuffer(5))
    """

    def __init__(self, buffer: ReplayBuffer, sample_size: int = 200) -> None:
        """
        Args:
            buffer: replay buffer
            sample_size: number of experiences to sample at a time
        """
        self.sample_size = sample_size
        self.buffer = buffer

    def __iter__(self) -> Iterator[Experience[T]]:
        """Sample `sample_size` experiences from the buffer and yield them one by one."""
        for experience in self.buffer.sample(self.sample_size):
            assert isinstance(experience, Experience), experience
            yield experience
class Agent:
    """Base Agent class handling the interaction with the environment.

    ```python
    env = gym.make("CartPole-v1")
    buffer = ReplayBuffer(10)
    agent = Agent(env, buffer)
    ```
    """

    def __init__(self, env: gym.Env, replay_buffer: ReplayBuffer) -> None:
        """
        Args:
            env: training environment
            replay_buffer: replay buffer storing experiences
        """
        self.env = env
        self.replay_buffer = replay_buffer
        # `reset` resets the env and stores the first state. Resetting a second time
        # here would only throw that first episode away.
        self.reset()

    def reset(self) -> None:
        """Resets the environment and updates the state."""
        self.state = self.env.reset()

    def get_action(self, state: Tensor, net: nn.Module, epsilon: float) -> int:
        """Using the given network, decide what action to carry out using an epsilon-greedy policy.

        Args:
            state: current state, passed as-is to `net`
            net: DQN network
            epsilon: value to determine likelihood of taking a random action

        Returns:
            action
        """
        if np.random.random() < epsilon:
            # Explore: random action from the env's action space.
            action = self.env.action_space.sample()
        else:
            # Exploit: action with the highest predicted value.
            q_values = net(state)
            _, action = torch.max(q_values, dim=-1)
            # TODO: Adapt this for batched actions.
            action = int(action.item())
        return action

    @torch.no_grad()
    def play_step(
        self,
        net: nn.Module,
        epsilon: float = 0.0,
        device: Union[str, torch.device] = "cpu",
    ) -> Tuple[float, bool]:
        """Carries out a single interaction step between the agent and the environment.

        The resulting transition is appended to the replay buffer, and the env is
        reset when the step ends the episode.

        Args:
            net: DQN network
            epsilon: value to determine likelihood of taking a random action
            device: current device

        Returns:
            reward, done
        """
        # Add a leading batch dimension of size 1 around the current state.
        state = torch.as_tensor([self.state], device=torch.device(device))
        action = self.get_action(state=state, net=net, epsilon=epsilon)
        # do step in the environment
        new_state, reward, done, _ = self.env.step(action)
        exp = Experience(
            state=self.state,
            action=action,
            reward=reward,
            done=done,
            new_state=new_state,
        )
        self.replay_buffer.append(exp)
        self.state = new_state
        if done:
            self.state = self.env.reset()
        return reward, done
class DQNLightning(pl.LightningModule):
    """Basic DQN Model.

    Holds the online and target Q-networks, the replay buffer and the agent that
    steps through the environment. One environment step is taken per training step.

    ```python
    DQNLightning(env="CartPole-v1")
    ```
    """

    @dataclass
    class HParams(Serializable):
        # Size of the batches.
        batch_size: int = 16
        # learning rate.
        lr: float = 1e-2
        # Discount factor.
        gamma: float = 0.99
        # Interval at which we update the target network.
        sync_rate: int = 10
        # Capacity of the replay buffer.
        replay_size: int = 1000
        # How many samples do we use to fill our buffer at the start of training.
        warm_start_steps: int = 1000
        # The frame at which epsilon should stop decaying.
        eps_last_frame: int = 1000
        # Starting value of epsilon.
        eps_start: float = 1.0
        # Final value of epsilon
        eps_end: float = 0.01
        # Max length of an episode.
        # NOTE(review): not read anywhere in this class; the dataloader uses the episode
        # length inferred from the env instead - confirm whether this is intended.
        episode_length: int = 200

    def __init__(self, env: Union[str, gym.Env[np.ndarray, int]], hp: HParams = None) -> None:
        """
        Args:
            env: environment, or id of the environment to create with `gym.make`.
            hp: hyper-parameters; defaults to `HParams()`.

        Raises:
            RuntimeError: if the observation space isn't a `Box` or the action space
                isn't `Discrete`.
        """
        super().__init__()
        self.hp = hp or self.HParams()
        self.save_hyperparameters({"hp": self.hp.to_dict()})
        self.env = gym.make(env) if isinstance(env, str) else env
        from gym.spaces import Box, Discrete

        self.episode_length: Optional[int] = get_max_episode_length(self.env)
        if not isinstance(self.env.observation_space, Box):
            raise RuntimeError(
                f"Only works on envs with Box observation space, not {self.env.observation_space}."
            )
        if not isinstance(self.env.action_space, Discrete):
            raise RuntimeError(
                f"Only works on envs with Discrete action space, not {self.env.action_space}."
            )
        from gym.spaces.utils import flatdim

        # TODO: Adapt this to also work with image observations.
        obs_size = flatdim(self.env.observation_space)
        n_actions = self.env.action_space.n
        self.net = DQN(obs_size, n_actions)
        self.target_net = DQN(obs_size, n_actions)
        self.buffer = ReplayBuffer(self.hp.replay_size)
        self.agent = Agent(self.env, self.buffer)
        self.total_reward = 0
        self.episode_reward = 0
        self.trainer: Optional[pl.Trainer]
        # Fill the replay buffer before training starts.
        self.populate(self.hp.warm_start_steps)

    def populate(self, steps: int = 1000) -> None:
        """Carries out several random steps through the environment to initially fill up the replay buffer with
        experiences.

        Stops early if the environment gets closed.

        Args:
            steps: number of random steps to populate the buffer with
        """
        for i in range(steps):
            try:
                # epsilon=1.0: every action is sampled at random.
                self.agent.play_step(self.net, epsilon=1.0)
            except gym.error.ClosedEnvironmentError:
                print(f"Unable to add more data to the buffer: env closed after {i} steps.")
                break

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Passes in a state `x` through the network and gets the `q_values` of each action as an output.

        Args:
            x: environment state

        Returns:
            q values
        """
        output = self.net(x)
        return output

    def dqn_mse_loss(self, batch: ExperienceBatch[Tensor]) -> torch.Tensor:
        """Calculates the mse loss using a mini batch from the replay buffer.

        Args:
            batch: current mini batch of replay data

        Returns:
            loss
        """
        states = batch.states
        actions = batch.actions
        rewards = batch.rewards.type(dtype=torch.float32)
        dones = batch.dones
        next_states = batch.new_states
        values: Tensor = self.net(states)
        # Value predicted by the online network for the action that was actually taken.
        state_action_values = values.gather(1, actions.unsqueeze(-1)).squeeze(-1)
        with torch.no_grad():
            next_state_values: Tensor = self.target_net(next_states).max(1)[0]
            # No future value after a step that ended the episode.
            next_state_values[dones] = 0.0
            next_state_values = next_state_values.detach()
        expected_state_action_values = next_state_values * self.hp.gamma + rewards
        return F.mse_loss(state_action_values, expected_state_action_values)

    def training_step(self, batch: ExperienceBatch[Tensor], batch_idx: int) -> Optional[Tensor]:
        """Carries out a single step through the environment to update the replay buffer. Then calculates loss
        based on the minibatch received.

        Args:
            batch: current mini batch of replay data
            batch_idx: batch index

        Returns:
            Training loss, or None when the environment was closed (in which case the
            trainer is asked to stop).
        """
        device = batch.states.device
        # Epsilon decreases linearly with the global step, down to `eps_end`.
        epsilon = max(
            self.hp.eps_end,
            self.hp.eps_start - (self.global_step + 1) / self.hp.eps_last_frame,
        )
        try:
            # step through environment with agent
            reward, done = self.agent.play_step(self.net, epsilon, device)
        except gym.error.ClosedEnvironmentError:
            print(f"Environment closed at batch {batch_idx}")
            assert self.trainer is not None
            self.trainer.should_stop = True
            return None
        self.episode_reward += reward
        # calculates training loss
        loss = self.dqn_mse_loss(batch)
        if done:
            self.total_reward = self.episode_reward
            self.episode_reward = 0
        # Periodic update of the target network: full copy of the online network's weights.
        if self.global_step % self.hp.sync_rate == 0:
            self.target_net.load_state_dict(self.net.state_dict())
        self.log_dict(
            {
                "total_reward": self.total_reward,
                "reward": reward,
                "steps": float(self.global_step),
            },
            prog_bar=True,
        )
        return loss

    def configure_optimizers(self) -> List[Optimizer]:
        """Initialize Adam optimizer."""
        optimizer = optim.Adam(self.net.parameters(), lr=self.hp.lr)
        return [optimizer]

    def __dataloader(self) -> DataLoader:
        """Initialize the Replay Buffer dataset used for retrieving experiences."""
        # Falls back to 200 samples per pass when the env has no known episode length.
        dataset = RLDataset(self.buffer, sample_size=self.episode_length or 200)
        dataloader = DataLoader(
            dataset=dataset,
            batch_size=self.hp.batch_size,
            sampler=None,
            collate_fn=ExperienceBatch.stack,
        )
        return dataloader

    def train_dataloader(self) -> DataLoader:
        """Get train loader."""
        return self.__dataloader()

    def get_device(self, batch) -> str:
        """Retrieve device currently being used by minibatch.

        Returns "cpu" when not on GPU, otherwise the index of the batch's device.
        """
        if not self.on_gpu:
            return "cpu"
        # Indexing an `ExperienceBatch` gives an `Experience`, which has no `.device`:
        # read the device from its `states` tensor instead. Other batches are treated
        # as sequences of tensors, as before.
        reference = batch.states if isinstance(batch, ExperienceBatch) else batch[0]
        return reference.device.index

    @classmethod
    def add_model_specific_args(cls, parent_parser: ArgumentParser):  # pragma: no-cover
        """Add the `HParams` fields to `parent_parser` under the "hp" destination."""
        parent_parser.add_arguments(cls.HParams, "hp")
        return parent_parser
def get_max_episode_length(env: Union[gym.Env, gym.Wrapper]) -> Optional[int]:
    """Return the maximum episode length of `env`, or None if it cannot be determined.

    Wrappers are peeled off from the outside in. The first `gym.wrappers.TimeLimit`
    found provides the answer. If there is none, the `max_episode_steps` of the
    innermost env's spec is used, when it has a spec.
    """
    current = env
    while True:
        if not isinstance(current, gym.Wrapper):
            break
        if isinstance(current, gym.wrappers.TimeLimit):
            return current._max_episode_steps
        current = current.env
    spec = current.spec
    return None if spec is None else spec.max_episode_steps
from sequoia import Method
from sequoia.settings.rl import RLEnvironment, RLSetting
from sequoia.settings.rl.objects import Actions, Observations, Rewards
class PlDqnMethod(Method, target_setting=RLSetting):
    """Method that trains a `DQNLightning` model with PyTorch-Lightning on RL settings."""

    def __init__(self, hp: DQNLightning.HParams = None) -> None:
        """
        Args:
            hp: hyper-parameters of the model; defaults to `DQNLightning.HParams()`.
        """
        super().__init__()
        self.hp = hp or DQNLightning.HParams()
        self.model: Optional[DQNLightning] = None

    def configure(self, setting: RLSetting) -> None:
        """Discard any previous model and read the training budget from the setting."""
        self.model = None
        self.train_max_steps = setting.train_max_steps

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train the model on `train_env`.

        The model is created on the first call after `configure`. On later calls the
        existing model is pointed at the new environment and training continues.
        `valid_env` is currently not used.
        """
        from sequoia.common.gym_wrappers import (
            TransformAction,
            TransformObservation,
            TransformReward,
        )

        # Our simple DQN model expects to get arrays / integer actions, so we adapt the env a bit
        # using some wrappers.
        train_env = TransformObservation(train_env, lambda obs: obs.x)
        train_env = TransformReward(train_env, lambda rew: rew.y)
        if isinstance(train_env.action_space, TypedDictSpace):
            actions_type: Type[Actions] = train_env.action_space.dtype
            # Make it possible to send just ints to the env, and wrap them up into an Actions object.
            train_env = TransformAction(train_env, lambda act: actions_type(y_pred=act))

        if self.model is None:
            self.model = DQNLightning(env=train_env, hp=self.hp)
        else:
            # The model's agent still holds the env of the previous `fit` call. Without
            # rebinding it, the new `train_env` would never be used.
            self.model.env = train_env
            self.model.agent = Agent(train_env, self.model.buffer)
            self.model.episode_reward = 0

        trainer_kwargs = {
            "val_check_interval": 100,
            "max_steps": self.train_max_steps,
        }
        # Only request a GPU (and the "dp" strategy) when one is actually available,
        # so that the method also runs on CPU-only machines.
        if torch.cuda.is_available():
            trainer_kwargs.update(gpus=1, strategy="dp")
        trainer = pl.Trainer(**trainer_kwargs)
        trainer.fit(self.model)

    def get_actions(self, observations: Observations, action_space: Discrete) -> Actions:
        """Return the greedy action(s) of the model for `observations`.

        NOTE(review): the value returned is a numpy array rather than an `Actions`
        object - confirm that callers accept this.
        """
        assert self.model is not None
        with torch.no_grad():
            obs = torch.as_tensor(
                observations.x,
                device=torch.device(self.model.device),
                dtype=self.model.dtype,
            )
            v = self.model.forward(obs)
            selected_action = v.argmax(-1).cpu().numpy()
        return selected_action
def main() -> None:
    """Apply `PlDqnMethod` to a single-task CartPole setting and print the results.

    Command-line arguments: the `DQNLightning.HParams` fields (under `hp`), and
    `--seed` to seed the random number generators.
    """
    parser = ArgumentParser()
    parser = DQNLightning.add_model_specific_args(parser)
    parser.add_argument("--seed", type=int, default=None, help="Random seed")
    args = parser.parse_args()

    # Only seed when asked to, so that runs without `--seed` behave as before.
    if args.seed is not None:
        pl.seed_everything(args.seed)

    from sequoia.settings.rl import MultiTaskRLSetting

    setting = MultiTaskRLSetting(
        dataset="CartPole-v1",
        nb_tasks=1,
        train_max_steps=2_000,
    )
    setting.prepare_data()
    setting.setup()
    setting.train_dataloader()
    setting.test_dataloader()

    # Use the hyper-parameters parsed from the command line instead of discarding them.
    method = PlDqnMethod(hp=args.hp)
    from sequoia.common.config import Config

    results = setting.apply(method, config=Config(debug=True))
    print(results)


if __name__ == "__main__":
    main()
================================================
FILE: sequoia/methods/pnn/__init__.py
================================================
from .layers import PNNConvLayer, PNNLinearBlock
from .model_rl import PnnA2CAgent
from .model_sl import PnnClassifier
from .pnn_method import PnnMethod
================================================
FILE: sequoia/methods/pnn/layers.py
================================================
import torch.nn as nn
import torch.nn.functional as F
"""
Based on https://github.com/TomVeniat/ProgressiveNeuralNetworks.pytorch
"""
class PNNConvLayer(nn.Module):
    """Convolutional layer of a PNN column, with lateral convolutions from earlier columns.

    The column's own convolution is applied to the last input. When `depth > 0`,
    `col` lateral convolutions are created and applied to the first inputs. All the
    results are summed and passed through a ReLU.
    """

    def __init__(self, col, depth, n_in, n_out, kernel_size=3):
        super().__init__()
        self.col = col
        self.layer = nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1)
        self.u = nn.ModuleList()
        # The first layer of a column (depth 0) has no lateral connections.
        n_lateral = col if depth > 0 else 0
        for _ in range(n_lateral):
            self.u.append(nn.Conv2d(n_in, n_out, kernel_size, stride=2, padding=1))

    def forward(self, inputs):
        """Apply the layer to a single tensor, or to a list with one tensor per column."""
        columns = inputs if isinstance(inputs, list) else [inputs]
        own_output = self.layer(columns[-1])
        lateral_total = 0
        for lateral, column_input in zip(self.u, columns):
            lateral_total = lateral_total + lateral(column_input)
        return F.relu(own_output + lateral_total)
class PNNLinearBlock(nn.Module):
    """Linear layer of a PNN column, with lateral linear layers from earlier columns.

    The column's own layer is applied to the last input. When `depth > 0`, `col`
    lateral layers are created and applied to the first inputs. All the results are
    summed and passed through a ReLU.
    """

    def __init__(self, col: int, depth: int, n_in: int, n_out: int):
        super().__init__()
        self.layer = nn.Linear(n_in, n_out)
        self.u = nn.ModuleList()
        # The first layer of a column (depth 0) has no lateral connections.
        n_lateral = col if depth > 0 else 0
        for _ in range(n_lateral):
            self.u.append(nn.Linear(n_in, n_out))

    def forward(self, inputs):
        """Apply the block to a single tensor, or to a list with one tensor per column."""
        columns = inputs if isinstance(inputs, list) else [inputs]
        own_output = self.layer(columns[-1])
        lateral_total = 0
        for lateral, column_input in zip(self.u, columns):
            lateral_total = lateral_total + lateral(column_input)
        return F.relu(own_output + lateral_total)
================================================
FILE: sequoia/methods/pnn/model_rl.py
================================================
from typing import List
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from .layers import PNNConvLayer, PNNLinearBlock
class PnnA2CAgent(nn.Module):
    """Actor-critic agent built from progressive-network columns, one column per task.

    @article{rusu2016progressive,
      title={Progressive neural networks},
      author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
      journal={arXiv preprint arXiv:1606.04671},
      year={2016}
    }
    """

    def __init__(self, arch="mlp", hidden_size=256):
        """
        Args:
            arch: "mlp" for vector observations; any other value selects the
                convolutional path in `forward` ("conv" in `new_task`).
            hidden_size: number of hidden units of the columns.
        """
        super(PnnA2CAgent, self).__init__()
        self.columns_actor = nn.ModuleList([])
        self.columns_critic = nn.ModuleList([])
        self.columns_conv = nn.ModuleList([])
        self.arch = arch
        self.hidden_size = hidden_size
        # TODO: This doesn't take the observation space into account at all!
        # Only works for Pixel Cartpole at the moment.
        # Original size 3 x 400 x 600
        self.transformation = transforms.Compose(
            [
                transforms.ToPILImage(),
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
            ]
        )

    def forward(self, observations):
        """Return the (critic output, action probabilities) of the observation's task column.

        Every column is evaluated; the outputs of the column selected by
        `observations.task_labels` are returned.
        """
        assert (
            self.columns_actor
        ), "PNN should at least have one column (missing call to `new_task` ?)"
        t = observations.task_labels
        if self.arch == "mlp":
            x = torch.from_numpy(observations.x).unsqueeze(0).float()
            # First linear layer + ReLU of every column.
            inputs_critic = [c[1](c[0](x)) for c in self.columns_critic]
            inputs_actor = [c[1](c[0](x)) for c in self.columns_actor]
            outputs_critic = []
            outputs_actor = []
            # PNN block of column i, fed with the features of columns 0..i.
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[2](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][2](inputs_actor[: i + 1]))
            # Index of the output layer inside each actor / critic column.
            ind_depth = 3
        else:
            x = self.transfor_img(observations.x).unsqueeze(0).float()
            inputs = [c[1](c[0](x)) for c in self.columns_conv]
            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[3](column[2](inputs[: i + 1])))
            inputs = outputs
            outputs = []
            for i, column in enumerate(self.columns_conv):
                outputs.append(column[5](column[4](inputs[: i + 1])))
            # Pooled conv features, flattened; shared by the actor and the critic.
            inputs_critic = [c[6](outputs[i]).view(1, -1) for i, c in enumerate(self.columns_conv)]
            inputs_actor = inputs_critic[:]
            outputs_critic = []
            outputs_actor = []
            for i, column in enumerate(self.columns_critic):
                outputs_critic.append(column[0](inputs_critic[: i + 1]))
                outputs_actor.append(self.columns_actor[i][0](inputs_actor[: i + 1]))
            ind_depth = 1
        critic = []
        for i, column in enumerate(self.columns_critic):
            critic.append(column[ind_depth](outputs_critic[i]))
        actor = []
        for i, column in enumerate(self.columns_actor):
            actor.append(F.softmax(column[ind_depth](outputs_actor[i]), dim=1))
        return critic[t], actor[t]

    def new_task(self, device, num_inputs, num_actions=5):
        """Add a new column (actor, critic, and conv trunk when `arch == "conv"`).

        NOTE(review): `device` is accepted but not used here - the new modules are
        not moved to it.
        """
        task_id = len(self.columns_actor)
        if self.arch == "conv":
            sizes = [num_inputs, 32, 64, self.hidden_size]
            modules_conv = nn.Sequential()
            modules_conv.add_module("Conv1", PNNConvLayer(task_id, 0, sizes[0], sizes[1]))
            modules_conv.add_module("MaxPool1", nn.MaxPool2d(3))
            modules_conv.add_module("Conv2", PNNConvLayer(task_id, 1, sizes[1], sizes[2]))
            modules_conv.add_module("MaxPool2", nn.MaxPool2d(3))
            modules_conv.add_module("Conv3", PNNConvLayer(task_id, 2, sizes[2], sizes[3]))
            modules_conv.add_module("MaxPool3", nn.MaxPool2d(3))
            modules_conv.add_module("globavgpool2d", nn.AdaptiveAvgPool2d((1, 1)))
            self.columns_conv.append(modules_conv)
        modules_actor = nn.Sequential()
        modules_critic = nn.Sequential()
        # The input layer + ReLU only exist for the "mlp" arch; `forward` relies on the
        # resulting module indices.
        if self.arch == "mlp":
            modules_actor.add_module("linAc1", nn.Linear(num_inputs, self.hidden_size))
            modules_actor.add_module("relAc", nn.ReLU(inplace=True))
        modules_actor.add_module(
            "linAc2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_actor.add_module("linAc3", nn.Linear(self.hidden_size, num_actions))
        if self.arch == "mlp":
            modules_critic.add_module("linCr1", nn.Linear(num_inputs, self.hidden_size))
            modules_critic.add_module("relCr", nn.ReLU(inplace=True))
        modules_critic.add_module(
            "linCr2", PNNLinearBlock(task_id, 1, self.hidden_size, self.hidden_size)
        )
        modules_critic.add_module("linCr3", nn.Linear(self.hidden_size, 1))
        self.columns_actor.append(modules_actor)
        self.columns_critic.append(modules_critic)
        print("Add column of the new task")

    def unfreeze_columns(self):
        """Make the parameters of every column trainable."""
        for i, c in enumerate(self.columns_actor):
            for params in c.parameters():
                params.requires_grad = True
            for params in self.columns_critic[i].parameters():
                params.requires_grad = True
        for i, c in enumerate(self.columns_conv):
            for params in c.parameters():
                params.requires_grad = True

    def freeze_columns(self, skip: List[int] = None):
        """Freeze the parameters of every column whose index is not in `skip`.

        All columns are unfrozen first, so columns listed in `skip` end up trainable.
        """
        if skip is None:
            skip = []
        self.unfreeze_columns()
        for i, c in enumerate(self.columns_actor):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False
                for params in self.columns_critic[i].parameters():
                    params.requires_grad = False
        for i, c in enumerate(self.columns_conv):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False
        print("Freeze columns from previous tasks")

    def parameters(self, task_id: int = None, recurse: bool = True):
        """Return the parameters of the column of task `task_id`, as a list.

        The list contains the critic parameters, then the actor parameters, then the
        conv parameters (if there are conv columns).

        When `task_id` is None, this behaves like `nn.Module.parameters` and returns an
        iterator over all parameters. `nn.Module` methods such as `zero_grad` call
        `self.parameters()` without arguments, which fails if `task_id` is required.
        """
        if task_id is None:
            return super().parameters(recurse=recurse)
        param = []
        for p in self.columns_critic[task_id].parameters():
            param.append(p)
        for p in self.columns_actor[task_id].parameters():
            param.append(p)
        if len(self.columns_conv) > 0:
            for p in self.columns_conv[task_id].parameters():
                param.append(p)
        return param

    def transfor_img(self, img):
        """Apply the image preprocessing pipeline built in `__init__` to `img`."""
        return self.transformation(img)
        # return lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.
================================================
FILE: sequoia/methods/pnn/model_sl.py
================================================
from typing import List, Optional, Tuple
import torch
import torch.nn as nn
from torch import Tensor
from sequoia.settings import Actions, PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Observations, Rewards
from sequoia.utils.logging_utils import get_logger
from .layers import PNNLinearBlock
logger = get_logger(__name__)
class PnnClassifier(nn.Module):
    """Classifier built from progressive-network columns of linear blocks, one column per task.

    @article{rusu2016progressive,
      title={Progressive neural networks},
      author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
      journal={arXiv preprint arXiv:1606.04671},
      year={2016}
    }
    """

    def __init__(self, n_layers):
        """
        Args:
            n_layers: number of layers in each column.
        """
        super().__init__()
        self.n_layers = n_layers
        self.columns = nn.ModuleList([])
        self.loss = torch.nn.CrossEntropyLoss()
        # Set by `new_task`.
        self.device = None
        self.n_tasks = 0
        self.n_classes_per_task: List[int] = []

    def forward(self, observations: Observations):
        """Return the logits for a batch of observations.

        Every column is evaluated. Each sample gets the output of the column of its
        task label. Samples whose task has no column yet use the last column, and
        random task ids are used when there are no task labels at all.
        """
        assert self.columns, "PNN should at least have one column (missing call to `new_task` ?)"
        x = observations.x
        x = torch.flatten(x, start_dim=1)
        task_labels: Optional[Tensor] = observations.task_labels
        batch_size = x.shape[0]
        n_known_tasks = len(self.columns)
        last_known_task_id = n_known_tasks - 1
        if task_labels is None:
            # TODO: Use random output heads per item?
            logger.warning(
                f"Encoutering None task labels, assigning a fake random task id for each sample."
            )
            task_labels = torch.randint(n_known_tasks, (batch_size,))
            # task_labels = np.array([None for _ in range(len(x))])
        unique_task_labels = set(task_labels.tolist())
        # TODO: Debug this:
        # NOTE(review): the number of classes of each task is added to the first-layer
        # activations of its column. This looks like a debugging leftover, but is kept
        # as-is since removing it changes the model's outputs - confirm the intent.
        column_outputs = [
            column[0](x) + n_classes_in_task
            for n_classes_in_task, column in zip(self.n_classes_per_task, self.columns)
        ]
        inputs = column_outputs
        for layer in range(1, self.n_layers):
            outputs = []
            # Layer `layer` of column i receives the previous outputs of columns 0..i.
            for i, column in enumerate(self.columns):
                outputs.append(column[layer](inputs[: i + 1]))
            inputs = outputs
        y_logits: Optional[Tensor] = None
        # BUG: Can't apply PNN to the ClassIncrementalSetting at the moment.
        for task_id in unique_task_labels:
            task_mask = task_labels == task_id
            if task_id is None or task_id >= n_known_tasks:
                logger.warning(
                    f"Task id {task_id} is encountered, but we haven't trained for it yet!"
                )
                task_id = last_known_task_id
            if y_logits is None:
                # Work on a copy: the masked writes below must not modify a column's
                # output in place, since autograd needs it for the backward pass.
                y_logits = inputs[task_id].clone()
            else:
                y_logits[task_mask] = inputs[task_id][task_mask]
        assert y_logits is not None, "Can't get prediction in model PNN"
        return y_logits

    # def new_task(self, device, num_inputs, num_actions = 5):
    def new_task(self, device, sizes: List[int]):
        """Add a column for a new task and move it to `device`.

        Args:
            device: device on which to place the new column.
            sizes: input size followed by the output size of each layer
                (`n_layers + 1` values).
        """
        assert len(sizes) == self.n_layers + 1, (
            f"Should have the out size for each layer + input size (got {len(sizes)} "
            f"sizes but {self.n_layers} layers)."
        )
        self.n_tasks += 1
        # TODO: Fix this to use the actual number of classes per task.
        n_outputs = sizes[-1]
        self.n_classes_per_task.append(n_outputs)
        task_id = len(self.columns)
        modules = []
        # TODO: Would it also be possible to use convolutional layers here?
        for i in range(0, self.n_layers):
            modules.append(PNNLinearBlock(col=task_id, depth=i, n_in=sizes[i], n_out=sizes[i + 1]))
        new_column = nn.ModuleList(modules).to(device)
        self.columns.append(new_column)
        self.device = device
        print("Add column of the new task")

    def freeze_columns(self, skip: List[int] = None):
        """Freeze the parameters of every column whose index is not in `skip`.

        All columns are made trainable first, so columns listed in `skip` end up
        trainable.
        """
        if skip is None:
            skip = []
        for i, c in enumerate(self.columns):
            for params in c.parameters():
                params.requires_grad = True
        for i, c in enumerate(self.columns):
            if i not in skip:
                for params in c.parameters():
                    params.requires_grad = False
        print("Freeze columns from previous tasks")

    def shared_step(
        self,
        batch: Tuple[Observations, Optional[Rewards]],
        environment: PassiveEnvironment,
    ):
        """Shared step used for both training and validation.

        Parameters
        ----------
        batch : Tuple[Observations, Optional[Rewards]]
            Batch containing Observations, and optional Rewards. When the Rewards are
            None, it means that we'll need to provide the Environment with actions
            before we can get the Rewards (e.g. image labels) back.
            This happens for example when being applied in a Setting which cares about
            sample efficiency or training performance.
        environment : Environment
            The environment we're currently interacting with. Used to provide the
            rewards when they aren't already part of the batch (as mentioned above).

        Returns
        -------
        Tuple[Tensor, Dict]
            The Loss tensor, and a dict of metrics to be logged.
        """
        # Since we're training on a Passive environment, we will get both observations
        # and rewards, unless we're being evaluated based on our training performance,
        # in which case we will need to send actions to the environments before we can
        # get the corresponding rewards (image labels).
        observations: Observations = batch[0].to(self.device)
        rewards: Optional[Rewards] = batch[1]
        # Get the predictions:
        logits = self(observations)
        y_pred = logits.argmax(-1)
        # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
        # is the same for each task.
        # Get the rewards, if necessary:
        if rewards is None:
            rewards = environment.send(Actions(y_pred))
        image_labels = rewards.y.to(self.device)
        # print(logits.size())
        loss = self.loss(logits, image_labels)
        accuracy = (y_pred == image_labels).sum().float() / len(image_labels)
        metrics_dict = {"accuracy": accuracy}
        return loss, metrics_dict

    def parameters(self, task_id: int = None, recurse: bool = True):
        """Return an iterator over the parameters of the column of task `task_id`.

        When `task_id` is None, this behaves like `nn.Module.parameters` and iterates
        over all parameters. `nn.Module` methods such as `zero_grad` call
        `self.parameters()` without arguments, which fails if `task_id` is required.
        """
        if task_id is None:
            return super().parameters(recurse=recurse)
        return self.columns[task_id].parameters()
================================================
FILE: sequoia/methods/pnn/pnn_method.py
================================================
from argparse import Namespace
from dataclasses import dataclass
from typing import Any, Dict, Mapping, Optional, Union
import gym
import numpy as np
import torch
import tqdm
from gym import spaces
from gym.spaces import Box
from numpy import inf
from simple_parsing import ArgumentParser
from wandb.wandb_run import Run
from sequoia.common import Config
from sequoia.common.hparams import HyperParameters, categorical, log_uniform, uniform
from sequoia.common.spaces import Image
from sequoia.common.transforms.utils import is_image
from sequoia.methods import register_method
from sequoia.settings import (
Actions,
Method,
Observations,
PassiveEnvironment,
RLSetting,
Setting,
TaskIncrementalRLSetting,
TaskIncrementalSLSetting,
)
from sequoia.settings.assumptions import IncrementalAssumption
from sequoia.settings.base import Environment
from sequoia.utils import get_logger
from .model_rl import PnnA2CAgent
from .model_sl import PnnClassifier
logger = get_logger(__name__)
# BUG: Can't apply PNN to the ClassIncrementalSetting at the moment.
# BUG: Can't apply PNN to any RL Settings at the moment.
# (it was hard-coded to handle pixel cartpole).
# TODO: When those bugs get fixed, restore the 'IncrementalAssumption' as the target
# setting.
# TODO: Debugging PNN on Incremental rather than TaskIncremental
@register_method
class PnnMethod(Method, target_setting=IncrementalAssumption):
    """Progressive Neural Networks (PNN) Method.

    Applicable to both RL and SL Settings, as long as there are clear task boundaries
    during training (IncrementalAssumption).

    The model is created in `configure` (a `PnnA2CAgent` for RL settings, a
    `PnnClassifier` otherwise). `on_task_switch` asks the model to add a new task
    (`new_task`) the first time a given task id is seen during training.
    """

    @dataclass
    class HParams(HyperParameters):
        """Hyper-parameters of the Pnn method."""

        # Learning rate of the optimizer. Defaults to 0.0001 when in SL.
        learning_rate: float = log_uniform(1e-6, 1e-2, default=2e-4)
        num_steps: int = 200  # (only applicable in RL settings.)
        # Discount factor (Only used in RL settings).
        gamma: float = uniform(0.9, 0.999, default=0.99)
        # Number of hidden units (only used in RL settings.)
        hidden_size: int = categorical(64, 128, 256, default=256)
        # Batch size in SL, and number of parallel environments in RL.
        # Defaults to None in RL, and 32 when in SL.
        batch_size: Optional[int] = None
        # Maximum number of training epochs per task. (only used in SL Settings)
        # NOTE: This value is passed to `range()`, so the prior has to be discrete.
        max_epochs_per_task: int = uniform(1, 100, default=10, discrete=True)

    def __init__(self, hparams: Optional[HParams] = None):
        """Create the Method.

        Parameters
        ----------
        hparams : HParams, optional
            Hyper-parameters to use. When `None`, setting-specific defaults are
            created in `configure`.
        """
        # We will create those when `configure` will be called, before training.
        self.config: Optional[Config] = None
        self.task_id: Optional[int] = 0
        self.hparams: Optional[PnnMethod.HParams] = hparams
        self.model: Union[PnnA2CAgent, PnnClassifier]
        self.optimizer: torch.optim.Optimizer

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        You can use this to instantiate your model, for instance, since this is
        where you get access to the observation & action spaces.
        """
        input_space: Box = setting.observation_space["x"]
        # For now all Settings have `Discrete` (i.e. classification) action spaces.
        action_space: spaces.Discrete = setting.action_space

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.num_actions = action_space.n
        self.num_inputs = np.prod(input_space.shape)
        self.added_tasks = []

        if not (setting.task_labels_at_train_time and setting.task_labels_at_test_time):
            logger.warning(
                RuntimeWarning(
                    "TODO: PNN doesn't have 'propper' task inference, and task labels "
                    "arent always available! This will use an output head at random."
                )
            )

        if isinstance(setting, RLSetting):
            # If we're applied to an RL setting:
            # Used these as the default hparams in RL:
            self.hparams = self.hparams or self.HParams()
            assert self.hparams
            self.train_steps_per_task = setting.steps_per_task
            # We want a batch_size of None, i.e. only one observation at a time.
            setting.batch_size = None
            self.num_steps = self.hparams.num_steps
            self.loss_function = {
                "gamma": self.hparams.gamma,
            }
            if is_image(setting.observation_space.x):
                # Observing pixel input.
                self.arch = "conv"
            else:
                # Observing state input (e.g. the 4 floats in cartpole rather than images)
                self.arch = "mlp"
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)
        else:
            # If we're applied to a Supervised Learning setting:
            # Used these as the default hparams in SL:
            self.hparams = self.hparams or self.HParams(
                learning_rate=0.0001,
                batch_size=32,
            )
            if self.hparams.batch_size is None:
                self.hparams.batch_size = 32

            # Set the batch size on the setting.
            setting.batch_size = self.hparams.batch_size
            # For now all Settings on the supervised side of the tree have images as
            # inputs, so the observation spaces are of type `Image` (same as Box, but with
            # additional `h`, `w`, `c` and `b` attributes).
            assert isinstance(input_space, Image)
            assert (
                setting.increment == setting.test_increment
            ), "Assuming same number of classes per task for training and testing."

            # TODO: (@lebrice): Temporarily 'fixing' this by making it so each output
            # head has as many outputs as there are classes in total, which might make
            # no sense, but currently works.
            # It would be better to refactor this so that each output head can have only
            # as many outputs as is required (i.e. `setting.increment`), and then
            # reshape / offset the predictions.
            n_outputs = setting.action_space.n
            self.layer_size = [self.num_inputs, 256, n_outputs]
            self.model = PnnClassifier(
                n_layers=len(self.layer_size) - 1,
            )

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting."""
        # This method gets called if task boundaries are known in the current
        # setting. Furthermore, if task labels are available, task_id will be
        # the index of the new task. If not, task_id will be None.
        if self.training:
            self.model.freeze_columns([task_id])

            if task_id not in self.added_tasks:
                # First time this task is seen during training: add it to the model.
                if isinstance(self.model, PnnA2CAgent):
                    self.model.new_task(
                        device=self.device,
                        num_inputs=self.num_inputs,
                        num_actions=self.num_actions,
                    )
                else:
                    self.model.new_task(device=self.device, sizes=self.layer_size)
                self.added_tasks.append(task_id)

        self.task_id = task_id

    def set_optimizer(self):
        """Create a fresh Adam optimizer over the parameters of the current task."""
        self.optimizer = torch.optim.Adam(
            self.model.parameters(self.task_id),
            lr=self.hparams.learning_rate,
        )

    def get_actions(self, observations: Observations, action_space: spaces.Space) -> Actions:
        """Get a batch of predictions (aka actions) for the given observations."""
        observations = observations.to(self.device)
        with torch.no_grad():
            if isinstance(self.model, PnnA2CAgent):
                _, logit = self.model(observations)
                # get the predicted action:
                action = torch.argmax(logit).item()
            else:
                logits = self.model(observations)
                # Get the predicted classes
                action = logits.argmax(dim=-1).cpu().numpy()

        assert action in action_space, (action, action_space)
        return action

    def fit(self, train_env: Environment, valid_env: Environment):
        """Train and validate this method using the "environments" for the current task.

        NOTE: `train_env` and `valid_env` are both `gym.Env`s as well as `DataLoader`s.
        This means that if you want to write a "regular" SL training loop, you totally
        can, and if you want to write you RL-style training loop, you can also do that.
        """
        if isinstance(train_env.unwrapped, PassiveEnvironment):
            self.fit_sl(train_env, valid_env)
        else:
            self.fit_rl(train_env, valid_env)

    def fit_rl(self, train_env: gym.Env, valid_env: gym.Env):
        """Training loop for Reinforcement Learning (a.k.a. "active") environment.

        Based on
        https://towardsdatascience.com/understanding-actor-critic-methods-931b97b6df3f
        """
        if getattr(self, "model", None) is None:
            self.model = PnnA2CAgent(self.arch, self.hparams.hidden_size)
        assert isinstance(self.model, PnnA2CAgent)
        self.set_optimizer()
        assert self.hparams

        all_lengths = []
        average_lengths = []
        all_rewards = []
        entropy_term = 0

        for episode in range(self.train_steps_per_task):
            values = []
            rewards = []
            log_probs = []

            state = train_env.reset()
            for steps in range(self.num_steps):
                value, policy_dist = self.model(state)
                value = value.item()
                # Move to the CPU first: `.numpy()` fails on CUDA tensors.
                dist = policy_dist.detach().cpu().numpy()

                action = np.random.choice(self.num_actions, p=np.squeeze(dist))
                log_prob = torch.log(policy_dist.squeeze(0)[action])
                entropy = -np.sum(np.mean(dist) * np.log(dist))
                new_state, reward, done, _ = train_env.step(action)

                rewards.append(reward.y)
                values.append(value)
                log_probs.append(log_prob)
                entropy_term += entropy
                state = new_state

                if done or steps == self.num_steps - 1:
                    # Bootstrap the return from the value of the last state.
                    Qval, _ = self.model(state)
                    Qval = Qval.item()
                    all_rewards.append(np.sum(rewards))
                    all_lengths.append(steps)
                    average_lengths.append(np.mean(all_lengths[-10:]))
                    if episode % 10 == 0:
                        print(
                            f"episode: {episode}, "
                            f"reward: {np.sum(rewards)}, "
                            f"total length: {steps}, "
                            f"average length: {average_lengths[-1]}"
                        )
                    break

            # Discounted returns, computed backwards from the bootstrapped value.
            Qvals = np.zeros_like(values)
            for t in reversed(range(len(rewards))):
                Qval = rewards[t] + self.hparams.gamma * Qval
                Qvals[t] = Qval

            # update actor critic
            # NOTE(review): `values` holds Python floats (`.item()`) and `entropy_term`
            # is computed with numpy, so only the `log_probs` term of `ac_loss`
            # carries gradients. Confirm whether the critic is meant to be trained.
            values_tensor = torch.as_tensor(values, dtype=torch.float)
            Qvals = torch.as_tensor(Qvals, dtype=torch.float)
            log_probs_tensor = torch.stack(log_probs)

            advantage = Qvals - values_tensor
            actor_loss = (-log_probs_tensor * advantage).mean()
            critic_loss = 0.5 * advantage.pow(2).mean()
            ac_loss = actor_loss + critic_loss + 0.001 * entropy_term

            self.optimizer.zero_grad()
            ac_loss.backward()
            self.optimizer.step()

    def fit_sl(self, train_env: PassiveEnvironment, valid_env: PassiveEnvironment):
        """Train on a Supervised Learning (a.k.a. "passive") environment."""
        # The original code reset the env before iterating; kept in case the
        # reset has side effects on the environment.
        train_env.reset()

        assert isinstance(self.model, PnnClassifier)
        assert self.hparams
        self.set_optimizer()

        # `int(...)` guards against a float value suggested by an HPO algorithm.
        for epoch in range(int(self.hparams.max_epochs_per_task)):
            self.model.train()
            print(f"Starting epoch {epoch}")

            # Training loop:
            with torch.set_grad_enabled(True), tqdm.tqdm(train_env) as train_pbar:
                postfix: Dict[str, Any] = {}
                train_pbar.set_description(f"Training Epoch {epoch}")
                for batch in train_pbar:
                    loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=train_env,
                    )
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()
                    postfix.update(metrics_dict)
                    train_pbar.set_postfix(postfix)

            # Validation loop:
            self.model.eval()
            with torch.set_grad_enabled(False), tqdm.tqdm(valid_env) as val_pbar:
                postfix = {}
                val_pbar.set_description(f"Validation Epoch {epoch}")
                epoch_val_loss = 0.0
                for batch in val_pbar:
                    batch_val_loss, metrics_dict = self.model.shared_step(
                        batch,
                        environment=valid_env,
                    )
                    epoch_val_loss += batch_val_loss
                    postfix.update(metrics_dict, val_loss=epoch_val_loss)
                    val_pbar.set_postfix(postfix)

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser, dest: Optional[str] = None) -> None:
        """Add the command-line arguments for the hyper-parameters of this Method.

        Parameters
        ----------
        parser : ArgumentParser
            The parser to add the arguments to.
        dest : str, optional
            Accepted so that callers can use the `add_argparse_args(parser, dest=...)`
            calling convention. It is ignored: the hyper-parameters are always
            stored under the `hparams` attribute of the parsed args.
        """
        parser.add_arguments(cls.HParams, dest="hparams", default=None)

    @classmethod
    def from_argparse_args(cls, args: Namespace, dest: Optional[str] = None) -> "PnnMethod":
        """Create the Method from the parsed command-line arguments.

        Parameters
        ----------
        args : Namespace
            The parsed arguments, with the hyper-parameters at `args.hparams`.
        dest : str, optional
            Accepted for call-compatibility with `add_argparse_args`; ignored.
        """
        hparams: PnnMethod.HParams = args.hparams
        return cls(hparams=hparams)

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        # `self.hparams` is None until `configure` is called when the Method was
        # created without hyper-parameters.
        hparams = self.hparams or self.HParams()
        return hparams.get_orion_space()

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        # Here we overwrite the corresponding attributes with the new suggested values
        # leaving other fields unchanged.
        # NOTE: These new hyper-paramers will be used in the next run in the sweep,
        # since each call to `configure` will create a new Model.
        current_hparams = self.hparams or self.HParams()
        self.hparams = current_hparams.replace(**new_hparams)

    def setup_wandb(self, run: Run) -> None:
        """Called by the Setting when using Weights & Biases, after `wandb.init`.

        This method is here to provide Methods with the opportunity to log some of their
        configuration options or hyper-parameters to wandb.

        NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
        this point.

        Parameters
        ----------
        run : wandb.Run
            Current wandb Run.
        """
        run.config["hparams"] = self.hparams.to_dict()
def main_rl():
    """Applies the PnnMethod in a RL Setting."""
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    Config.add_argparse_args(parser, dest="config")
    # PnnMethod registers its hyper-parameters under the fixed `hparams`
    # destination, so no `dest` is passed to its argparse classmethods.
    PnnMethod.add_argparse_args(parser)

    # 1. Creating the Setting:
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=2,
        train_task_schedule={
            0: {"gravity": 10, "length": 0.3},
            1000: {"gravity": 10, "length": 0.5},
        },
    )

    # 2. Creating the Config and the Method from the command-line arguments:
    args = parser.parse_args()
    config: Config = Config.from_argparse_args(args, dest="config")
    method: PnnMethod = PnnMethod.from_argparse_args(args)
    method.config = config

    # 3. Applying the method to the setting:
    results = setting.apply(method, config=config)
    print(results.summary())
    print(f"objective: {results.objective}")
    return results
def main_sl():
    """Applies the PnnMethod in a SL Setting."""
    parser = ArgumentParser(description=__doc__, add_dest_to_option_strings=False)

    # Add arguments for the Setting
    # TODO: PNN is coded for the DomainIncrementalSetting, where the action space
    # is the same for each task.
    parser.add_arguments(TaskIncrementalSLSetting, dest="setting")
    Config.add_argparse_args(parser, dest="config")
    # Add arguments for the Method:
    # PnnMethod registers its hyper-parameters under the fixed `hparams`
    # destination, so no `dest` is passed to its argparse classmethods.
    PnnMethod.add_argparse_args(parser)

    args = parser.parse_args()

    setting: TaskIncrementalSLSetting = TaskIncrementalSLSetting.from_argparse_args(
        args,
        dest="setting",
    )
    config: Config = Config.from_argparse_args(args, dest="config")
    method: PnnMethod = PnnMethod.from_argparse_args(args)
    method.config = config

    results = setting.apply(method, config=config)
    print(results.summary())
    return results
if __name__ == "__main__":
    # Run the SL Setting:
    main_sl()
    # To run the RL Setting instead, use:
    # main_rl()
================================================
FILE: sequoia/methods/random_baseline.py
================================================
"""A random baseline Method that gives random predictions for any input.
Should be applicable to any Setting.
"""
from argparse import Namespace
from typing import Any, Dict, Mapping, Optional, Union
import gym
import numpy as np
import tqdm
from simple_parsing import ArgumentParser
from torch import Tensor
from sequoia.methods import register_method
from sequoia.settings import Setting
from sequoia.settings.base import Actions, Environment, Method, Observations
from sequoia.settings.sl import SLSetting
from sequoia.utils import get_logger
logger = get_logger(__name__)
@register_method
class RandomBaselineMethod(Method, target_setting=Setting):
    """Baseline Method that answers every observation with a random action.

    There is no model and there are no parameters: actions are sampled from the
    action space, both while "training" and when asked for predictions.
    """

    def __init__(self):
        # Maximum number of passes over the training environment (None: no limit).
        self.max_train_episodes: Optional[int] = None

    def configure(self, setting: Setting):
        """Called before the method is applied on a setting (before training).

        In SL settings, limits "training" to a single pass over the environment.
        """
        if not isinstance(setting, SLSetting):
            return
        # Being applied in SL, we will only do one 'epoch" (a.k.a. "episode").
        self.max_train_episodes = 1

    def fit(
        self,
        train_env: Environment,
        valid_env: Environment,
    ):
        """Iterate over `train_env`, sending random actions, until it is closed.

        Stops early (and closes the env) once `max_train_episodes` passes are done.
        """
        finished_episodes = 0
        with tqdm.tqdm(desc="training") as progress_bar:
            while not train_env.is_closed():
                for step, batch in enumerate(train_env):
                    # A batch is either just the observations, or an
                    # (observations, rewards) pair.
                    observations, rewards = (
                        (batch, None) if isinstance(batch, Observations) else batch
                    )
                    n_samples = observations.x.shape[0]
                    random_actions = train_env.action_space.sample()

                    # The last batch might be smaller than the others, in which
                    # case we only give the required number of values.
                    needs_trimming = (
                        isinstance(random_actions, (np.ndarray, Tensor))
                        and random_actions.shape[0] != n_samples
                    )
                    if needs_trimming:
                        random_actions = random_actions[:n_samples]

                    if rewards is None:
                        rewards = train_env.send(random_actions)

                    progress_bar.set_postfix({"Episode": finished_episodes, "Step": step})
                    progress_bar.update()

                    if train_env.is_closed():
                        break

                finished_episodes += 1
                episode_budget = self.max_train_episodes
                if episode_budget and finished_episodes >= episode_budget:
                    train_env.close()
                    break

    def get_actions(self, observations: Observations, action_space: gym.Space) -> Actions:
        """Return a random sample from `action_space`, ignoring the observations."""
        return action_space.sample()

    def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
        """Returns the search space to use for HPO in the given Setting.

        Parameters
        ----------
        setting : Setting
            The Setting on which the run of HPO will take place.

        Returns
        -------
        Mapping[str, Union[str, Dict]]
            An orion-formatted search space dictionary, mapping from hyper-parameter
            names (str) to their priors (str), or to nested dicts of the same form.
        """
        sweep_warning = UserWarning(
            "Hey, you seem to be trying to perform an HPO sweep using the random "
            "baseline method?"
        )
        logger.warning(sweep_warning)
        # Assuming that this is just used for debugging, so giving back a simple space.
        return {"foo": "choices([0, 1, 2])"}

    def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
        """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

        It is required that this method be implemented if you want to perform HPO sweeps
        with Orion.

        Parameters
        ----------
        new_hparams : Dict[str, Any]
            The new hyper-parameters being recommended by the HPO algorithm. These will
            have the same structure as the search space.
        """
        suggested_value = new_hparams["foo"]
        print(f"Using new suggested value {suggested_value}")

    @classmethod
    def add_argparse_args(cls, parser: ArgumentParser):
        """Nothing to add: this Method has no command-line arguments."""
        pass

    @classmethod
    def from_argparse_args(cls, args: Namespace):
        """Create the Method; the parsed arguments are not used."""
        return cls()
if __name__ == "__main__":
    # Entry point when this module is executed as a script.
    RandomBaselineMethod.main()
================================================
FILE: sequoia/methods/random_baseline_test.py
================================================
# TODO: Create a sort of reusable fixture for the Method
# TODO: Figure out how to ACTUALLY set the checkpoint dir in pytorch-lightning!
from typing import List
from sequoia.settings import all_settings
from .random_baseline import RandomBaselineMethod
# Use 'Method' as an alias for the actual Method subclass under test (since at
# the moment quite a few tests share some common code).
# List of datasets that are currently supported.
# Names of the datasets that are currently supported.
supported_datasets: List[str] = [
    "mnist",
    "fashionmnist",
    "cifar10",
    "cifar100",
    "kmnist",
    "cartpole",
]
def test_is_applicable_to_all_settings():
    """The random baseline should list every known setting as applicable."""
    applicable_settings = RandomBaselineMethod.get_applicable_settings()
    assert set(applicable_settings) == set(all_settings)
================================================
FILE: sequoia/methods/stable_baselines3_methods/__init__.py
================================================
from .a2c import A2CMethod, A2CModel
from .base import SB3BaseHParams, StableBaselines3Method
from .ddpg import DDPGMethod, DDPGModel
from .dqn import DQNMethod, DQNModel
from .off_policy_method import OffPolicyMethod, OffPolicyModel
from .on_policy_method import OnPolicyMethod, OnPolicyModel
from .policy_wrapper import PolicyWrapper
from .ppo import PPOMethod, PPOModel
from .sac import SACMethod, SACModel
from .td3 import TD3Method, TD3Model
================================================
FILE: sequoia/methods/stable_baselines3_methods/a2c.py
================================================
""" Method that uses the A2C model from stable-baselines3 and targets the RL
settings in the tree.
"""
import math
from dataclasses import dataclass
from typing import Callable, ClassVar, Dict, Mapping, Optional, Type, Union
import gym
import torch
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.a2c import A2C
from sequoia.common.hparams import log_uniform, uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils import get_logger
from .on_policy_method import OnPolicyMethod, OnPolicyModel
logger = get_logger(__name__)
class A2CModel(A2C, OnPolicyModel):
    """Advantage Actor Critic (A2C) model imported from stable-baselines3.

    Paper: https://arxiv.org/abs/1602.01783
    Code: The SB3 implementation borrows code from
    https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and
    and Stable Baselines (https://github.com/hill-a/stable-baselines)

    Introduction to A2C:
    https://hackernoon.com/intuitive-rl-intro-to-advantage-actor-critic-a2c-4ff545978752
    """

    @dataclass
    class HParams(OnPolicyModel.HParams):
        """Hyper-parameters of the A2C Model.

        TODO: Set actual 'good' priors for these hyper-parameters, as these were set
        somewhat arbitrarily. (They do however use the same defaults as in SB3).

        The commented-out lines below each field are candidate priors that are
        not currently enabled.
        """

        # learning rate for the optimizer, it can be a function of the current
        # progress remaining (from 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=7e-4)
        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # NOTE: Default value here is much lower than in PPO, which might indicate
        # that this A2C is more "on-policy"? (i.e. that it requires data to be super
        # "fresh")?
        n_steps: int = uniform(3, 64, default=5, discrete=True)
        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)
        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = 1.0
        # gae_lambda: float = uniform(0.5, 1.0, default=1.0)
        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0.0, 1.0, default=0.0)
        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)
        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)
        # RMSProp epsilon. It stabilizes square root computation in denominator of
        # RMSProp update.
        rms_prop_eps: float = 1e-5
        # rms_prop_eps: float = log_uniform(1e-7, 1e-3, default=1e-5)
        # Whether to use RMSprop (default) or Adam as optimizer
        use_rms_prop: bool = True
        # use_rms_prop: bool = categorical(True, False, default=True)
        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)
        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)
        # Whether to normalize or not the advantage
        normalize_advantage: bool = False
        # normalize_advantage: bool = categorical(True, False, default=False)
        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None
        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False
        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None
        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 0
        # Seed for the pseudo random generators
        seed: Optional[int] = None
        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"
        # :param _init_setup_model: Whether or not to build the network at the
        # creation of the instance
        # _init_setup_model: bool = True
@register_method
@dataclass
class A2CMethod(OnPolicyMethod):
    """Method that uses the A2C model from stable-baselines3."""

    # The name is set explicitly because the default name would be 'a_2_c'.
    name: ClassVar[str] = "a2c"
    Model: ClassVar[Type[A2CModel]] = A2CModel

    # Hyper-parameters of the A2C model.
    hparams: A2CModel.HParams = mutable_field(A2CModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """Configure the method, adapting `n_steps` and the training budget.

        When the setting has a step budget (`steps_per_phase`), `n_steps` is capped
        to 10% of that budget if it exceeds it, and the number of training steps
        per task is limited accordingly.
        """
        super().configure(setting=setting)

        step_budget = setting.steps_per_phase
        if not step_budget:
            return

        if self.hparams.n_steps > step_budget:
            self.hparams.n_steps = math.ceil(0.1 * step_budget)
            logger.info(
                f"Capping the n_steps to 10% of step budget length: {self.hparams.n_steps}"
            )

        # NOTE: We limit the number of training steps per task, such that we never
        # attempt to fill the buffer using more samples than the environment allows.
        max_train_steps = step_budget - self.hparams.n_steps - 1
        self.train_steps_per_task = min(self.train_steps_per_task, max_train_steps)
        logger.info(f"Limitting training steps per task to {self.train_steps_per_task}")

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> A2CModel:
        """Instantiate the model on `train_env`, using the hparams as keyword arguments."""
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train on the given environments (delegates to the parent class)."""
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Get the actions for the given observations (delegates to the parent class)."""
        actions = super().get_actions(
            observations=observations,
            action_space=action_space,
        )
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        """Return the HPO search space, without the gSDE entries for discrete actions."""
        search_space = super().get_search_space(setting)
        if not isinstance(setting.action_space, spaces.Discrete):
            return search_space
        # From stable_baselines3/common/base_class.py", line 170:
        # > Generalized State-Dependent Exploration (gSDE) can only be used with
        # continuous actions
        # Therefore we remove related entries in the search space, so they keep
        # their default values.
        for sde_key in ("use_sde", "sde_sample_freq"):
            search_space.pop(sde_key, None)
        return search_space
if __name__ == "__main__":
    # Entry point when this module is executed as a script: run the method and
    # print the results it returns.
    results = A2CMethod.main()
    print(results)
================================================
FILE: sequoia/methods/stable_baselines3_methods/a2c_test.py
================================================
from typing import ClassVar, Type
from .a2c import A2CMethod, A2CModel
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import DiscreteActionSpaceMethodTests
class TestA2C(DiscreteActionSpaceMethodTests):
    """Runs the tests of `DiscreteActionSpaceMethodTests` for the A2C Method."""

    # The Method and Model classes under test.
    Method: ClassVar[Type[StableBaselines3Method]] = A2CMethod
    Model: ClassVar[Type[BaseAlgorithm]] = A2CModel
================================================
FILE: sequoia/methods/stable_baselines3_methods/base.py
================================================
""" Example of creating an A2C agent using the simplebaselines3 package.
See https://stable-baselines3.readthedocs.io/en/master/guide/install.html
"""
from abc import ABC
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, Dict, List, Mapping, Optional, Type, Union
import gym
import torch
from gym import spaces
from simple_parsing import choice, mutable_field
from simple_parsing.helpers.hparams import HyperParameters, categorical, log_uniform
from stable_baselines3.common.base_class import BaseAlgorithm, BasePolicy, MaybeCallback
# from stable_baselines3.common.vec_env.obs_dict_wrapper import ObsDictWrapper
from wandb.wandb_run import Run
from sequoia.common.transforms.utils import is_image
from sequoia.settings import Method, Setting
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.serialization import register_decoding_fn
logger = get_logger(__name__)
# "Patch" the _wrap_env function of the BaseAlgorithm class of
# stable_baselines, to make it recognize the VectorEnv from gym.vector as a
# vectorized environment.
# Stable-Baselines3 has a lot of duplicated code from openai gym
# def _wrap_env(env: GymEnv, verbose: int = 0, monitor_wrapper: bool = True) -> VecEnv:
# """ "
# Wrap environment with the appropriate wrappers if needed.
# For instance, to have a vectorized environment
# or to re-order the image channels.
# :param env:
# :param verbose:
# :param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible.
# :return: The wrapped environment.
# """
# # if not isinstance(env, VecEnv):
# if not (
# isinstance(env, (VecEnv, VectorEnv))
# or isinstance(env.unwrapped, (VecEnv, VectorEnv))
# ):
# # if not is_wrapped(env, Monitor) and monitor_wrapper:
# if monitor_wrapper and not (
# is_wrapped(env, Monitor)
# or is_wrapped(env, gym.wrappers.Monitor)
# or has_wrapper(env, gym.wrappers.Monitor)
# ):
# if verbose >= 1:
# print("Wrapping the env with a `Monitor` wrapper")
# env = Monitor(env)
# if verbose >= 1:
# print("Wrapping the env in a DummyVecEnv.")
# env = DummyVecEnv([lambda: env])
# if is_image_space(env.observation_space) and not is_wrapped(env, VecTransposeImage):
# if verbose >= 1:
# print("Wrapping the env in a VecTransposeImage.")
# env = VecTransposeImage(env)
# # check if wrapper for dict support is needed when using HER
# if isinstance(env.observation_space, gym.spaces.dict.Dict):
# env = ObsDictWrapper(env)
# return env
# BaseAlgorithm._wrap_env = staticmethod(_wrap_env)
class RemoveInfoWrapper(gym.Wrapper):
    """Wrapper that replaces the 'info' dict of every step with an empty dict.

    This is a workaround: there seems to be a bug in sb3 whenever there is
    something in the 'info' dict.
    """

    def step(self, action):
        """Step the wrapped env and return its result with an empty `info` dict."""
        observation, reward, done, _ = self.env.step(action)
        return observation, reward, done, {}
@dataclass
class SB3BaseHParams(HyperParameters):
    """Hyper-parameters of a model from the `stable_baselines3` package.

    The command-line arguments for these are created with simple-parsing.
    """

    # The policy model to use (MlpPolicy, CnnPolicy, ...)
    policy: Optional[Union[str, Type[BasePolicy]]] = choice("MlpPolicy", "CnnPolicy", default=None)
    # # The base policy used by this method
    # policy_base: Type[BasePolicy]
    # learning rate for the optimizer, it can be a function of the current
    # progress remaining (from 1 to 0)
    learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-4)
    # Additional arguments to be passed to the policy on creation
    policy_kwargs: Optional[Dict[str, Any]] = None
    # the log location for tensorboard (if None, no logging)
    tensorboard_log: Optional[str] = None
    # The verbosity level: 0 none, 1 training information, 2 debug
    verbose: int = 1
    # Device on which the code should run. By default, it will try to use a Cuda
    # compatible device and fallback to cpu if it is not possible.
    device: Union[torch.device, str] = "auto"
    # # Whether the algorithm supports training with multiple environments (as in A2C)
    # support_multi_env: bool = False
    # Whether to create a second environment that will be used for evaluating
    # the agent periodically. (Only available when passing string for the
    # environment)
    create_eval_env: bool = False
    # # When creating an environment, whether to wrap it or not in a Monitor wrapper.
    # monitor_wrapper: bool = True
    # Seed for the pseudo random generators
    seed: Optional[int] = None
    # # Whether to use generalized State Dependent Exploration (gSDE) instead of
    # action noise exploration (default: False)
    # use_sde: bool = False
    # # Sample a new noise matrix every n steps when using gSDE Default: -1
    # (only sample at the beginning of the rollout)
    # sde_sample_freq: int = -1
    # Whether to clear the experience buffer at the beginning of a new task.
    # NOTE: We use `to_dict=False` here so that it doesn't get passed to the Policy
    # class.
    clear_buffers_between_tasks: bool = categorical(True, False, default=False, to_dict=False)
@dataclass
class StableBaselines3Method(Method, ABC, target_setting=ContinualRLSetting):
"""Base class for the methods that use models from the stable_baselines3
repo.
"""
family: ClassVar[str] = "sb3"
# Class variable that represents what kind of Model will be used.
# (This is just here so we can easily create one Method class per model type
# by just changing this class attribute.)
Model: ClassVar[Type[BaseAlgorithm]]
# HyperParameters of the Method.
hparams: SB3BaseHParams = mutable_field(SB3BaseHParams)
# The number of training steps to run per task.
# NOTE: This shouldn't be set to more than the task length when applying this method
# on a ContinualRLSetting, because we don't currently have a way of "resetting"
# the nonstationarity in the environment, and there is only one task,
# therefore if we trained for say 10 million steps, while the
# non-stationarity only lasts for 10_000 steps, we'd have seen an almost
# stationary distribution, since the environment would have stopped changing after
# 10_000 steps.
# train_steps_per_task: int = 10_000
# callback(s) called at every step with state of the algorithm.
callback: MaybeCallback = None
# The number of timesteps before logging.
log_interval: int = 100
# the name of the run for TensorBoard logging
tb_log_name: str = "run"
# Evaluate the agent every ``eval_freq`` timesteps (this may vary a little)
# TODO: Log the evaluations to wandb.
eval_freq: int = 5_000
# Number of episode to evaluate the agent
n_eval_episodes = 5
# Path to a folder where the evaluations will be saved
eval_log_path: Optional[str] = None
def __post_init__(self):
self.model: Optional[BaseAlgorithm] = None
# Extra wrappers to add to the train_env and valid_env before passing
# them to the `learn` method from stable-baselines3.
import operator
from functools import partial
from sequoia.common.gym_wrappers import TransformObservation, TransformReward
self.extra_train_wrappers: List[Callable[[gym.Env], gym.Env]] = [
partial(TransformObservation, f=operator.itemgetter("x")),
# partial(TransformAction, f=operator.itemgetter("y_pred"),
partial(TransformReward, f=operator.itemgetter("y")),
RemoveInfoWrapper,
]
self.extra_valid_wrappers: List[Callable[[gym.Env], gym.Env]] = [
partial(TransformObservation, f=operator.itemgetter("x")),
partial(TransformReward, f=operator.itemgetter("y")),
RemoveInfoWrapper,
]
# Number of timesteps to train on for each task.
self.total_timesteps_per_task: int = 0
self.train_env: gym.Env = None
self.valid_env: gym.Env = None
def configure(self, setting: ContinualRLSetting):
# Prepares this Method and the given Setting for training: drops any existing
# model, disables batching and transforms on the setting, chooses a default
# policy when none was given, and derives the number of training steps per task
# from the setting's step budget.
# Delete the model, if present.
self.model = None
# For now, we don't batch the space because stablebaselines3 will add an
# additional batch dimension if we do.
# TODO: Still need to debug the batching stuff with stablebaselines,
# some methods support it, some don't, and it doesn't recognize
# VectorEnvs from gym.
setting.batch_size = None
# BUG: Need to fix an issue when using the CnnPolicy and Atari envs, the
# input shape isn't what they expect (only 2 channels instead of three
# apparently.)
# from sequoia.common.transforms import Transforms
# NOTE: Important to not use any transforms, since the SB3 methods want to get
# the 'raw' np.uint8 image as an input.
transforms = [
# Transforms.to_tensor,
# Transforms.three_channels,
# Transforms.channels_first_if_needed,
]
# The same (empty) list is shared by all four transform attributes.
setting.transforms = transforms
setting.train_transforms = transforms
setting.val_transforms = transforms
setting.test_transforms = transforms
# Only choose a policy when the user didn't: "CnnPolicy" for image observations,
# "MlpPolicy" otherwise.
if self.hparams.policy is None:
if is_image(setting.observation_space.x):
self.hparams.policy = "CnnPolicy"
else:
self.hparams.policy = "MlpPolicy"
logger.debug(f"Will use {self.hparams.policy} as the policy.")
# TODO: Double check that some settings might not impose a limit on
# number of training steps per environment (e.g. task-incremental RL?)
# NOTE(review): `train_steps_per_task` is only assigned below when
# `setting.steps_per_phase` is truthy, while `fit` reads it unconditionally;
# confirm that a default value is provided elsewhere.
if setting.steps_per_phase:
# if self.train_steps_per_task > setting.steps_per_phase:
# warnings.warn(
# RuntimeWarning(
# f"Can't train for the requested {self.train_steps_per_task} "
# f"steps, since we're (currently) only allowed a maximum of "
# f"{setting.steps_per_phase} steps.)"
# )
# )
# Use as many training steps as possible.
self.train_steps_per_task = setting.steps_per_phase - 1
# Otherwise, we can train basically as long as we want on each task.
def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> BaseAlgorithm:
    """Instantiate `self.Model` on the training environment.

    The hyper-parameters are passed as keyword arguments to the model
    constructor. `valid_env` is accepted for interface compatibility but is not
    used here.
    """
    hparams_as_kwargs = self.hparams.to_dict()
    # This flag is consumed by the Method itself and must not reach the model.
    assert "clear_buffers_between_tasks" not in hparams_as_kwargs
    model = self.Model(env=train_env, **hparams_as_kwargs)
    return model
def fit(self, train_env: gym.Env, valid_env: gym.Env):
# Trains the model on `train_env`, using `valid_env` as the evaluation env.
# Both envs are first wrapped with the extra wrappers created in `__post_init__`.
# The model is created on the first call and is pointed at the new training env
# (through `set_env`) on later calls.
# Remove the extra information that the Setting gives us.
for wrapper in self.extra_train_wrappers:
train_env = wrapper(train_env)
for wrapper in self.extra_valid_wrappers:
valid_env = wrapper(valid_env)
if self.model is None:
self.model = self.create_model(train_env, valid_env)
else:
# TODO: "Adapt"/re-train the model on the new environment.
# BUG: In the MT10 benchmark, the last entry in the observation space is
# very slightly different, which prevents us from doing this:
"""
>>> env.observation_space.low
array([-0.525 , 0.348 , -0.0525, -1. , -inf, -inf, -inf,
-inf, -inf, -inf, -inf, -inf, -inf, -inf,
-inf, -inf, -inf, -inf, -0.525 , 0.348 , -0.0525,
-1., -inf, -inf, -inf, -inf, -inf, -inf,
-inf, -inf, -inf, -inf, -inf, -inf, -inf,
-inf, -0.1 , 0.8 , 0.01 ], dtype=float32)
>>> observation_space.low
array([-0.525 , 0.348 , -0.0525, -1. , -inf, -inf, -inf,
-inf, -inf, -inf, -inf, -inf, -inf, -inf,
-inf, -inf, -inf, -inf, -0.525 , 0.348 , -0.0525,
-1., -inf, -inf, -inf, -inf, -inf, -inf,
-inf, -inf, -inf, -inf, -inf, -inf, -inf,
-inf, -0.1 , 0.8 , 0.05 ], dtype=float32)
"""
if self.train_env is not None:
# BUG: MT10 has *slightly* different values in 'low' between tasks!
# Workaround: for Box observation spaces whose last dimension is 39, reuse the
# observation space of the previous training env.
if (
isinstance(train_env.observation_space, spaces.Box)
and train_env.observation_space.shape[-1] == 39
):
train_env.observation_space = self.train_env.observation_space
self.model.set_env(train_env)
# Remember the (wrapped) envs of this call for the next one.
self.train_env = train_env
self.valid_env = valid_env
# Decide how many steps to train on.
# NOTE(review): `train_steps_per_task` is assigned in `configure` only when the
# setting has a truthy `steps_per_phase`; confirm it always exists at this point.
total_timesteps = self.train_steps_per_task
# TODO: Get the max number of steps directly from the env, rather than from the
# setting's fields.
logger.info(f"Starting training, for a maximum of {total_timesteps} steps.")
# TODO: Customize the parameters of the model and/or of this "learn"
# method if needed.
self.model = self.model.learn(
# The total number of samples (env steps) to train on
total_timesteps=total_timesteps,
eval_env=valid_env,
callback=self.callback,
log_interval=self.log_interval,
tb_log_name=self.tb_log_name,
eval_freq=self.eval_freq,
n_eval_episodes=self.n_eval_episodes,
eval_log_path=self.eval_log_path,
# whether or not to reset the current timestep number (used in logging)
reset_num_timesteps=True,
)
def get_actions(
    self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
) -> ContinualRLSetting.Actions:
    """Return the action predicted by the model for the given observations.

    Only the `x` attribute of the observations is passed to the model. The first
    element of what `self.model.predict` returns is used as the action, and it
    must belong to `action_space`.
    """
    action, _ = self.model.predict(observations.x)
    assert action in action_space, (observations, action, action_space)
    return action
def get_search_space(self, setting: Setting) -> Mapping[str, Union[str, Dict]]:
    """Build the hyper-parameter search space used for HPO in the given Setting.

    Parameters
    ----------
    setting : Setting
        The Setting on which the run of HPO will take place.

    Returns
    -------
    Mapping[str, Union[str, Dict]]
        An orion-formatted search space dictionary, mapping from hyper-parameter
        names (str) to their priors (str), or to nested dicts of the same form.
        Here it has a single "algo_hparams" entry holding the space of the
        algorithm's hyper-parameters.
    """
    search_space = {}
    search_space["algo_hparams"] = self.hparams.get_orion_space()
    return search_space
def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
    """Update the Method with the hyper-parameters suggested for the next run.

    This method has to be implemented in order to perform HPO sweeps with Orion.

    Parameters
    ----------
    new_hparams : Dict[str, Any]
        The new hyper-parameters being recommended by the HPO algorithm. These will
        have the same structure as the search space.
    """
    # Only the suggested values are overwritten; the other fields keep their
    # current value.
    suggested_values = new_hparams["algo_hparams"]
    # NOTE: These new hyper-parameters will be used in the next run in the sweep,
    # since each call to `configure` will create a new Model.
    self.hparams = self.hparams.replace(**suggested_values)
def setup_wandb(self, run: Run) -> None:
    """Log the hyper-parameters of this Method to the current wandb run.

    Called by the Setting when using Weights & Biases, after `wandb.init`, to give
    Methods the opportunity to log some of their configuration options or
    hyper-parameters.

    NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
    this point.

    Parameters
    ----------
    run : wandb.Run
        Current wandb Run.
    """
    hparams_dict = self.hparams.to_dict()
    run.config["hparams"] = hparams_dict
def on_task_switch(self, task_id: Optional[int]) -> None:
    """Handle a task boundary in a CL setting.

    If task labels are available, `task_id` will correspond to the index of
    the new task. Otherwise, if task labels aren't available, `task_id` will
    be `None`.

    The buffers are cleared when the `clear_buffers_between_tasks`
    hyper-parameter is set; nothing else happens here.
    """
    if not self.hparams.clear_buffers_between_tasks:
        return
    self.clear_buffers()
def clear_buffers(self):
    """Reset the replay buffer of the model, if a model has been created."""
    # I think that's the right way to do it.. not sure.
    # assert False, self.model.replay_buffer.pos
    if not self.model:
        return
    # TODO: These are really interesting methods!
    # self.model.save_replay_buffer
    # self.model.load_replay_buffer
    buffer = self.model.replay_buffer
    buffer.reset()
# We do this just to prevent errors when trying to decode the hparams class above, and
# also to silence the related warnings from simple-parsing's decoding.py module.
# Both decoding functions are the identity: values of these types are returned
# unchanged.
register_decoding_fn(Type[BasePolicy], lambda v: v)
register_decoding_fn(Callable, lambda v: v)
================================================
FILE: sequoia/methods/stable_baselines3_methods/base_test.py
================================================
from inspect import Parameter, Signature, getsourcefile, signature
from typing import ClassVar, Dict, Type
import pytest
from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm
from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm
from sequoia.common.config import Config
from sequoia.conftest import monsterkong_required
from sequoia.methods.method_test import MethodTests
from sequoia.settings.base import Results
from sequoia.settings.rl import DiscreteTaskAgnosticRLSetting, IncrementalRLSetting, RLSetting
from .base import BaseAlgorithm, StableBaselines3Method
# @pytest.mark.parametrize(
# "MethodType, AlgoType",
# [
# (OnPolicyMethod, OnPolicyAlgorithm),
# (OffPolicyMethod, OffPolicyAlgorithm),
# (A2CMethod, A2C),
# (DDPGMethod, DDPG),
# (PPOMethod, PPO),
# (DQNMethod, DQN),
# (TD3Method, TD3),
# (SACMethod, SAC),
# ],
# )
class StableBaselines3MethodTests(MethodTests):
    """Tests shared by the Methods that wrap an algorithm from stable-baselines3.

    Subclasses set `Method` and `Model` to the classes under test, and provide the
    `setting_kwargs` used by `test_clear_buffers_between_tasks`.
    """

    Method: ClassVar[Type[StableBaselines3Method]] = StableBaselines3Method
    Model: ClassVar[Type[BaseAlgorithm]]
    SB3_Algo: ClassVar[Type[BaseAlgorithm]]
    # Keyword arguments used to create the Method when debugging / testing.
    debug_kwargs: ClassVar[Dict] = {}

    @pytest.mark.parametrize("clear_buffers", [False, True])
    def test_clear_buffers_between_tasks(self, clear_buffers: bool, config: Config):
        """The buffer is emptied on task switch only when the hparam asks for it."""
        setting_kwargs = dict(
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
            config=config,
        )
        setting_kwargs.update(self.setting_kwargs)
        setting = DiscreteTaskAgnosticRLSetting(**setting_kwargs)
        setting.setup()
        assert setting.train_max_steps == 2_000
        assert setting.test_max_steps == 2_000

        method = self.Method(hparams=self.Model.HParams(clear_buffers_between_tasks=clear_buffers))
        method.configure(setting)
        method.fit(
            train_env=setting.train_dataloader(),
            valid_env=setting.val_dataloader(),
        )
        assert method.hparams.clear_buffers_between_tasks == clear_buffers

        # TODO: Not clear how to check the length of the replay buffer!
        length_before_task_switch = get_current_length_of_replay_buffer(method.model)

        method.on_task_switch(task_id=1)

        if clear_buffers:
            assert get_current_length_of_replay_buffer(method.model) == 0
        else:
            assert get_current_length_of_replay_buffer(method.model) == length_before_task_switch

    def test_hparams_have_same_defaults_as_in_sb3(
        self,
    ):
        """Every field of `Model.HParams` has the same default as in SB3.

        The reference is the constructor of the first class in the MRO of `Model`
        that is defined in a `stable_baselines3` module.
        """
        hparams = self.Model.HParams()
        AlgoType = [
            cls for cls in self.Model.mro() if cls.__module__.startswith("stable_baselines3")
        ][0]
        sig: Signature = signature(AlgoType.__init__)
        # The signature doesn't change during the loop: look the parameters up once.
        constructor_parameters = sig.parameters

        for attr_name, value_in_hparams in hparams.to_dict().items():
            assert attr_name in constructor_parameters, f"Hparams has extra field {attr_name}"
            algo_constructor_parameter = constructor_parameters[attr_name]
            sb3_default = algo_constructor_parameter.default

            if sb3_default is Parameter.empty:
                continue
            # Compare for equality: `attr_name in "verbose"` is a substring test,
            # which would also skip any field whose name is a substring of
            # "verbose".
            if attr_name == "verbose":
                continue  # ignore the default value of the 'verbose' param which we change.

            if (
                attr_name == "train_freq"
                and isinstance(sb3_default, tuple)
                and len(sb3_default) == 2
            ):
                # Convert the default of (1, "step") to 1, since that's the format we use.
                if sb3_default[1] == "step":
                    sb3_default = sb3_default[0]

            # Lists in the hparams are compared as tuples.
            if isinstance(value_in_hparams, list):
                value_in_hparams = tuple(value_in_hparams)

            assert value_in_hparams == sb3_default, (
                f"{self.Method.__name__} in Sequoia has different default value for "
                f"hyper-parameter '{attr_name}' than in SB3: \n"
                f"\t{value_in_hparams} != {sb3_default}\n"
                f"Path to sequoia implementation: {getsourcefile(self.Method)}\n"
                f"Path to SB3 implementation: {getsourcefile(AlgoType)}\n"
            )

    @classmethod
    @pytest.fixture
    def method(cls, config: Config) -> StableBaselines3Method:
        """Fixture that returns the Method instance to use when testing/debugging."""
        return cls.Method(**cls.debug_kwargs)

    def validate_results(
        self,
        setting: RLSetting,
        method: StableBaselines3Method,
        results: RLSetting.Results,
    ) -> None:
        """Check that results were produced and that their objective is truthy."""
        assert results
        assert results.objective
        # TODO: Set some 'reasonable' bounds on the performance here, depending on the
        # setting/dataset.

    def test_debug(self, method: StableBaselines3Method, setting: RLSetting, config: Config):
        """Apply the method to the setting and validate the results obtained."""
        results: Results = setting.apply(method, config=config)
        assert results.objective is not None
        print(results.summary())
        self.validate_results(setting=setting, method=method, results=results)
class DiscreteActionSpaceMethodTests(StableBaselines3MethodTests):
    """Tests for the SB3 Methods that are run on a discrete-action env (CartPole-v0)."""

    debug_kwargs: ClassVar[Dict] = {}
    expected_debug_mean_episode_reward: ClassVar[float] = 135
    # Keyword arguments used to create the Setting. This is a dict, hence
    # `ClassVar[Dict]` rather than the previous `ClassVar[str]`.
    setting_kwargs: ClassVar[Dict] = {"dataset": "CartPole-v0"}

    @pytest.mark.timeout(120)
    @monsterkong_required
    def test_monsterkong(self):
        """Apply the Method to a short, two-task incremental monsterkong setting."""
        method = self.Method(**self.debug_kwargs)
        setting = IncrementalRLSetting(
            dataset="monsterkong",
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        results: IncrementalRLSetting.Results = setting.apply(method, config=Config(debug=True))
        print(results.summary())
from functools import singledispatch
from stable_baselines3.common.buffers import RolloutBuffer
@singledispatch
def get_current_length_of_replay_buffer(algo: BaseAlgorithm) -> int:
    """Returns the current length of the replay buffer of the given Algorithm.

    This is the fallback for algorithm types without a registered implementation.
    """
    raise NotImplementedError(algo)


@get_current_length_of_replay_buffer.register(OffPolicyAlgorithm)
def _(algo: OffPolicyAlgorithm):
    # Off-policy algorithms: use the `pos` attribute of the replay buffer.
    replay_buffer = algo.replay_buffer
    return replay_buffer.pos


@get_current_length_of_replay_buffer.register(OnPolicyAlgorithm)
def _(algo: OnPolicyAlgorithm):
    # On-policy algorithms: use the `pos` attribute of the rollout buffer.
    rollout_buffer = algo.rollout_buffer
    return rollout_buffer.pos
class ContinuousActionSpaceMethodTests(StableBaselines3MethodTests):
    """Tests for the SB3 Methods run on a continuous-action env (MountainCarContinuous-v0)."""

    # Keyword arguments used to create the Setting. This is a dict, hence
    # `ClassVar[Dict]` rather than the previous `ClassVar[str]`.
    setting_kwargs: ClassVar[Dict] = {"dataset": "MountainCarContinuous-v0"}
================================================
FILE: sequoia/methods/stable_baselines3_methods/ddpg.py
================================================
""" Method that uses the DDPG model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union
import gym
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.common.off_policy_algorithm import TrainFreq
from stable_baselines3.ddpg import DDPG
from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .off_policy_method import OffPolicyMethod, OffPolicyModel
logger = get_logger(__name__)
class DDPGModel(DDPG, OffPolicyModel):
"""Customized version of the DDPG model from stable-baselines-3."""
@dataclass
class HParams(OffPolicyModel.HParams):
"""Hyper-parameters of the DDPG Model.

Only the fields whose declaration differs from `OffPolicyModel.HParams` are
redefined here.
"""
# TODO: Add hparams specific to DDPG here.
# The learning rate, it can be a function of the current progress (from
# 1 to 0)
learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)
# The verbosity level: 0 none, 1 training information, 2 debug
verbose: int = 0
# How often the model is updated: by default, once per episode.
train_freq: TrainFreq = TrainFreq(frequency=1, unit="episode")
# Minibatch size for each gradient update
batch_size: int = 100
# How many gradient steps to do after each rollout (see ``train_freq``
# and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
# steps as steps done in the environment during the rollout.
gradient_steps: int = -1
# gradient_steps: int = categorical(1, -1, default=-1)
@register_method
@dataclass
class DDPGMethod(OffPolicyMethod):
"""Method that uses the DDPG model from stable-baselines3.

All the methods below delegate to `OffPolicyMethod` without changing their
arguments; `create_model` instantiates `Model` with the hyper-parameters.
"""
# Type of model to create.
Model: ClassVar[Type[DDPGModel]] = DDPGModel
# Hyper-parameters of the DDPG model.
hparams: DDPGModel.HParams = mutable_field(DDPGModel.HParams)
# Approximate limit on the size of the replay buffer, in megabytes.
max_buffer_size_megabytes: float = 2_048.0
def configure(self, setting: ContinualRLSetting):
# Nothing specific to DDPG: use the configuration of the parent class.
super().configure(setting)
def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DDPGModel:
# The hyper-parameters are passed as keyword arguments; `valid_env` is unused.
return self.Model(env=train_env, **self.hparams.to_dict())
def fit(self, train_env: gym.Env, valid_env: gym.Env):
# Training is entirely handled by the parent class.
super().fit(train_env=train_env, valid_env=valid_env)
def get_actions(
self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
) -> ContinualRLSetting.Actions:
# Action selection is entirely handled by the parent class.
return super().get_actions(
observations=observations,
action_space=action_space,
)
def on_task_switch(self, task_id: Optional[int]) -> None:
"""Called when switching tasks in a CL setting.
If task labels are available, `task_id` will correspond to the index of
the new task. Otherwise, if task labels aren't available, `task_id` will
be `None`.
todo: use this to customize how your method handles task transitions.
"""
super().on_task_switch(task_id=task_id)
# When executed as a script: run `DDPGMethod.main()` and print what it returns.
if __name__ == "__main__":
results = DDPGMethod.main()
print(results)
================================================
FILE: sequoia/methods/stable_baselines3_methods/ddpg_test.py
================================================
from typing import ClassVar, Type
import pytest
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import ContinuousActionSpaceMethodTests
from .ddpg import DDPGMethod, DDPGModel
# Runs the shared continuous-action-space tests with the DDPG Method and Model.
@pytest.mark.timeout(60)
class TestDDPG(ContinuousActionSpaceMethodTests):
# Method and Model classes under test.
Method: ClassVar[Type[StableBaselines3Method]] = DDPGMethod
Model: ClassVar[Type[BaseAlgorithm]] = DDPGModel
================================================
FILE: sequoia/methods/stable_baselines3_methods/dqn.py
================================================
""" Method that uses the DQN model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union
import gym
from gym import spaces
from simple_parsing import mutable_field
from simple_parsing.helpers.hparams import log_uniform, uniform
from stable_baselines3.dqn import DQN
from sequoia.common.hparams import categorical
from sequoia.common.transforms import ChannelsFirst
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .off_policy_method import OffPolicyMethod, OffPolicyModel
logger = get_logger(__name__)
class DQNModel(DQN, OffPolicyModel):
"""Customized version of the DQN model from stable-baselines-3."""
@dataclass
class HParams(OffPolicyModel.HParams):
"""Hyper-parameters of the DQN model from `stable_baselines3`.
The command-line arguments for these are created with simple-parsing.
"""
# ------------------
# overwritten hparams
# The learning rate, it can be a function of the current progress (from
# 1 to 0)
learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
# size of the replay buffer
buffer_size: int = uniform(100_000, 10_000_000, default=1_000_000)
# --------------------
# How many steps of the model to collect transitions for before learning
# starts.
learning_starts: int = 50_000
# Minibatch size for each gradient update
batch_size: int = 32
# Update the model every ``train_freq`` steps. Set to `-1` to disable.
train_freq: int = 4
# train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=4)
# The soft update coefficient ("Polyak update", between 0 and 1) default
# 1 for hard update
tau: float = 1.0
# tau: float = uniform(0., 1., default=1.0)
# Update the target network every ``target_update_interval`` environment
# steps.
target_update_interval: int = categorical(1, 10, 100, 1_000, 10_000, default=10_000)
# Fraction of entire training period over which the exploration rate is
# reduced.
exploration_fraction: float = 0.1
# exploration_fraction: float = uniform(0.05, 0.3, default=0.1)
# Initial value of random action probability.
exploration_initial_eps: float = 1.0
# exploration_initial_eps: float = uniform(0.5, 1.0, default=1.0)
# final value of random action probability.
exploration_final_eps: float = 0.05
# exploration_final_eps: float = uniform(0, 0.1, default=0.05)
# The maximum value for the gradient clipping.
max_grad_norm: float = 10
# max_grad_norm: float = uniform(1, 100, default=10)
def train(self, gradient_steps: int, batch_size: int = 100) -> None:
# Forwards both arguments to the parent implementation unchanged.
# NOTE(review): the default `batch_size` here (100) differs from the
# `batch_size` default of `HParams` (32); confirm this is intended.
super().train(gradient_steps, batch_size=batch_size)
@register_method
@dataclass
class DQNMethod(OffPolicyMethod):
    """Method that uses a DQN model from the stable-baselines3 package."""

    # Type of model to create.
    Model: ClassVar[Type[DQNModel]] = DQNModel
    # Hyper-parameters of the DQN model.
    hparams: DQNModel.HParams = mutable_field(DQNModel.HParams)
    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 1_024 * 10.0

    def configure(self, setting: ContinualRLSetting):
        """Configure the Method, adapting the target-network update interval.

        When the setting limits the number of steps per phase and the update
        interval is larger than 10% of that budget, the interval is reduced to
        5% of the budget (and never to less than 1 step).
        """
        super().configure(setting)
        # NOTE: Need to change some attributes depending on the maximal number of steps
        # in the environment allowed in the given Setting.
        if setting.steps_per_phase:
            ten_percent_of_step_budget = setting.steps_per_phase // 10
            if self.hparams.target_update_interval > ten_percent_of_step_budget:
                # Keep the interval at 1 or more: with a very small step budget,
                # the integer division below would otherwise give an interval
                # of 0, which is not a meaningful update period.
                self.hparams.target_update_interval = max(1, ten_percent_of_step_budget // 2)
                logger.info(
                    f"Reducing the target network update interval to "
                    f"{self.hparams.target_update_interval}, because of the limit on "
                    f"training steps imposed by the Setting."
                )

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> DQNModel:
        """Create the DQN model on the training env (`valid_env` is unused)."""
        return self.Model(env=train_env, **self.hparams.to_dict())

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train the model; entirely handled by the parent class."""
        super().fit(train_env=train_env, valid_env=valid_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Return the action predicted by the model for the given observations.

        Observations of shape (64, 64, 3) are first passed through
        `ChannelsFirst.apply` before being given to the model.
        """
        obs = observations.x
        # Temp fix for monsterkong and DQN:
        if obs.shape == (64, 64, 3):
            obs = ChannelsFirst.apply(obs)
        predictions = self.model.predict(obs)
        action, _ = predictions
        assert action in action_space, (observations, action, action_space)
        return action

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Called when switching tasks in a CL setting.

        If task labels are available, `task_id` will correspond to the index of
        the new task. Otherwise, if task labels aren't available, `task_id` will
        be `None`.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)
# When executed as a script: run `DQNMethod.main()` and print what it returns.
if __name__ == "__main__":
results = DQNMethod.main()
print(results)
================================================
FILE: sequoia/methods/stable_baselines3_methods/dqn_test.py
================================================
from typing import ClassVar, Dict, Type
import numpy as np
import pytest
from gym import spaces
from sequoia.common.config import Config
from sequoia.common.spaces import Image
from sequoia.settings.rl import IncrementalRLSetting
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import DiscreteActionSpaceMethodTests
from .dqn import DQNMethod, DQNModel
from .off_policy_method_test import OffPolicyMethodTests
# Runs the shared discrete-action-space and off-policy tests with the DQN Method
# and Model, plus a monsterkong-specific check of the observation shapes.
class TestDQN(DiscreteActionSpaceMethodTests, OffPolicyMethodTests):
# Method and Model classes under test.
Method: ClassVar[Type[StableBaselines3Method]] = DQNMethod
Model: ClassVar[Type[BaseAlgorithm]] = DQNModel
debug_kwargs: ClassVar[Dict] = {}
# TODO: Maybe this is because of the buffer isn't filled up enough with the short
# number of allowed steps?
@pytest.mark.xfail(reason="DQN really sucks on cartpole?")
def test_classic_control_state(self, config: Config):
# Same test as in the parent class, but expected to fail for DQN.
super().test_classic_control_state(config=config)
@pytest.mark.xfail(reason="DQN really sucks on cartpole?")
def test_incremental_classic_control_state(self, config: Config):
# Same test as in the parent class, but expected to fail for DQN.
super().test_incremental_classic_control_state(config=config)
def test_dqn_monsterkong_adds_channel_first_transform(self):
# Checks the spaces of a two-task monsterkong setting, and that the envs give
# (64, 64, 3) observations both before and after `method.configure(setting)`.
method = self.Method(**self.debug_kwargs)
setting = IncrementalRLSetting(
dataset="monsterkong",
nb_tasks=2,
train_steps_per_task=1_000,
test_steps_per_task=1_000,
)
assert setting.train_max_steps == 2_000
assert setting.test_max_steps == 2_000
assert setting.nb_tasks == 2
assert setting.observation_space.x == Image(0, 255, shape=(64, 64, 3), dtype=np.uint8)
assert setting.observation_space.task_labels.n == 2
# assert setting.observation_space == TypedDictSpace(
# spaces={
# "x": Image(0, 255, shape=(64, 64, 3), dtype=np.uint8),
# "task_labels": Sparse(spaces.Discrete(2), sparsity=0.5),
# "done": Sparse(spaces.Box(False, True, (), dtype=np.bool), sparsity=1),
# },
# dtype=setting.Observations,
# )
assert setting.observation_space.dtype is setting.Observations
assert setting.action_space == spaces.Discrete(6) # monsterkong has 6 actions.
# (Before the method gets to change the Setting):
# By default the setting gives the same shape of obs as the underlying env.
for env_method in [
setting.train_dataloader,
setting.val_dataloader,
setting.test_dataloader,
]:
print(f"Testing method {env_method.__name__}")
with env_method() as env:
reset_obs = env.reset()
# TODO: Fix this so the 'x' space actually gets tensor support.
# assert reset_obs in env.observation_space
assert reset_obs.numpy() in env.observation_space
assert reset_obs.x.shape == (64, 64, 3)
# Let the Method configure itself on the Setting:
method.configure(setting)
# (After the method gets to change the Setting):
for env_method in [
setting.train_dataloader,
setting.val_dataloader,
setting.test_dataloader,
]:
with env_method() as env:
reset_obs = env.reset()
# Fix this numpy bug.
assert reset_obs.numpy() in env.observation_space
assert reset_obs.x.shape == (64, 64, 3)
================================================
FILE: sequoia/methods/stable_baselines3_methods/off_policy_method.py
================================================
""" Base class used to avoid duplicating the tweaks made to all the off-policy algos from SB3.
"""
import math
import warnings
from abc import ABC
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, Optional, Type, Union
import gym
from gym import spaces
from gym.spaces.utils import flatten_space
from simple_parsing import mutable_field
from simple_parsing.helpers.serialization import register_decoding_fn
from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm, TrainFreq
from sequoia.common.hparams import log_uniform, uniform
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .base import SB3BaseHParams, StableBaselines3Method
logger = get_logger(__name__)
def decode_trainfreq(v: Any):
    """Decode a serialized training frequency.

    A list of exactly two items is converted into a `TrainFreq` built from these
    two items, in order. Any other value is returned unchanged.
    """
    is_two_item_list = isinstance(v, list) and len(v) == 2
    if not is_two_item_list:
        return v
    return TrainFreq(v[0], v[1])
# Use `decode_trainfreq` as the decoding function for fields of type `TrainFreq`.
register_decoding_fn(TrainFreq, decode_trainfreq)
class OffPolicyModel(OffPolicyAlgorithm, ABC):
"""Tweaked version of the OffPolicyAlgorithm from SB3."""
@dataclass
class HParams(SB3BaseHParams):
"""Hyper-parameters common to all off-policy algos from SB3."""
# The learning rate, it can be a function of the current progress (from
# 1 to 0)
learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-4)
# size of the replay buffer
buffer_size: int = uniform(100, 10_000_000, default=1_000_000)
# How many steps of the model to collect transitions for before learning
# starts.
learning_starts: int = 100
# Minibatch size for each gradient update
batch_size: int = 256
# batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)
# The soft update coefficient ("Polyak update", between 0 and 1) default
# 1 for hard update
tau: float = 0.005
# tau: float = uniform(0., 1., default=1.0)
# The discount factor
gamma: float = 0.99
# gamma: float = uniform(0.9, 0.9999, default=0.99)
# Update the model every ``train_freq`` steps. Set to `-1` to disable.
# NOTE(review): subclasses may declare this field with another type (the DDPG
# hparams in this package use a `TrainFreq`), and `OffPolicyMethod.configure`
# handles non-int values.
train_freq: int = 1
# train_freq: int = categorical(1, 10, 100, 1_000, 10_000, default=10)
# How many gradient steps to do after each rollout (see ``train_freq``
# and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
# steps as steps done in the environment during the rollout.
gradient_steps: int = 1
# gradient_steps: int = categorical(1, -1, default=1)
# Enable a memory efficient variant of the replay buffer at a cost of
# more complexity.
# See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
optimize_memory_usage: bool = False
# Whether to create a second environment that will be used for
# evaluating the agent periodically. (Only available when passing string
# for the environment)
create_eval_env: bool = False
# The verbosity level: 0 no output, 1 info, 2 debug
verbose: int = 1
@dataclass
class OffPolicyMethod(StableBaselines3Method, ABC):
"""ABC for a Method that uses an off-policy Algorithm from SB3."""
# Type of model to use. This has to be overwritten in a subclass.
Model: ClassVar[Type[OffPolicyModel]] = OffPolicyModel
# Hyper-parameters of the off-policy model.
hparams: OffPolicyModel.HParams = mutable_field(OffPolicyModel.HParams)
# Approximate limit on the size of the replay buffer, in megabytes.
max_buffer_size_megabytes: float = 2_048.0
def __post_init__(self):
super().__post_init__()
# Annotation only (no assignment): narrows the declared type of `self.model`.
self.model: OffPolicyAlgorithm
def configure(self, setting: ContinualRLSetting):
    """Adapt the off-policy hyper-parameters to the constraints of the Setting.

    On top of the configuration done by the parent class, this:

    - caps `hparams.buffer_size` so that the observations held in the replay
      buffer fit within `max_buffer_size_megabytes`;
    - when the setting limits the number of training steps, reduces
      `buffer_size`, `learning_starts` and (integer) `train_freq` relative to 10%
      of `setting.steps_per_phase`;
    - when the setting has a number of steps per phase, converts a
      `(frequency, unit)` training frequency into a number of steps, and limits
      `train_steps_per_task` accordingly.

    Raises
    ------
    RuntimeError
        If not even one observation fits within `max_buffer_size_megabytes`.
    """
    super().configure(setting)
    # The default value for the buffer size in the DQN model is WAY too
    # large, so we re-size it depending on the size of the observations.
    # NOTE: (issue #156) Only consider the images, not the task labels for these
    # buffer size calculations (since the task labels might be None and have the
    # np.object dtype).
    x_space = setting.observation_space.x
    flattened_observation_space = flatten_space(x_space)
    observation_size_bytes = flattened_observation_space.sample().nbytes

    # IF there are more than a few dimensions per observation, then we
    # should probably reduce the size of the replay buffer according to
    # the size of the observations.
    max_buffer_size_bytes = self.max_buffer_size_megabytes * 1024 * 1024
    max_buffer_length = max_buffer_size_bytes // observation_size_bytes

    if max_buffer_length == 0:
        raise RuntimeError(
            f"Couldn't even fit a single observation in the buffer, "
            f"given the specified max_buffer_size_megabytes "
            f"({self.max_buffer_size_megabytes}) and the size of a "
            f"single observation ({observation_size_bytes} bytes)!"
        )

    if self.hparams.buffer_size > max_buffer_length:
        calculated_size_bytes = observation_size_bytes * self.hparams.buffer_size
        calculated_size_gb = calculated_size_bytes / 1024**3
        warnings.warn(
            RuntimeWarning(
                f"The selected buffer size ({self.hparams.buffer_size} is "
                f"too large! (It would take roughly around "
                f"{calculated_size_gb:.3f}Gb to hold many observations alone! "
                f"The buffer size will be capped at {max_buffer_length} "
                f"entries."
            )
        )
        # `max_buffer_length` is a float (it comes from a float number of
        # megabytes), while the buffer size has to be an int.
        self.hparams.buffer_size = int(max_buffer_length)

    # NOTE: Need to change some attributes depending on the maximal number of steps
    # in the environment allowed in the given Setting.
    if setting.train_max_steps:
        logger.info(
            f"Total training steps are limited to {setting.train_steps_per_task} "
            f"steps per task, {setting.train_max_steps} steps in total."
        )
        ten_percent_of_step_budget = setting.steps_per_phase // 10

        if self.hparams.buffer_size > ten_percent_of_step_budget:
            warnings.warn(
                RuntimeWarning("Reducing max buffer size to ten percent of the step budget.")
            )
            self.hparams.buffer_size = ten_percent_of_step_budget

        if self.hparams.learning_starts > ten_percent_of_step_budget:
            logger.info(
                f"The model was originally going to use the first "
                f"{self.hparams.learning_starts} steps for pure random "
                f"exploration, but the setting has a max number of steps set to "
                f"{setting.train_max_steps}, therefore we will limit the number of "
                f"exploration steps to 10% of that 'step budget' = "
                f"{ten_percent_of_step_budget} steps."
            )
            self.hparams.learning_starts = ten_percent_of_step_budget

        if self.hparams.train_freq != -1 and isinstance(self.hparams.train_freq, int):
            # Update the model at least 2 times during each task, and at most
            # once per step.
            self.hparams.train_freq = min(
                self.hparams.train_freq,
                int(0.5 * ten_percent_of_step_budget),
            )
            self.hparams.train_freq = max(self.hparams.train_freq, 1)

        logger.info(f"Training frequency: {self.hparams.train_freq}")

    logger.info(f"Will use a Replay buffer of size {self.hparams.buffer_size}.")

    if setting.steps_per_phase:
        if not isinstance(self.hparams.train_freq, int):
            # The training frequency is a (frequency, unit) pair.
            if self.hparams.train_freq[1] == "step":
                self.hparams.train_freq = self.hparams.train_freq[0]
            else:
                assert self.hparams.train_freq[1] == "episode"
                # Use some value based of the maximum episode length if available,
                # else use a "reasonable" default value.
                # TODO: Double-check that this makes sense.
                if setting.max_episode_steps:
                    self.hparams.train_freq = setting.max_episode_steps
                else:
                    self.hparams.train_freq = 10
                warnings.warn(
                    RuntimeWarning(
                        f"Need the training frequency units to be steps for now! "
                        f"(Train freq has been changed to every "
                        f"{self.hparams.train_freq} steps)."
                    )
                )

        # NOTE: We limit the number of training steps per task, such that we never
        # attempt to fill the buffer using more samples than the environment allows.
        if self.hparams.train_freq > setting.steps_per_phase:
            # Cap the training frequency itself. This used to assign `n_steps`,
            # which the off-policy hyper-parameters declared in this module do
            # not have, so `train_freq` stayed larger than the step budget and
            # the limit computed below became negative.
            self.hparams.train_freq = math.ceil(0.1 * setting.steps_per_phase)
            logger.info(
                f"Capping the train_freq to 10% of step budget length: "
                f"{self.hparams.train_freq}"
            )

        self.train_steps_per_task = min(
            self.train_steps_per_task,
            setting.steps_per_phase - self.hparams.train_freq - 1,
        )
        logger.info(f"Limitting training steps per task to {self.train_steps_per_task}")
def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OffPolicyModel:
    """Instantiate `self.Model` on the training environment.

    The hyper-parameters are converted to a dict and forwarded to the model
    constructor as keyword arguments. `valid_env` is accepted but not used.
    """
    model_kwargs = self.hparams.to_dict()
    return self.Model(env=train_env, **model_kwargs)
def fit(self, train_env: gym.Env, valid_env: gym.Env):
    """Train by deferring entirely to the parent class' `fit`.

    Nothing is returned: the result of the parent call is not propagated.
    """
    super().fit(valid_env=valid_env, train_env=train_env)
def get_actions(
    self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
) -> ContinualRLSetting.Actions:
    """Return the actions chosen by the parent class for `observations`."""
    actions = super().get_actions(observations=observations, action_space=action_space)
    return actions
def on_task_switch(self, task_id: Optional[int]) -> None:
    """Hook invoked when the CL setting moves on to another task.

    `task_id` is the index of the new task when task labels are available,
    and `None` when they are not.

    This implementation only forwards the call to the parent class.

    todo: use this to customize how your method handles task transitions.
    """
    super().on_task_switch(task_id=task_id)
def clear_buffers(self):
    """Reset the replay buffer of the current model, if there is a model."""
    if not self.model:
        # Nothing to clear when no (truthy) model is attached.
        return
    # NOTE: Not 100% sure this is the right way to do it.
    # TODO: `self.model.save_replay_buffer` and `self.model.load_replay_buffer`
    # look like interesting methods as well.
    self.model.replay_buffer.reset()
================================================
FILE: sequoia/methods/stable_baselines3_methods/off_policy_method_test.py
================================================
from typing import ClassVar, Dict, Type
from .off_policy_method import OffPolicyAlgorithm, OffPolicyMethod
class OffPolicyMethodTests:
    """Class-level attribute declarations for test suites of off-policy methods.

    This class only declares attributes; it defines no test of its own.
    """

    # The method class under test.
    Method: ClassVar[Type[OffPolicyMethod]]
    # The model (algorithm) class that goes with `Method`.
    Model: ClassVar[Type[OffPolicyAlgorithm]]
    # presumably the name of the environment used for quick runs — TODO confirm
    debug_dataset: ClassVar[str]
    # presumably extra keyword arguments for debug runs (empty by default) — TODO confirm
    debug_kwargs: ClassVar[Dict] = {}
================================================
FILE: sequoia/methods/stable_baselines3_methods/on_policy_method.py
================================================
""" Base class used to not duplicate the tweaks made all the on-policy algos from SB3.
"""
import math
import warnings
from abc import ABC
from dataclasses import dataclass
from typing import Callable, ClassVar, Dict, Mapping, Optional, Type, Union
import gym
import torch
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm
from sequoia.common.hparams import log_uniform, uniform
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .base import SB3BaseHParams, StableBaselines3Method
logger = get_logger(__name__)
class OnPolicyModel(OnPolicyAlgorithm, ABC):
    """Tweaked version of the OnPolicyAlgorithm from SB3.

    The only thing added on top of `OnPolicyAlgorithm` here is the nested
    `HParams` dataclass, which groups the hyper-parameters common to the
    on-policy algorithms. No method is defined or overridden in this class.
    """

    @dataclass
    class HParams(SB3BaseHParams):
        """Hyper-parameters common to all on-policy algos from SB3."""

        # learning rate for the optimizer, it can be a function of the current
        # progress remaining (from 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-7, 1e-2, default=1e-3)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        # NOTE: Default value here is much lower than in PPO, which might indicate
        # that this A2C is more "on-policy"? (i.e. that it requires data to be super
        # "fresh")?
        n_steps: int = uniform(3, 64, default=5, discrete=True)

        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        # Equivalent to classic advantage when set to 1.
        gae_lambda: float = 1.0
        # gae_lambda: float = uniform(0.5, 1.0, default=1.0)

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0.0, 1.0, default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE.
        # Default: -1 (only sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # The log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run.
        # Setting it to auto, the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"

        # :param _init_setup_model: Whether or not to build the network at the
        # creation of the instance
        # _init_setup_model: bool = True
@dataclass
class OnPolicyMethod(StableBaselines3Method, ABC):
    """Method that uses the A2C model from stable-baselines3."""

    # NOTE(review): the docstring above mentions A2C, but this class is the shared
    # base (its `Model` defaults to the generic `OnPolicyModel`) — confirm wording.
    Model: ClassVar[Type[OnPolicyModel]] = OnPolicyModel

    # Hyper-parameters of the model/algorithm.
    hparams: OnPolicyModel.HParams = mutable_field(OnPolicyModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """Adapt `n_steps` and the training budget to the given setting.

        After the parent's `configure`, and only when the setting has a truthy
        `steps_per_phase`:

        - `hparams.n_steps` is capped (with a `RuntimeWarning`) when it exceeds
          `steps_per_phase // 20`, aiming for about 20 updates per phase;
        - `train_steps_per_task` is limited to
          `steps_per_phase - n_steps - 1`.
        """
        super().configure(setting=setting)

        steps_per_phase = setting.steps_per_phase
        if not steps_per_phase:
            # No step budget on the phase: nothing to adjust.
            return

        min_model_updates = 20
        if self.hparams.n_steps > steps_per_phase // min_model_updates:
            # Choose the number of steps per update so that there are roughly
            # `min_model_updates` model updates during a single `fit` call.
            capped_n_steps = math.ceil(steps_per_phase / min_model_updates)
            warnings.warn(
                RuntimeWarning(
                    f"Capping the number of steps per update to {capped_n_steps}, in "
                    f"order to update the model at least {min_model_updates} "
                    f"times per phase (call to `fit`)."
                )
            )
            assert capped_n_steps > 1
            self.hparams.n_steps = capped_n_steps

        # NOTE: The number of training steps per task is limited so that we never
        # try to fill the buffer with more samples than the environment allows.
        step_budget = steps_per_phase - self.hparams.n_steps - 1
        self.train_steps_per_task = min(self.train_steps_per_task, step_budget)
        logger.info(f"Limitting training steps per task to {self.train_steps_per_task}")

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> OnPolicyModel:
        """Log the hyper-parameters, then build `self.Model` on `train_env`.

        `valid_env` is accepted but not used.
        """
        hparams_json = self.hparams.dumps_json(indent="\t")
        logger.info("Creating model with hparams: \n" + hparams_json)
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train by deferring to the parent class' `fit`; returns nothing."""
        super().fit(valid_env=valid_env, train_env=train_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Return the actions chosen by the parent class for `observations`."""
        actions = super().get_actions(observations=observations, action_space=action_space)
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Hook invoked when the CL setting moves on to another task.

        `task_id` is the index of the new task when task labels are available,
        and `None` when they are not.

        This implementation only forwards the call to the parent class.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def clear_buffers(self):
        """Reset the rollout buffer of the current model, if there is a model."""
        if not self.model:
            # Nothing to clear when no (truthy) model is attached.
            return
        # NOTE: Not 100% sure this is the right way to do it.
        self.model.rollout_buffer.reset()

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        """Return the parent's search space, minus the gSDE entries when the
        setting's action space is discrete.
        """
        search_space = super().get_search_space(setting)
        if isinstance(setting.action_space, spaces.Discrete):
            # From stable_baselines3/common/base_class.py", line 170:
            # > Generalized State-Dependent Exploration (gSDE) can only be used with
            # continuous actions
            # The related entries are dropped so they keep their default values.
            for gsde_key in ("use_sde", "sde_sample_freq"):
                search_space.pop(gsde_key, None)
        return search_space
================================================
FILE: sequoia/methods/stable_baselines3_methods/policy_wrapper.py
================================================
from abc import ABC, abstractmethod
from functools import wraps
from typing import ClassVar, Dict, Generic, Optional, Type, TypeVar, Union
from stable_baselines3.a2c import A2C
from stable_baselines3.a2c.policies import ActorCriticPolicy
from stable_baselines3.common.base_class import BaseAlgorithm
from stable_baselines3.common.policies import BasePolicy
from torch import Tensor
from sequoia.utils import get_logger
logger = get_logger(__name__)
T = TypeVar("T")
Policy = TypeVar("Policy", bound=BasePolicy)
SB3Algo = TypeVar("SB3Algo", bound=BaseAlgorithm)
Wrapper = TypeVar("Wrapper", bound="PolicyWrapper")
class PolicyWrapper(BasePolicy, ABC, Generic[Policy]):
    """Base class for 'wrappers' to be applied to policies from SB3.

    This adds "hooks" into the `step()` and `zero_grad()` method of the Policy's
    optimizer.

    NOTE: Hasn't been worked on in a while, would not recommend using this unless you're
    very familiar with SB3 source code and there is no other way of doing what you want.
    """

    # Cache of the classes generated by `wrap_policy_class` and
    # `wrap_algorithm_class`. Unless a subclass overrides it, this dict is shared
    # by every wrapper, so entries are keyed by `(wrapper class, wrapped class)`.
    # Keying on the wrapped class alone would hand the class generated for one
    # wrapper to a different wrapper applied to the same policy/algorithm type.
    _wrapped_classes: ClassVar[Dict[tuple, type]] = {}

    def __init__(self, *args, _already_initialized: bool = False, **kwargs):
        """Initialize the wrapper part of a policy.

        When `_already_initialized` is True, the object is an existing policy
        that is being wrapped after the fact, so the parent `__init__` is
        skipped. Otherwise `args` / `kwargs` are forwarded to it.
        """
        # When calling `EWCMixin.__init__(existing_policy)`, we don't want
        # to actually call the policy's __init__.
        if not _already_initialized:
            super().__init__(*args, **kwargs)

    @abstractmethod
    def get_loss(self: Policy) -> Union[float, Tensor]:
        """Return the extra loss contributed by this wrapper.

        Called by `after_zero_grad`, i.e. right after the optimizer's
        `zero_grad()`. When the result is a Tensor that requires grad, it is
        backpropagated there.
        """

    def before_optimizer_step(self: Policy):
        """Called before executing `self.policy.optimizer.step()` in the training
        loop of the SB3 algos. Does nothing by default.
        """

    def after_zero_grad(self: Policy):
        """Called after `self.policy.optimizer.zero_grad()` in the training
        loop of the SB3 algos.
        """
        # Backpropagate the loss here, by default, so that any grad clipping
        # also affects the grads of the loss, for instance.
        wrapper_loss = self.get_loss()
        logger.debug(f"{type(self).__name__} loss: {wrapper_loss}")
        if isinstance(wrapper_loss, Tensor) and wrapper_loss.requires_grad:
            wrapper_loss.backward(retain_graph=True)

    @classmethod
    def wrap_policy(
        cls: Type[Wrapper], policy: Policy, **mixin_init_kwargs
    ) -> Union[Policy, Wrapper]:
        """Wrap an existing policy in-place and hook into its optimizer.

        If `policy` is not already an instance of this wrapper, its class is
        swapped for the one returned by `wrap_policy_class` and the wrapper's
        `__init__` is run on it with `_already_initialized=True`. Then the
        optimizer's `step` and `zero_grad` are replaced by functions that call
        `before_optimizer_step` / `after_zero_grad` on the policy.

        Parameters
        ----------
        policy : Policy
            The policy to wrap. Must be a `BasePolicy`.
        **mixin_init_kwargs
            Keyword arguments forwarded to the wrapper's `__init__`.

        Returns
        -------
        Union[Policy, Wrapper]
            The same `policy` object, now also an instance of `cls`.

        Raises
        ------
        NotImplementedError
            If the policy has neither an optimizer nor an optimizer class.
        """
        assert isinstance(policy, BasePolicy)
        if not isinstance(policy, cls):
            # Dynamically change the class of this single instance to be a subclass
            # of its current class, with the addition of the wrapper base class.
            policy.__class__ = cls.wrap_policy_class(type(policy))
            # 'initialize' the existing object for this mixin type.
            cls.__init__(policy, _already_initialized=True, **mixin_init_kwargs)
        assert isinstance(policy, cls)

        # NOTE(review): when `policy.optimizer` is falsy this falls back to the
        # optimizer *class*, so the hooks below get installed on the class itself
        # — confirm that this is intended.
        optimizer = policy.optimizer or policy.optimizer_class
        if optimizer is None:
            raise NotImplementedError("Need to have an optimizer instance atm")

        # 'Replace' the `policy.optimizer.step` with a function that first calls
        # the `before_optimizer_step` hook.
        _step = optimizer.step

        # NOTE: Setting the policy's `optimizer` attribute to a new value will
        # actually break this.
        @wraps(optimizer.step)
        def new_optimizer_step(*args, **kwargs):
            policy.before_optimizer_step()
            return _step(*args, **kwargs)

        optimizer.step = new_optimizer_step

        _zero_grad = optimizer.zero_grad

        @wraps(optimizer.zero_grad)
        def new_zero_grad(*args, **kwargs):
            _zero_grad(*args, **kwargs)
            policy.after_zero_grad()

        optimizer.zero_grad = new_zero_grad
        return policy

    @classmethod
    def wrap_policy_class(
        cls: Type[Wrapper], policy_type: Type[Policy]
    ) -> Type[Union[Policy, Wrapper]]:
        """Add the wrapper as a base class to a policy type from SB3.

        Returns `policy_type` unchanged when it already derives from `cls`.
        Otherwise returns a subclass of both `policy_type` and `cls`, named
        `<policy name>With<wrapper name>`, created once per
        `(wrapper, policy type)` pair and cached.
        """
        assert issubclass(policy_type, BasePolicy)
        if issubclass(policy_type, cls):
            # It already has the mixin, so return the class unchanged.
            return policy_type

        # Save the results so we don't create two wrappers for the same class.
        # The wrapper class is part of the key since the cache is shared.
        cache_key = (cls, policy_type)
        if cache_key in cls._wrapped_classes:
            return cls._wrapped_classes[cache_key]

        class WrappedPolicy(policy_type, cls):  # type: ignore
            pass

        WrappedPolicy.__name__ = policy_type.__name__ + "With" + cls.__name__
        cls._wrapped_classes[cache_key] = WrappedPolicy
        return WrappedPolicy

    @classmethod
    def wrap_algorithm(cls: Type[Wrapper], algo: SB3Algo, **wrapper_kwargs) -> SB3Algo:
        """Wrap an existing algorithm's policy using this wrapper.

        If the algorithm has no policy yet, its `_setup_model` is replaced by a
        version that wraps the policy right after it is created. Returns the
        same `algo` object.
        """
        assert isinstance(algo, BaseAlgorithm)
        if not isinstance(algo.policy, cls):
            if algo.policy is None:
                # We want to wrap the _setup_model so the policy gets wrapped.
                _original_setup_model = algo._setup_model

                @wraps(algo._setup_model)
                def _wrapped_setup_model(*args, **kwargs) -> None:
                    _original_setup_model(*args, **kwargs)
                    assert isinstance(algo.policy, BasePolicy)
                    algo.policy = cls.wrap_policy(algo.policy, **wrapper_kwargs)

                algo._setup_model = _wrapped_setup_model
            else:
                algo.policy = cls.wrap_policy(algo.policy, **wrapper_kwargs)
        return algo

    @classmethod
    def wrap_algorithm_class(
        cls: Type[Wrapper], algo_type: Type[SB3Algo]
    ) -> Type[Union[SB3Algo, Wrapper]]:
        """Same idea, but wraps a class of algorithm, so that its policies are
        wrapped with this mixin.

        The generated subclass is named `<algo name>With<wrapper name>` and is
        created once per `(wrapper, algorithm type)` pair, then cached.
        """
        cache_key = (cls, algo_type)
        if cache_key in cls._wrapped_classes:
            return cls._wrapped_classes[cache_key]

        class WrappedAlgo(algo_type):  # type: ignore
            def __init__(self, *args, **kwargs):
                # IDEA Extract the arguments that could be used for the wrapper?
                super().__init__(*args, **kwargs)
                self.policy: Union[BasePolicy, Wrapper]

            def _setup_model(self):
                super()._setup_model()
                # TODO: Figure out a way of passing the kwargs to the policy?
                # maybe using the 'policy_kwargs' argument to the constructor?
                self.policy = cls.wrap_policy(self.policy)

            # No need to change the train loop anymore!
            # def train(self) -> None:
            #     return super().train()

            # IDEA: Redirect any failing attribute lookups to the policy?
            def __getattr__(self, attr: str):
                try:
                    return super().__getattribute__(attr)
                except AttributeError as e:
                    if attr == "policy":
                        # `policy` itself is missing (e.g. before it is assigned,
                        # or while copying/unpickling): looking it up through
                        # `self.policy` would recurse forever.
                        raise
                    policy = getattr(self, "policy", None)
                    if hasattr(policy, attr):
                        return getattr(policy, attr)
                    raise e

            # The above would remove the need for any of these:
            # def on_task_switch(self, task_id: Optional[int]):
            #     self.policy.on_task_switch(task_id)

            # def ewc_loss(self) -> Union[float, Tensor]:
            #     return self.policy.ewc_loss()

        WrappedAlgo.__name__ = algo_type.__name__ + "With" + cls.__name__
        cls._wrapped_classes[cache_key] = WrappedAlgo
        return WrappedAlgo
from stable_baselines3 import A2C
# Either 'manually', like this:
class A2CWithEWC(A2C):
    """A2C whose policy gets wrapped with `EWC` when the model is set up.

    NOTE(review): `EWC` is neither defined nor imported in the visible part of
    this file, and `PolicyWrapper` exposes `wrap_policy` (no leading
    underscore) — confirm where `EWC._wrap_policy` is supposed to come from.
    """

    def __init__(self, *args, ewc_coefficient: float = 1.0, ewc_p_norm: int = 2, **kwargs):
        # The EWC settings are stored before calling the parent constructor,
        # presumably because it may already invoke `_setup_model` — TODO confirm.
        self.ewc_p_norm = ewc_p_norm
        self.ewc_coefficient = ewc_coefficient
        super().__init__(*args, **kwargs)
        self.policy: Union[ActorCriticPolicy, EWC]

    def _setup_model(self):
        """Run the regular A2C setup, then wrap the resulting policy."""
        super()._setup_model()
        wrapper_kwargs = dict(
            ewc_coefficient=self.ewc_coefficient,
            ewc_p_norm=self.ewc_p_norm,
        )
        # Just to show that the policy was just wrapped.
        self.policy = EWC._wrap_policy(self.policy, **wrapper_kwargs)

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Forward the task switch to the (wrapped) policy."""
        self.policy.on_task_switch(task_id)
## OR automatically, like this!
# A2CWithEWC = EWC._wrap_algorithm_class(A2C)
# DQNWithEWC = EWC._wrap_algorithm_class(DQN)
# PPOWithEWC = EWC._wrap_algorithm_class(PPO)
# DDPGWithEWC = EWC._wrap_algorithm_class(DDPG)
# SACWithEWC = EWC._wrap_algorithm_class(SAC)
================================================
FILE: sequoia/methods/stable_baselines3_methods/ppo.py
================================================
""" Method that uses the PPO model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict, Mapping, Optional, Type, Union
import gym
import torch
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.ppo import PPO
from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .on_policy_method import OnPolicyMethod, OnPolicyModel
logger = get_logger(__name__)
class PPOModel(PPO, OnPolicyModel):
    """Proximal Policy Optimization algorithm (PPO) (clip version) - from SB3.

    Paper: https://arxiv.org/abs/1707.06347
    Code: The SB3 implementation borrows code from OpenAI Spinning Up
    (https://github.com/openai/spinningup/)
    https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail
    and Stable Baselines (PPO2 from https://github.com/hill-a/stable-baselines)

    Introduction to PPO: https://spinningup.openai.com/en/latest/algorithms/ppo.html

    This class combines SB3's `PPO` with `OnPolicyModel` and only adds the nested
    `HParams` dataclass; no method is defined or overridden here.
    """

    @dataclass
    class HParams(OnPolicyModel.HParams):
        """Hyper-parameters of the PPO Model."""

        # # The policy model to use (MlpPolicy, CnnPolicy, ...)
        # policy: Union[str, Type[ActorCriticPolicy]]

        # # The environment to learn from (if registered in Gym, can be str)
        # env: Union[GymEnv, str]

        # The learning rate, it can be a function of the current progress remaining
        # (from 1 to 0)
        learning_rate: float = log_uniform(1e-6, 1e-2, default=3e-4)

        # The number of steps to run for each environment per update (i.e. batch size
        # is n_steps * n_env where n_env is number of environment copies running in
        # parallel)
        n_steps: int = log_uniform(32, 8192, default=2048, discrete=True)

        # Minibatch size
        batch_size: int = 64
        # batch_size: Optional[int] = categorical(16, 32, 64, 128, default=64)

        # Number of epoch when optimizing the surrogate loss
        n_epochs: int = 10

        # Discount factor
        gamma: float = 0.99
        # gamma: float = uniform(0.9, 0.9999, default=0.99)

        # Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        gae_lambda: float = 0.95
        # gae_lambda: float = uniform(0.8, 1.0, default=0.95)

        # Clipping parameter, it can be a function of the current progress remaining
        # (from 1 to 0).
        clip_range: float = 0.2
        # clip_range: float = uniform(0.05, 0.4, default=0.2)

        # Clipping parameter for the value function, it can be a function of the current
        # progress remaining (from 1 to 0). This is a parameter specific to the OpenAI
        # implementation. If None is passed (default), no clipping will be done on the
        # value function. IMPORTANT: this clipping depends on the reward scaling.
        clip_range_vf: Optional[float] = None

        # Entropy coefficient for the loss calculation
        ent_coef: float = 0.0
        # ent_coef: float = uniform(0., 1., default=0.0)

        # Value function coefficient for the loss calculation
        vf_coef: float = 0.5
        # vf_coef: float = uniform(0.01, 1.0, default=0.5)

        # The maximum value for the gradient clipping
        max_grad_norm: float = 0.5
        # max_grad_norm: float = uniform(0.1, 10, default=0.5)

        # Whether to use generalized State Dependent Exploration (gSDE) instead of
        # action noise exploration (default: False)
        use_sde: bool = False
        # use_sde: bool = categorical(True, False, default=False)

        # Sample a new noise matrix every n steps when using gSDE Default: -1 (only
        # sample at the beginning of the rollout)
        sde_sample_freq: int = -1
        # sde_sample_freq: int = categorical(-1, 1, 5, 10, default=-1)

        # Limit the KL divergence between updates, because the clipping is not enough to
        # prevent large update see issue #213
        # (cf https://github.com/hill-a/stable-baselines/issues/213)
        # By default, there is no limit on the kl div.
        target_kl: Optional[float] = None

        # the log location for tensorboard (if None, no logging)
        tensorboard_log: Optional[str] = None

        # # Whether to create a second environment that will be used for evaluating the
        # # agent periodically. (Only available when passing string for the environment)
        # create_eval_env: bool = False

        # # Additional arguments to be passed to the policy on creation
        # policy_kwargs: Optional[Dict[str, Any]] = None

        # The verbosity level: 0 no output, 1 info, 2 debug
        verbose: int = 1

        # Seed for the pseudo random generators
        seed: Optional[int] = None

        # Device (cpu, cuda, ...) on which the code should be run. Setting it to auto,
        # the code will be run on the GPU if possible.
        device: Union[torch.device, str] = "auto"

        # Whether or not to build the network at the creation of the instance
        # _init_setup_model: bool = True
@register_method
@dataclass
class PPOMethod(OnPolicyMethod):
    """Method that uses the PPO model from stable-baselines3."""

    Model: ClassVar[Type[PPOModel]] = PPOModel

    # Hyper-parameters of the PPO Model.
    hparams: PPOModel.HParams = mutable_field(PPOModel.HParams)

    def configure(self, setting: ContinualRLSetting):
        """Configure the method for `setting` (parent implementation only)."""
        super().configure(setting=setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> PPOModel:
        """Log the hyper-parameters, then build `self.Model` on `train_env`.

        `valid_env` is accepted but not used.
        """
        hparams_json = self.hparams.dumps_json(indent="\t")
        logger.info("Creating model with hparams: \n" + hparams_json)
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train by deferring to the parent class' `fit`; returns nothing."""
        super().fit(valid_env=valid_env, train_env=train_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Return the actions chosen by the parent class for `observations`."""
        actions = super().get_actions(observations=observations, action_space=action_space)
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Hook invoked when the CL setting moves on to another task.

        `task_id` is the index of the new task when task labels are available,
        and `None` when they are not.

        This implementation only forwards the call to the parent class.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)

    def get_search_space(self, setting: ContinualRLSetting) -> Mapping[str, Union[str, Dict]]:
        """Return the search space computed by the parent class, unchanged."""
        search_space = super().get_search_space(setting)
        return search_space
# Script entry point: when this module is executed directly, call
# `PPOMethod.main()` and print whatever it returns.
if __name__ == "__main__":
    results = PPOMethod.main()
    print(results)
================================================
FILE: sequoia/methods/stable_baselines3_methods/ppo_test.py
================================================
from typing import ClassVar, Type
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import DiscreteActionSpaceMethodTests
from .ppo import PPOMethod, PPOModel
class TestPPO(DiscreteActionSpaceMethodTests):
    """Binds the inherited `DiscreteActionSpaceMethodTests` suite to PPO.

    No test is defined here; only the two class attributes below are set.
    """

    # Method class under test.
    Method: ClassVar[Type[StableBaselines3Method]] = PPOMethod
    # Model class associated with the method under test.
    Model: ClassVar[Type[BaseAlgorithm]] = PPOModel
================================================
FILE: sequoia/methods/stable_baselines3_methods/sac.py
================================================
""" Method that uses the SAC model from stable-baselines3 and targets the RL
settings in the tree.
"""
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union
import gym
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.sac.sac import SAC
from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .off_policy_method import OffPolicyMethod, OffPolicyModel
logger = get_logger(__name__)
class SACModel(SAC, OffPolicyModel):
    """Customized version of the SAC model from stable-baselines-3.

    Combines SB3's `SAC` with `OffPolicyModel` and only adds the nested
    `HParams` dataclass; no method is defined or overridden here.
    """

    # NOTE: every override in `HParams` below needs a type annotation. A bare
    # `name = value` is not picked up by `@dataclass` as a field, so the default
    # inherited from the parent `HParams` would silently be used instead.
    @dataclass
    class HParams(OffPolicyModel.HParams):
        """Hyper-parameters of the SAC Model."""

        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=3e-4)
        buffer_size: int = 1_000_000
        learning_starts: int = 100
        batch_size: int = 256
        tau: float = 0.005
        gamma: float = 0.99
        train_freq: int = 1
        gradient_steps: int = 1
        # action_noise: Optional[ActionNoise] = None
        optimize_memory_usage: bool = False
        ent_coef: Union[str, float] = "auto"
        target_update_interval: int = 1
        target_entropy: Union[str, float] = "auto"
        use_sde: bool = False
        sde_sample_freq: int = -1
@register_method
@dataclass
class SACMethod(OffPolicyMethod):
    """Method that uses the SAC model from stable-baselines3."""

    Model: ClassVar[Type[SACModel]] = SACModel

    # Hyper-parameters of the SAC model.
    hparams: SACModel.HParams = mutable_field(SACModel.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        """Configure the method for `setting` (parent implementation only)."""
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> SACModel:
        """Build `self.Model` on `train_env` from the hyper-parameters.

        `valid_env` is accepted but not used.
        """
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train by deferring to the parent class' `fit`; returns nothing."""
        super().fit(valid_env=valid_env, train_env=train_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Return the actions chosen by the parent class for `observations`."""
        actions = super().get_actions(observations=observations, action_space=action_space)
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Hook invoked when the CL setting moves on to another task.

        `task_id` is the index of the new task when task labels are available,
        and `None` when they are not.

        This implementation only forwards the call to the parent class.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)
# Script entry point: when this module is executed directly, call
# `SACMethod.main()` and print whatever it returns.
if __name__ == "__main__":
    results = SACMethod.main()
    print(results)
================================================
FILE: sequoia/methods/stable_baselines3_methods/sac_test.py
================================================
from typing import ClassVar, Type
import pytest
from sequoia.common.config import Config
from sequoia.conftest import slow
from sequoia.settings import Setting
from sequoia.settings.rl import ContinualRLSetting, IncrementalRLSetting, TaskIncrementalRLSetting
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import ContinuousActionSpaceMethodTests
from .sac import SACMethod, SACModel
@slow
@pytest.mark.timeout(120)
class TestSAC(ContinuousActionSpaceMethodTests):
    """Runs the inherited continuous-action test suite against SAC, plus one
    SAC-specific test on MountainCarContinuous.
    """

    Method: ClassVar[Type[StableBaselines3Method]] = SACMethod
    Model: ClassVar[Type[BaseAlgorithm]] = SACModel

    # TODO: Look into why SAC is so slow, there's probably a parameter which isn't being set
    # properly.
    @slow
    @pytest.mark.timeout(120)
    @pytest.mark.parametrize(
        "Setting", [ContinualRLSetting, IncrementalRLSetting, TaskIncrementalRLSetting]
    )
    @pytest.mark.parametrize("observe_state", [True, False])
    def test_continuous_mountaincar(self, Setting: Type[Setting], observe_state: bool):
        """Apply the method to a short 2-task MountainCarContinuous setting and
        print the summary of the results.
        """
        # NOTE(review): `observe_state` is parametrized but never used below, so
        # both of its values run the exact same test — confirm intent.
        method = self.Method()
        setting_kwargs = dict(
            dataset="MountainCarContinuous-v0",
            nb_tasks=2,
            train_steps_per_task=1_000,
            test_steps_per_task=1_000,
        )
        setting = Setting(**setting_kwargs)
        debug_config = Config(debug=True)
        results: ContinualRLSetting.Results = setting.apply(method, config=debug_config)
        print(results.summary())
================================================
FILE: sequoia/methods/stable_baselines3_methods/td3.py
================================================
""" TODO: Implement and test DDPG. """
from dataclasses import dataclass
from typing import Callable, ClassVar, Optional, Type, Union
import gym
from gym import spaces
from simple_parsing import mutable_field
from stable_baselines3.common.off_policy_algorithm import TrainFreq
from stable_baselines3.td3 import TD3
from sequoia.common.hparams import log_uniform
from sequoia.methods import register_method
from sequoia.settings.rl import ContinualRLSetting
from sequoia.utils.logging_utils import get_logger
from .off_policy_method import OffPolicyMethod, OffPolicyModel
logger = get_logger(__name__)
class TD3Model(TD3, OffPolicyModel):
    """SB3's `TD3` combined with `OffPolicyModel`.

    Only adds the nested `HParams` dataclass; no method is defined or
    overridden here.
    """

    @dataclass
    class HParams(OffPolicyModel.HParams):
        """Hyper-parameters of the TD3 model."""

        # TODO: Add HParams specific to TD3 here, if any, and also check that the
        # default values are correct.

        # The learning rate, it can be a function of the current progress (from
        # 1 to 0)
        learning_rate: Union[float, Callable] = log_uniform(1e-6, 1e-2, default=1e-3)

        # Minibatch size for each gradient update
        batch_size: int = 100
        # batch_size: int = categorical(1, 2, 4, 8, 16, 32, 128, default=32)

        train_freq: TrainFreq = (1, "episode")

        # How many gradient steps to do after each rollout (see ``train_freq``
        # and ``n_episodes_rollout``) Set to ``-1`` means to do as many gradient
        # steps as steps done in the environment during the rollout.
        gradient_steps: int = -1
        # gradient_steps: int = categorical(1, -1, default=1)
@register_method
@dataclass
class TD3Method(OffPolicyMethod):
    """Method that uses the TD3 model from stable-baselines3."""

    Model: ClassVar[Type[TD3Model]] = TD3Model

    hparams: TD3Model.HParams = mutable_field(TD3Model.HParams)

    # Approximate limit on the size of the replay buffer, in megabytes.
    max_buffer_size_megabytes: float = 2_048.0

    def configure(self, setting: ContinualRLSetting):
        """Configure the method for `setting` (parent implementation only)."""
        super().configure(setting)

    def create_model(self, train_env: gym.Env, valid_env: gym.Env) -> TD3Model:
        """Build `self.Model` on `train_env` from the hyper-parameters.

        `valid_env` is accepted but not used.
        """
        model_kwargs = self.hparams.to_dict()
        return self.Model(env=train_env, **model_kwargs)

    def fit(self, train_env: gym.Env, valid_env: gym.Env):
        """Train by deferring to the parent class' `fit`; returns nothing."""
        super().fit(valid_env=valid_env, train_env=train_env)

    def get_actions(
        self, observations: ContinualRLSetting.Observations, action_space: spaces.Space
    ) -> ContinualRLSetting.Actions:
        """Return the actions chosen by the parent class for `observations`."""
        actions = super().get_actions(observations=observations, action_space=action_space)
        return actions

    def on_task_switch(self, task_id: Optional[int]) -> None:
        """Hook invoked when the CL setting moves on to another task.

        `task_id` is the index of the new task when task labels are available,
        and `None` when they are not.

        This implementation only forwards the call to the parent class.

        todo: use this to customize how your method handles task transitions.
        """
        super().on_task_switch(task_id=task_id)
# Script entry point: when this module is executed directly, call
# `TD3Method.main()` and print whatever it returns.
if __name__ == "__main__":
    results = TD3Method.main()
    print(results)
================================================
FILE: sequoia/methods/stable_baselines3_methods/td3_test.py
================================================
from typing import ClassVar, Type
from .base import BaseAlgorithm, StableBaselines3Method
from .base_test import ContinuousActionSpaceMethodTests
from .td3 import TD3Method, TD3Model
class TestTD3(ContinuousActionSpaceMethodTests):
    """Binds the inherited `ContinuousActionSpaceMethodTests` suite to TD3.

    No test is defined here; only the two class attributes below are set.
    """

    # Method class under test.
    Method: ClassVar[Type[StableBaselines3Method]] = TD3Method
    # Model class associated with the method under test.
    Model: ClassVar[Type[BaseAlgorithm]] = TD3Model
================================================
FILE: sequoia/methods/trainer.py
================================================
""" 'Patch' for the Trainer of Pytorch Lightning so it can use gym environment as
dataloaders (via the GymDataLoader class of Sequoia).
"""
import os
from dataclasses import dataclass
from functools import singledispatch
from pathlib import Path
from typing import Any, Callable, Iterable, List, Optional, Union
import gym
import pytorch_lightning.trainer.connectors.data_connector
import pytorch_lightning.utilities.apply_func
import torch
from pytorch_lightning import Callback
from pytorch_lightning import Trainer as _Trainer
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.trainer.connectors.data_connector import DataConnector
from pytorch_lightning.trainer.supporters import CombinedLoader
from pytorch_lightning.utilities.apply_func import apply_to_collection
from simple_parsing import choice
from torch.utils.data import DataLoader
from sequoia.common import Batch
from sequoia.common.config import Config
from sequoia.common.gym_wrappers.utils import IterableWrapper, has_wrapper
from sequoia.common.hparams import HyperParameters, uniform
from sequoia.settings.rl.continual.environment import GymDataLoader
from sequoia.settings.sl import PassiveEnvironment
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable
logger = get_logger(__name__)
@dataclass
class TrainerConfig(HyperParameters, Parseable):
    """Configuration dataclass for a pytorch-lightning Trainer.

    See the docs for the Trainer from pytorch lightning for more info on the options.

    TODO: Pytorch Lightning already has a mechanism for adding argparse
    arguments for the Trainer.. It would be nice to find a way to use the 'native' way
    of adding arguments in PL in addition to using simple-parsing.
    """

    gpus: int = torch.cuda.device_count()
    overfit_batches: float = 0.0
    fast_dev_run: bool = False

    # Maximum number of epochs to train for.
    max_epochs: int = uniform(1, 100, default=10)

    # Number of nodes to use.
    num_nodes: int = 1
    accelerator: Optional[str] = None
    log_gpu_memory: bool = False

    val_check_interval: Union[int, float] = 1.0

    auto_scale_batch_size: Optional[str] = None
    auto_lr_find: bool = False
    # Floating point precision to use in the model. (See pl.Trainer)
    precision: int = choice(16, 32, default=32)

    default_root_dir: Path = Path(os.environ.get("RESULTS_DIR", os.getcwd() + "/results"))

    # How much of training dataset to check (floats = percent, int = num_batches)
    limit_train_batches: Union[int, float] = 1.0
    # How much of validation dataset to check (floats = percent, int = num_batches)
    limit_val_batches: Union[int, float] = 1.0
    # How much of test dataset to check (floats = percent, int = num_batches)
    limit_test_batches: Union[int, float] = 1.0

    # If ``True``, enable checkpointing.
    # It will configure a default ModelCheckpoint callback if there is no user-defined
    # ModelCheckpoint in the `callbacks`.
    checkpoint_callback: bool = True

    def make_trainer(
        self,
        config: Config,
        callbacks: Optional[List[Callback]] = None,
        loggers: Optional[Iterable[LightningLoggerBase]] = None,
    ) -> "Trainer":
        """Create a Trainer object from the command-line args.

        Adds the given loggers and callbacks as well.

        Args:
            config: Experiment configuration. Not read by this method at the moment.
            callbacks: Callbacks forwarded to the Trainer.
            loggers: Loggers forwarded to the Trainer through its `logger` argument.

        Returns:
            A `Trainer` configured from the fields of this dataclass.
        """
        # FIXME: Trying to subclass the DataConnector to fix issues while iterating
        # over gym envs, that arise because of the _with_is_last() function from
        # lightning.
        import pytorch_lightning.trainer.trainer

        # Imported at call time on purpose: this picks up whatever class is bound to
        # `DataConnector` on the data_connector module right now, and makes the
        # `pytorch_lightning.trainer.trainer` module use that same class.
        from pytorch_lightning.trainer.connectors.data_connector import DataConnector

        setattr(pytorch_lightning.trainer.trainer, "DataConnector", DataConnector)

        # NOTE(review): `val_check_interval` and `precision` are declared as fields but
        # are not forwarded to the Trainer here -- confirm whether that is intended.
        trainer = Trainer(
            logger=loggers,
            callbacks=callbacks,
            gpus=self.gpus,
            num_nodes=self.num_nodes,
            max_epochs=self.max_epochs,
            accelerator=self.accelerator,
            log_gpu_memory=self.log_gpu_memory,
            overfit_batches=self.overfit_batches,
            fast_dev_run=self.fast_dev_run,
            auto_scale_batch_size=self.auto_scale_batch_size,
            auto_lr_find=self.auto_lr_find,
            # TODO: Either move the log-dir-related stuff from Config to this
            # class, or figure out a way to pass the value from Config to this
            # function
            default_root_dir=self.default_root_dir,
            limit_train_batches=self.limit_train_batches,
            limit_val_batches=self.limit_val_batches,
            # Use the dedicated test-time limit (this used to forward
            # `limit_train_batches`, which made the `limit_test_batches` field a no-op).
            limit_test_batches=self.limit_test_batches,
            checkpoint_callback=self.checkpoint_callback,
            profiler=None,  # TODO: Seem to have an impact on the problem below.
        )
        return trainer
class Trainer(_Trainer):
    """Subclass of the pytorch-lightning `Trainer` used as a hook point for gym envs.

    Both methods currently end up delegating to the parent implementation.
    """

    def __init__(self, **kwargs):
        """Forward every keyword argument to the parent `Trainer` constructor."""
        super().__init__(**kwargs)

    def fit(self, model, train_dataloader=None, val_dataloaders=None, datamodule=None):
        """Fit `model` by delegating to the parent `fit` with the same arguments.

        Returns:
            Whatever the parent `fit` returns.
        """
        # TODO: Figure out what method to overwrite to fix the problem of accessing two
        # batches in a row in the environment. (with_is_last annoyance.)
        is_gym_dataloader = isinstance(train_dataloader, gym.Env) and has_wrapper(
            train_dataloader, GymDataLoader
        )
        if is_gym_dataloader:
            # Placeholder for future env-specific handling; the value is not used yet.
            train_env = train_dataloader
            # raise NotImplementedError("TODO: Fix this.")

        fit_kwargs = {
            "train_dataloader": train_dataloader,
            "val_dataloaders": val_dataloaders,
            "datamodule": datamodule,
        }
        return super().fit(model, **fit_kwargs)
# TODO: Debugging/fixing this buggy method from Pytorch-Lightning.
# def _apply_to_collection(
# data: Any,
# dtype: Union[type, tuple],
# function: Callable,
# *args,
# wrong_dtype: Optional[Union[type, tuple]] = None,
# **kwargs
# ) -> Any:
# Wrap pytorch-lightning's `apply_to_collection` with `functools.singledispatch`, so that
# extra implementations can be registered based on the type of its first argument, and
# install the wrapped function back on the `pytorch_lightning.utilities.apply_func` module.
apply_to_collection = singledispatch(apply_to_collection)
setattr(pytorch_lightning.utilities.apply_func, "apply_to_collection", apply_to_collection)
# import pytorch_lightning.overrides.data_parallel
# setattr(pytorch_lightning.overrides.data_parallel, "apply_to_collection", apply_to_collection)
@apply_to_collection.register(Batch)
def _apply_to_batch(
    data: Batch,
    dtype: Union[type, tuple],
    function: Callable,
    *args,
    wrong_dtype: Optional[Union[type, tuple]] = None,
    **kwargs,
) -> Any:
    """`apply_to_collection` implementation registered for `Batch` objects.

    Every item of `data` is passed through `apply_to_collection` with the same
    `dtype`, `function`, `wrong_dtype` and extra arguments, and the results are used
    as keyword arguments to build a new object of the same type as `data`.

    Returns:
        A new `type(data)` instance holding the converted items.
    """
    # assert False, f"YAY! {type(data)}"
    # logger.debug(f"{type(data)}, {dtype}, {function}, {args}, {wrong_dtype}, {kwargs}")
    converted_items = {}
    for name, value in data.items():
        converted_items[name] = apply_to_collection(
            value, dtype, function, *args, wrong_dtype=wrong_dtype, **kwargs
        )
    batch_type = type(data)
    return batch_type(**converted_items)
class ProfiledEnvironment(IterableWrapper, DataLoader):
    """Wrapper that iterates over an environment in the `(index, (batch, is_last))` form.

    Each item yielded by the wrapped iteration is paired with its index and with a
    flag which is currently the value of `self.is_closed()`.
    """

    def __iter__(self):
        """Yield `(i, (obs, done))` tuples, where `done` is `self.is_closed()`."""
        for i, obs in enumerate(super().__iter__()):
            # logger.debug(f"Step {i}, obs.done={obs.done}")
            done = obs.done
            # Only reduce with `all` when `done` is a non-scalar, array-like value.
            # Plain bools have no `.shape`, and values with an empty shape cannot be
            # iterated over, so both are left untouched instead of raising here.
            if not isinstance(done, bool) and getattr(done, "shape", ()):
                # TODO: When we have batch size of 1, or more generally in RL, do we
                # want one call to `trainer.fit` to last a given number of episodes ?
                # TODO: Look into the `max_steps` argument to Trainer.
                done = all(done)
            # done = done or self.is_closed()
            # NOTE: The value computed above is currently overridden by the closed state
            # of the environment.
            done = self.is_closed()
            yield i, (obs, done)
class PatchedDataConnector(DataConnector):
    """`DataConnector` whose train-dataloader profiling step is aware of gym environments."""

    def get_profiled_train_dataloader(self, train_dataloader: DataLoader):
        """Return the iterable that is used to fetch the training batches.

        When `train_dataloader` is a `CombinedLoader` wrapping a gym environment, the
        environment replaces the `CombinedLoader` on the trainer, and environments whose
        `unwrapped` attribute is not a `PassiveEnvironment` are returned wrapped in a
        `ProfiledEnvironment`. In every other case the dataloader goes through the
        trainer's profiler, on top of pytorch-lightning's `prefetch_iterator`.

        Args:
            train_dataloader: The training dataloader given by the trainer.

        Returns:
            A `ProfiledEnvironment`, or the iterable returned by
            `self.trainer.profiler.profile_iterable`.
        """
        if isinstance(train_dataloader, CombinedLoader) and isinstance(
            train_dataloader.loaders, gym.Env
        ):
            env = train_dataloader.loaders
            # TODO: Replacing this 'CombinedLoader' on the Trainer with the env, since I
            # don't think we need it (not using multiple train dataloaders with PL atm.)
            self.trainer.train_dataloader = env
            if not isinstance(env.unwrapped, PassiveEnvironment):
                # Only really need to do this 'profile' thing for 'active' environments.
                return ProfiledEnvironment(env)
        else:
            # This gets called before each epoch, so we get here on the start of the
            # second training epoch.
            # TODO: Check that this isn't causing issues between tasks
            assert train_dataloader is self.trainer.train_dataloader

        # `prefetch_iterator` is not imported at the top of this module, so it is
        # imported here, right before its only use (it used to be an undefined name).
        from pytorch_lightning.trainer.supporters import prefetch_iterator

        profiled_dl = self.trainer.profiler.profile_iterable(
            enumerate(prefetch_iterator(train_dataloader)), "get_train_batch"
        )
        return profiled_dl
# Replace the `DataConnector` attribute of pytorch-lightning's `data_connector` module
# with the patched subclass, so that later lookups of that attribute get the patched class.
setattr(
    pytorch_lightning.trainer.connectors.data_connector,
    "DataConnector",
    PatchedDataConnector,
)
# Same replacement as the `setattr` call above, written as a plain attribute assignment.
pytorch_lightning.trainer.connectors.data_connector.DataConnector = PatchedDataConnector
================================================
FILE: sequoia/methods.puml
================================================
@startuml methods
' !include gym.plantuml
' remove gym.spaces
' TODO: There must be a simpler way to only keep a single node, right?
' !include settings.puml
' remove settings.active
' remove settings.assumptions
' remove settings.passive
' remove SettingABC
' !include settings/base.puml
package methods {
package base_method {
class BaseMethod implements Method {
+ hparams: BaseModel.HParams
+ config: Config
+ trainer_options: TrainerConfig
+ trainer: Trainer
}
}
package aux_tasks {
package auxiliary_task {
abstract class AuxiliaryTask {
+ options: AuxiliaryTask.Options
+ get_loss(ForwardPass, Actions, Rewards): Loss
}
abstract class AuxiliaryTask.Options {
+ coefficient: float
}
AuxiliaryTask *-- AuxiliaryTask.Options
}
}
!include ./methods/models.puml
}
@enduml
================================================
FILE: sequoia/sequoia.puml
================================================
@startuml sequoia
package sequoia {
!include common.puml
!include settings.puml
!include methods.puml
}
@enduml
================================================
FILE: sequoia/settings/README.md
================================================
# Sequoia - Settings
### (WIP) Adding a new Setting:
Prerequisites:
- Take a quick look at the `dataclasses` example
- Take a quick look at [simple_parsing](https://github.com/lebrice/SimpleParsing) (A python package I've created) which we use to generate the command-line arguments for the Settings.
## Available Settings:
- ## [Setting](sequoia/settings/base/setting.py)
Base class for all research settings in ML: Root node of the tree.
A 'setting' is loosely defined here as a learning problem with a specific
set of assumptions, restrictions, and an evaluation procedure.
For example, Reinforcement Learning is a type of Setting in which we assume
that an Agent is able to observe an environment, take actions upon it, and
receive rewards back from the environment. Some of the assumptions include
that the reward is dependent on the action taken, and that the actions have
an impact on the environment's state (and on the next observations the agent
will receive). The evaluation procedure consists in trying to maximize the
reward obtained from an environment over a given number of steps.
This 'Setting' class should ideally represent the most general learning
problem imaginable, with almost no assumptions about the data or evaluation
procedure.
This is a dataclass. Its attributes can also be used as command-line
arguments using `simple_parsing`.
Abstract (required) methods:
- **apply** Applies a given Method on this setting to produce Results.
- **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode).
- **setup** (things to do on every accelerator in distributed mode).
- **train_dataloader** the training environment/dataloader.
- **val_dataloader** the val environments/dataloader(s).
- **test_dataloader** the test environments/dataloader(s).
"Abstract"-ish (required) class attributes:
- `Results`: The class of Results that are created when applying a Method on
this setting.
- `Observations`: The type of Observations that will be produced in this
setting.
- `Actions`: The type of Actions that are expected from this setting.
- `Rewards`: The type of Rewards that this setting will (potentially) return
upon receiving an action from the method.
- ## [RLSetting](sequoia/settings/rl/setting.py)
LightningDataModule for an 'active' setting.
This is to be the parent of settings like RL or maybe Active Learning.
- ## [ContinualRLSetting](sequoia/settings/rl/continual/setting.py)
Reinforcement Learning Setting where the environment changes over time.
This is an Active setting which uses gym environments as sources of data.
These environments' attributes could change over time following a task
schedule. An example of this could be that the gravity increases over time
in cartpole, making the task progressively harder as the agent interacts with
the environment.
- ## [DiscreteTaskAgnosticRLSetting](sequoia/settings/rl/discrete/setting.py)
Continual Reinforcement Learning Setting where there are clear task boundaries,
but where the task information isn't available.
- ## [IncrementalRLSetting](sequoia/settings/rl/incremental/setting.py)
Continual RL setting in which:
- Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL)
- Task boundary information (and task labels) are given at training time
- Task boundary information is given at test time, but task identity is not.
- ## [TaskIncrementalRLSetting](sequoia/settings/rl/task_incremental/setting.py)
Continual RL setting with clear task boundaries and task labels.
The task labels are given at both train and test time.
- ## [MultiTaskRLSetting](sequoia/settings/rl/multi_task/setting.py)
Reinforcement Learning setting where the environment alternates between a set
of tasks sampled uniformly.
Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled
during training.
- ## [TraditionalRLSetting](sequoia/settings/rl/traditional/setting.py)
Your usual "Classical" Reinforcement Learning setting.
Implemented as a MultiTaskRLSetting, but with a single task.
- ## [MultiTaskRLSetting](sequoia/settings/rl/multi_task/setting.py)
Reinforcement Learning setting where the environment alternates between a set
of tasks sampled uniformly.
Implemented as a TaskIncrementalRLSetting, but where the tasks are randomly sampled
during training.
- ## [SLSetting](sequoia/settings/sl/setting.py)
Supervised Learning Setting.
Core assumptions:
- Current actions have no influence on future observations.
- The environment gives back "dense feedback", (the 'reward' associated with all
possible actions at each step, rather than a single action)
For example, supervised learning is a Passive setting, since predicting a
label has no effect on the reward you're given (the label) or on the next
samples you observe.
- ## [ContinualSLSetting](sequoia/settings/sl/continual/setting.py)
Continuous, Task-Agnostic, Continual Supervised Learning.
This is *currently* the most "general" Supervised Continual Learning setting in
Sequoia.
- Data distribution changes smoothly over time.
- Smooth transitions between "tasks"
- No information about task boundaries or task identity (no task IDs)
- Maximum of one 'epoch' through the environment.
- ## [DiscreteTaskAgnosticSLSetting](sequoia/settings/sl/discrete/setting.py)
Continual Supervised Learning Setting where there are clear task boundaries, but
where the task information isn't available.
- ## [IncrementalSLSetting](sequoia/settings/sl/incremental/setting.py)
Supervised Setting where the data is a sequence of 'tasks'.
This class is basically the supervised version of an Incremental Setting.
The current task can be set at the `current_task_id` attribute.
- ## [TaskIncrementalSLSetting](sequoia/settings/sl/task_incremental/setting.py)
Setting where data arrives in a series of Tasks, and where the task
labels are always available (both train and test time).
- ## [MultiTaskSLSetting](sequoia/settings/sl/multi_task/setting.py)
IID version of the Task-Incremental Setting, where the data is shuffled.
Can be used to estimate the upper bound performance of Task-Incremental CL Methods.
- ## [DomainIncrementalSLSetting](sequoia/settings/sl/domain_incremental/setting.py)
Supervised CL Setting where the input domain shifts incrementally.
Task labels and task boundaries are given at training time, but not at test-time.
The crucial difference between the Domain-Incremental and Class-Incremental settings
is that the action space is smaller in domain-incremental learning, as it is a
`Discrete(n_classes_per_task)`, rather than the `Discrete(total_classes)` in
Class-Incremental setting.
For example: Create a classifier for odd vs even hand-written digits. It would first be
trained on digits 0 and 1, then digits 2 and 3, then digits 4 and 5, etc.
At evaluation time, it will be evaluated on all digits.
- ## [TraditionalSLSetting](sequoia/settings/sl/traditional/setting.py)
Your 'usual' supervised learning Setting, where the samples are i.i.d.
This Setting is slightly different than the others, in that it can be recovered in
*two* different ways:
- As a variant of Task-Incremental learning, but where there is only one task;
- As a variant of Domain-Incremental learning, but where there is only one task.
- ## [MultiTaskSLSetting](sequoia/settings/sl/multi_task/setting.py)
IID version of the Task-Incremental Setting, where the data is shuffled.
Can be used to estimate the upper bound performance of Task-Incremental CL Methods.
================================================
FILE: sequoia/settings/__init__.py
================================================
"""
"""
import inspect
from typing import Any, Dict, Iterable, List, Set, Type
from .base.bases import Method, SettingABC
from .base.environment import Environment
from .base.objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .base.results import Results
from .base.setting import Setting, SettingType
from .rl import *
from .sl import *
# # all concrete settings:
# all_settings: List[Type[Setting]] = [
# ClassIncrementalSetting,
# DomainIncrementalSetting,
# TaskIncrementalSLSetting,
# TraditionalSLSetting,
# MultiTaskSetting,
# ContinualRLSetting,
# IncrementalRLSetting,
# TaskIncrementalRLSetting,
# RLSetting,
# ]
# Collect every setting: the root `Setting` class plus everything returned by
# `Setting.children()`.
all_settings: Set[Type[SettingABC]] = {Setting, *Setting.children()}
# FIXME: Remove this, just checking the inspect atm.:
# import inspect
# import pprint
# print(Setting.get_tree_string())
# exit()
# print(inspect.getclasstree(all_settings, unique=True))
# assert False
# assert False, all_settings
================================================
FILE: sequoia/settings/assumptions/__init__.py
================================================
""" WIP: Mixin-style classes that define 'traits'/'assumptions' about a Setting.
IDEA: This package could define things that are to be reused in both the RL and
the CL branches, kind of like a horizontal slice across the tree.
The reasoning behind this is that some methods might require task labels, but
apply on both sides of the tree.
An alternative to this could also be to allow Methods to target multiple
settings, but this could get weird pretty quick.
"""
from .incremental import IncrementalAssumption
# from .task_incremental import TaskIncrementalSLSetting
================================================
FILE: sequoia/settings/assumptions/assumptions.puml
================================================
@startuml assumptions
package assumptions {
' TODO: How to describe relationship between gym.Env and these other
' assumptions about the env?
' abstract class Environment {
' }
' gym.Env --|> Environment
package "assumptions about the environment" as supervision_assumptions {
package "effect of future actions on the environment" as active_vs_passive
{
interface PossiblyActiveEnvironment <> {
# Actions MAY influence future observations
}
abstract class ActiveEnvironment <> extends PossiblyActiveEnvironment {
# Actions DO influence future observations
--
Examples:
Playing tennis
}
abstract class PassiveEnvironment <> extends PossiblyActiveEnvironment {
Actions DONT influence future observations
--
Examples:
+ Predicting what might happen next when watching a movie.
}
' Environment --|> PossiblyActiveEnvironment
}
package "type of feedback (rewards)" as feedback_type_assumption
{
interface Feedback <> {}
abstract class SparseFeedback <> extends Feedback {
the environment only gives back the reward associated with the action taken.
--
Example: When you play a game, you get a reward based on how good your action was.
}
abstract class DenseFeedback <> extends SparseFeedback {
The environment gives the reward for all possible actions at every step.
--
Example: Image classification: The method is told what the image was and
what it was not. The reward (correct vs incorrect prediction) is given
for all the potential actions!
}
}
}
package "assumptions about the context" as context_assumption_family {
package "discrete vs continuous" as context_continuous_vs_discrete {
abstract class ContinuousContext <> {
The context variable is continuous: c ∈ R
Example: Varying friction with the ground in an environment.
}
abstract class DiscreteContext <> extends ContinuousContext {
The context variable is discrete: c ∈ N
Example: A list of possible tasks
}
abstract class FixedContext <> extends DiscreteContext {
The context variable is fixed to a single value
}
}
package "observability" as context_observability {
abstract class HiddenContext <> {
Methods don't have access to the context variable.
}
' abstract class BoundariesObservable <> extends HiddenContext {
' Task boundaries are given during training
' }
abstract class PartiallyObservableContext <> extends HiddenContext {
Methods may have access to the context variable some of the time
Example: Have task labels during training, but not during testing.
}
abstract class FullyObservableContext <> extends PartiallyObservableContext {
Methods always have access to the context variable.
i.e., during training and testing.
}
}
package "non-stationarity" as context_nonstationarity_assumption {
abstract class Continual <> {
The context may change smoothly over time.
}
abstract class Incremental <> extends Continual {
The context can change suddenly (task boundaries)
}
abstract class Stationary <> extends Incremental {
The context is sampled uniformly
}
}
package "shared vs disjoint spaces between tasks" as action_space_assumption {
' NOTE: We could have this for the observation and reward spaces too!
abstract class PossiblySharedActionSpace {
It is possible that there is an overlap in the action space between tasks.
}
abstract class SharedActionSpaces extends PossiblySharedActionSpace {
The action space remains the same in all tasks.
}
abstract class DisjointActionSpaces extends PossiblySharedActionSpace {
Each task has its own (disjoint) action space.
}
}
}
}
package cl {
package continuous {
abstract class ContinuousTaskAgnosticSetting <> extends base.SettingABC {
- clear_task_boundaries: bool = False
' - task_labels_at_train_time: bool = False
' - task_labels_at_test_time: bool = False
' - stationary_context: bool = False
' - shared_action_space: bool = False
}
abstract class continuous.Environment <> extends gym.Env {}
abstract class continuous.Observations <> extends base.Observations {}
abstract class continuous.Actions <> extends base.Actions {}
abstract class continuous.Rewards <> extends base.Rewards {}
' continuous.Environment -.- continuous.Observations: yields
' continuous.Environment -.- continuous.Actions: receives
' continuous.Environment -.- continuous.Rewards: returns
}
package discrete {
abstract class DiscreteTaskAgnosticSetting <> extends ContinuousTaskAgnosticSetting {
== New assumptions ==
+ clear_task_boundaries: Constant[bool] = True
' + known_task_boundaries_at_train_time: bool = False
' + known_task_boundaries_at_test_time: bool = False
== Inherited assumptions ==
' # task_labels_at_train_time: bool = False
' # task_labels_at_test_time: bool = False
' # stationary_context: bool = False
' # shared_action_space: bool = False
}
abstract class discrete.Environment <> extends continuous.Environment {}
abstract class discrete.Observations <> extends continuous.Observations {}
abstract class discrete.Actions <> extends continuous.Actions {}
abstract class discrete.Rewards <> extends continuous.Rewards {}
' discrete.Environment -.- discrete.Observations: yields
' discrete.Environment -.- discrete.Actions: receives
' discrete.Environment -.- discrete.Rewards: returns
}
package incremental {
abstract class IncrementalSetting <> extends DiscreteTaskAgnosticSetting{
== New assumptions ==
+ known_task_boundaries_at_train_time: Constant[bool] = True
+ known_task_boundaries_at_test_time: Constant[bool] = True
== Inherited assumptions ==
# clear_task_boundaries: Constant[bool] = True
' # task_labels_at_train_time: bool = False
' # task_labels_at_test_time: bool = False
' # shared_action_space: bool = False
' # stationary_context: bool = False
}
abstract class incremental.Environment <> extends discrete.Environment {}
abstract class incremental.Observations <> extends discrete.Observations {}
abstract class incremental.Actions <> extends discrete.Actions {}
abstract class incremental.Rewards <> extends discrete.Rewards {}
' incremental.Environment -.- incremental.Observations: yields
' incremental.Environment -.- incremental.Actions: receives
' incremental.Environment -.- incremental.Rewards: returns
}
package class_incremental {
abstract class ClassIncrementalSetting <> extends IncrementalSetting {
== New assumptions ==
+ shared_action_space: Constant[bool] = False
== Inherited assumptions ==
# clear_task_boundaries: Constant[bool] = True
# known_task_boundaries_at_train_time: Constant[bool] = True
# known_task_boundaries_at_test_time: Constant[bool] = True
' # task_labels_at_train_time: bool = False
' # task_labels_at_test_time: bool = False
' # stationary_context: bool = False
}
abstract class class_incremental.Environment <> extends incremental.Environment {}
abstract class class_incremental.Observations <> extends incremental.Observations {}
abstract class class_incremental.Actions <> extends incremental.Actions {}
abstract class class_incremental.Rewards <> extends incremental.Rewards {}
' class_incremental.Environment -.- class_incremental.Observations: yields
' class_incremental.Environment -.- class_incremental.Actions: receives
' class_incremental.Environment -.- class_incremental.Rewards: returns
}
package domain_incremental {
abstract class DomainIncrementalSetting <> extends IncrementalSetting {
== New assumptions ==
+ shared_action_space: Constant[bool] = True
== Inherited assumptions ==
# clear_task_boundaries: Constant[bool] = True
# known_task_boundaries_at_train_time: Constant[bool] = True
# known_task_boundaries_at_test_time: Constant[bool] = True
}
abstract class domain_incremental.Environment <> extends incremental.Environment {}
abstract class domain_incremental.Observations <> extends incremental.Observations {}
abstract class domain_incremental.Actions <> extends incremental.Actions {}
abstract class domain_incremental.Rewards <> extends incremental.Rewards {}
' domain_incremental.Environment -.- domain_incremental.Observations: yields
' domain_incremental.Environment -.- domain_incremental.Actions: receives
' domain_incremental.Environment -.- domain_incremental.Rewards: returns
}
package task_incremental {
abstract class TaskIncrementalSetting <> extends IncrementalSetting {
== New assumptions ==
+ task_labels_at_train_time: Constant[bool] = True
+ task_labels_at_test_time: Constant[bool] = True
== Inherited assumptions ==
# clear_task_boundaries: Constant[bool] = True
# known_task_boundaries_at_train_time: Constant[bool] = True
# known_task_boundaries_at_test_time: Constant[bool] = True
}
abstract class task_incremental.Environment <> extends incremental.Environment {}
abstract class task_incremental.Observations <> extends incremental.Observations {}
abstract class task_incremental.Actions <> extends incremental.Actions {}
abstract class task_incremental.Rewards <> extends incremental.Rewards {}
' task_incremental.Environment -.- task_incremental.Observations: yields
' task_incremental.Environment -.- task_incremental.Actions: receives
' task_incremental.Environment -.- task_incremental.Rewards: returns
}
package traditional{
abstract class TraditionalSetting <> extends IncrementalSetting {
== New assumptions ==
+ stationary_context: Constant[bool] = True
== Inherited assumptions ==
# clear_task_boundaries: Constant[bool] = True
}
abstract class traditional.Environment <> extends incremental.Environment {}
abstract class traditional.Observations <> extends incremental.Observations {}
abstract class traditional.Actions <> extends incremental.Actions {}
abstract class traditional.Rewards <> extends incremental.Rewards {}
' traditional.Environment -.- traditional.Observations: yields
' traditional.Environment -.- traditional.Actions: receives
' traditional.Environment -.- traditional.Rewards: returns
}
package multi_task {
abstract class MultiTaskSetting <> extends TaskIncrementalSetting, TraditionalSetting {
== New assumptions (compared to Traditional) ==
+ task_labels_at_train_time: Constant[bool] = True
+ task_labels_at_test_time: Constant[bool] = True
== New assumptions (compared to TaskIncremental) ==
+ stationary_context: Constant[bool] = True
== Inherited assumptions ==
# stationary_context: Constant[bool] = True
# task_labels_at_train_time: Constant[bool] = True
# task_labels_at_test_time: Constant[bool] = True
# clear_task_boundaries: Constant[bool] = True
# known_task_boundaries_at_train_time: Constant[bool] = True
# known_task_boundaries_at_test_time: Constant[bool] = True
}
abstract class multi_task.Environment <> extends task_incremental.Environment, traditional.Environment {}
abstract class multi_task.Observations <> extends task_incremental.Observations, traditional.Observations {}
abstract class multi_task.Actions <> extends task_incremental.Actions, traditional.Actions {}
abstract class multi_task.Rewards <> extends task_incremental.Rewards, traditional.Rewards {}
}
}
' !include settings/base/base.puml
' remove settings.base
' !include gym.puml
remove assumptions
' remove @unlinked
remove class_incremental
remove domain_incremental
' remove <>
' remove <>
' remove <>
' remove <>
' show context_assumption_family
' remove assumptions
' remove supervision_assumptions
' remove context_assumption_family
' remove <>
' remove <>
' remove sl
' remove cl
' remove rl
' show SLSetting
' show RLSetting
' remove <>
' hide empty fields
' hide empty methods
' ' remove gym
' remove gym.spaces
' ' remove cl
' remove class_incremental
' remove domain_incremental
@enduml
================================================
FILE: sequoia/settings/assumptions/base.py
================================================
from sequoia.settings.base.bases import SettingABC
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
# IDEA: (@lebrice) Exploring the idea of using metaclasses to customize the isinstance
# and subclass checks, so that it could be property-based. This is probably not worth it
# though.
# It's also quite dumb that we have to extend a metaclass from pytorch lightning!
# class AssumptionMeta(_DataModuleWrapper):
# def __instancecheck__(self, instance: Union[SettingABC, Any]):
# logger.debug(f"InstanceCheck on assumption {self} for instance {instance}")
# return super().__instancecheck__(instance)
class AssumptionBase(SettingABC):
    """Common base class for the 'assumption' mixins; adds nothing to `SettingABC`."""
================================================
FILE: sequoia/settings/assumptions/classification.py
================================================
# TODO: Test if a `Protocol` task from the typing or typing-extensions module could be
# used as an Assumption, based on the type of action space on the Setting, etc.
# def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]:
# """ Returns the number of classes in the given task. """
# increment = self.increment if train else self.test_increment
# if isinstance(increment, list):
# return increment[task_id]
# return increment
# def num_classes_in_current_task(self, train: bool = None) -> int:
# """ Returns the number of classes in the current task. """
# # TODO: Its ugly to have the 'method' tell us if we're currently in
# # train/eval/test, no? Maybe just make a method for each?
# return self.num_classes_in_task(self._current_task_id, train=train)
# def task_classes(self, task_id: int, train: bool) -> List[int]:
# """ Gives back the 'true' labels present in the given task. """
# start_index = sum(self.num_classes_in_task(i, train) for i in range(task_id))
# end_index = start_index + self.num_classes_in_task(task_id, train)
# if train:
# return self.class_order[start_index:end_index]
# else:
# return self.test_class_order[start_index:end_index]
# def current_task_classes(self, train: bool) -> List[int]:
# """ Gives back the labels present in the current task. """
# return self.task_classes(self._current_task_id, train)
================================================
FILE: sequoia/settings/assumptions/context_discreteness.py
================================================
from dataclasses import dataclass
from sequoia.utils.utils import constant, flag
from .base import AssumptionBase
@dataclass
class ContinuousContextAssumption(AssumptionBase):
    """Assumption in which `smooth_task_boundaries` is a flag that defaults to True."""

    # Whether we have clear boundaries between tasks, or if the transitions are smooth.
    # Equivalent to whether the context variable is discrete vs continuous.
    smooth_task_boundaries: bool = flag(True)
@dataclass
class DiscreteContextAssumption(ContinuousContextAssumption):
    """Variant of `ContinuousContextAssumption` where `smooth_task_boundaries` is False.

    The field is declared with `constant(False)`; presumably this prevents the value
    from being changed (to be confirmed against `sequoia.utils.utils.constant`).
    """

    # Whether we have clear boundaries between tasks, or if the transitions are smooth.
    # Equivalent to whether the context variable is discrete vs continuous.
    smooth_task_boundaries: bool = constant(False)
================================================
FILE: sequoia/settings/assumptions/context_visibility.py
================================================
from dataclasses import dataclass
from sequoia.utils.utils import constant, flag
from .base import AssumptionBase
@dataclass
class HiddenContextAssumption(AssumptionBase):
    """Assumption in which all four context-visibility flags default to False."""

    # Whether the task labels are observable during training.
    task_labels_at_train_time: bool = flag(False)
    # Whether the task labels are observable during testing.
    task_labels_at_test_time: bool = flag(False)
    # Whether we get informed when reaching the boundary between two tasks during
    # training.
    known_task_boundaries_at_train_time: bool = flag(False)
    # Whether we get informed when reaching the boundary between two tasks during
    # testing.
    known_task_boundaries_at_test_time: bool = flag(False)
@dataclass
class PartiallyObservableContextAssumption(HiddenContextAssumption):
    """Context is observable at train time; test-time boundaries default to known.

    The two train-time options are declared with `constant(True)`, while the
    test-time task-boundary option remains a flag (on by default).
    """

    # Whether the task labels are observable during training.
    task_labels_at_train_time: bool = constant(True)

    # Whether we get informed when reaching the boundary between two tasks during
    # training.
    known_task_boundaries_at_train_time: bool = constant(True)

    # Whether we get informed when reaching the boundary between two tasks during
    # testing.
    known_task_boundaries_at_test_time: bool = flag(True)
@dataclass
class FullyObservableContextAssumption(PartiallyObservableContextAssumption):
    """Context is observable at test time as well as at train time.

    Both test-time options are declared with `constant(True)`.
    """

    # Whether the task labels are observable during testing.
    task_labels_at_test_time: bool = constant(True)

    # Whether we get informed when reaching the boundary between two tasks during
    # testing.
    known_task_boundaries_at_test_time: bool = constant(True)
================================================
FILE: sequoia/settings/assumptions/continual.py
================================================
import itertools
import json
import time
from abc import ABC, abstractmethod
from dataclasses import asdict, dataclass, field, is_dataclass
from io import StringIO
from pathlib import Path
from typing import Any, ClassVar, Dict, Optional, Type
import gym
import tqdm
from gym.vector.utils import batch_space
from simple_parsing import field
from simple_parsing.helpers.serialization.serializable import Serializable
from torch import Tensor
from wandb.wandb_run import Run
import wandb
from sequoia.common.config import Config, WandbConfig
from sequoia.common.gym_wrappers.utils import IterableWrapper
from sequoia.common.metrics import Metrics, MetricsType
from sequoia.settings.base import Actions, Method
from sequoia.settings.base.results import Results
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import add_prefix, flag
from .base import AssumptionBase
from .iid_results import TaskResults
# Module-level logger, named after this module.
logger = get_logger(__name__)
@dataclass
class ContinualResults(TaskResults[MetricsType]):
    """Task results extended with a runtime and the online training performance."""

    # Duration of the run; `None` until something stores a value here.
    _runtime: Optional[float] = None
    # Metrics recorded during training, keyed by step number.
    _online_training_performance: Dict[int, MetricsType] = field(default_factory=dict)

    @property
    def online_performance(self) -> Dict[int, MetricsType]:
        """Returns the online training performance.

        In SL, this is only recorded over the first epoch.

        Returns
        -------
        Dict[int, MetricType]
            A dictionary mapping from step number to the Metrics object produced at
            that step. An empty dict is returned when nothing was recorded.
        """
        return self._online_training_performance or {}

    @property
    def online_performance_metrics(self) -> MetricsType:
        """Returns the sum of all the online-performance Metrics objects."""
        total = Metrics()
        for step_metrics in self.online_performance.values():
            total = total + step_metrics
        return total

    def to_log_dict(self, verbose: bool = False) -> Dict:
        """Returns a nested dict with the average and (if any) online performance."""
        log_dict = {"Average Performance": super().to_log_dict(verbose=verbose)}
        if self._online_training_performance:
            online_metrics = self.online_performance_metrics
            log_dict["Online Performance"] = online_metrics.to_log_dict(verbose=verbose)
        return log_dict

    def summary(self, verbose: bool = False) -> str:
        """Returns the log dict as tab-indented JSON, followed by a newline."""
        return json.dumps(self.to_log_dict(verbose=verbose), indent="\t") + "\n"
@dataclass
class ContinualAssumption(AssumptionBase):
    """Assumptions for Setting where the environments change over time.

    Holds the train / test loops and the Weights & Biases (wandb) logging logic that
    are used when a Method is applied to such a Setting.
    """

    # Which dataset to use.
    # dataset: ClassVar[str] = ""

    # Whether we get informed when reaching the boundary between two tasks during
    # training. Only used when `smooth_task_boundaries` is False.
    known_task_boundaries_at_train_time: bool = flag(False)
    # Whether we get informed when reaching the boundary between two tasks during
    # testing. Only used when `smooth_task_boundaries` is False.
    known_task_boundaries_at_test_time: bool = flag(False)
    # Whether the transitions between environments are "smooth", rather than sudden
    # changes.
    smooth_task_boundaries: bool = flag(True)
    # Whether task labels are available at train time.
    task_labels_at_train_time: bool = flag(False)
    # Whether task labels are available at test time.
    task_labels_at_test_time: bool = flag(False)

    @dataclass(frozen=True)
    class Observations(AssumptionBase.Observations):
        # Task labels for the observations, when they are available.
        task_labels: Optional[Tensor] = None

    @dataclass(frozen=True)
    class Actions(AssumptionBase.Actions):
        pass

    @dataclass(frozen=True)
    class Rewards(AssumptionBase.Rewards):
        pass

    # TODO: Move everything necessary to get ContinualRLSetting to work out of
    # Incremental and into this here. Makes no sense that ContinualRLSetting inherits
    # from Incremental, rather than this!
    Results: ClassVar[Type[ContinualResults]] = ContinualResults

    # Options related to Weights & Biases (wandb). Turned Off by default. Passing any of
    # its arguments will enable wandb.
    # NOTE: Adding `cmd=False` here, so we only create the args in `Experiment`.
    # TODO: Fix this up.
    wandb: Optional[WandbConfig] = field(default=None, compare=False, cmd=False)

    def main_loop(self, method: Method) -> ContinualResults:
        """Runs a continual learning training loop, whether in RL or CL.

        Calls `method.fit` once on the train / validation environments, runs the test
        loop, stores the runtime (and the online training performance, when
        `self.monitor_training_performance` is set) on the results, logs them and
        returns them.

        Parameters
        ----------
        method : Method
            The Method to train and evaluate.

        Returns
        -------
        ContinualResults
            The results produced by the test loop.
        """
        # TODO: Add ways of restoring state to continue a given run.
        if self.wandb and self.wandb.project:
            # Init wandb, and then log the setting's options.
            self.wandb_run = self.setup_wandb(method)
            method.setup_wandb(self.wandb_run)

        train_env = self.train_dataloader()
        valid_env = self.val_dataloader()

        logger.info("Starting training")
        method.set_training()
        # NOTE: `time.process_time` is used, so the runtime excludes time spent
        # sleeping.
        self._start_time = time.process_time()

        method.fit(
            train_env=train_env,
            valid_env=valid_env,
        )
        train_env.close()
        valid_env.close()

        logger.info("Finished Training.")
        results = self.test_loop(method)

        if self.monitor_training_performance:
            results._online_training_performance = train_env.get_online_performance()

        logger.info(f"Resulting objective of Test Loop: {results.objective}")

        self._end_time = time.process_time()
        runtime = self._end_time - self._start_time
        results._runtime = runtime
        logger.info(f"Finished main loop in {runtime} seconds.")
        self.log_results(method, results)
        return results

    def test_loop(self, method: Method) -> ContinualResults:
        """WIP: Continual test loop.

        First tries to let the Method drive the evaluation through `method.test`. If
        that raises `NotImplementedError`, the Method is instead queried for actions
        at each step of the test environment.

        Parameters
        ----------
        method : Method
            The Method to evaluate. Its 'training' mode is restored before returning
            if it was set when this was called.

        Returns
        -------
        ContinualResults
            The results gathered by the test environment.
        """
        test_env = self.test_dataloader()
        test_env: TestEnvironment

        was_training = method.training
        method.set_testing()

        try:
            # If the Method has `test` defined, use it.
            method.test(test_env)
            test_env.close()
            # Get the metrics from the test environment
            test_results: Results = test_env.get_results()

        except NotImplementedError:
            logger.debug(
                "Will query the method for actions at each step, "
                "since it doesn't implement a `test` method."
            )
            obs = test_env.reset()

            # TODO: Do we always have a maximum number of steps? or of episodes?
            # Will it work the same for Supervised and Reinforcement learning?
            max_steps: Optional[int] = getattr(test_env, "step_limit", None)

            # Reset on the last step is causing trouble, since the env is closed.
            pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
            episode = 0

            for step in pbar:
                if obs is None:
                    break
                # NOTE: The env might not be closed, while `obs` is actually still there.
                # if test_env.is_closed():
                #     logger.debug(f"Env is closed")
                #     break
                # logger.debug(f"At step {step}")

                # BUG: Need to pass an action space that actually reflects the batch
                # size, even for the last batch!

                # BUG: This doesn't work if the env isn't batched.
                action_space = test_env.action_space
                batch_size = getattr(test_env, "num_envs", getattr(test_env, "batch_size", 0))
                env_is_batched = batch_size is not None and batch_size >= 1
                if env_is_batched:
                    # NOTE: Need to pass an action space that actually reflects the batch
                    # size, even for the last batch!
                    obs_batch_size = obs.x.shape[0] if obs.x.shape else None
                    action_space_batch_size = (
                        test_env.action_space.shape[0] if test_env.action_space.shape else None
                    )
                    if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
                        action_space = batch_space(test_env.single_action_space, obs_batch_size)

                action = method.get_actions(obs, action_space)

                # Getting the actions may take a while: check that the env is still
                # open before stepping.
                if test_env.is_closed():
                    break

                obs, reward, done, info = test_env.step(action)

                if done and not test_env.is_closed():
                    # logger.debug(f"end of test episode {episode}")
                    obs = test_env.reset()
                    episode += 1

            test_env.close()
            test_results: Results = test_env.get_results()

        if wandb.run:
            d = add_prefix(test_results.to_log_dict(), prefix="Test", sep="/")
            # d = add_prefix(test_metrics.to_log_dict(), prefix="Test", sep="/")
            # d["current_task"] = task_id
            wandb.log(d)

        # Restore 'training' mode, if it was set at the start.
        if was_training:
            method.set_training()

        return test_results
        # return test_results
        # if not self.task_labels_at_test_time:
        #     # TODO: move this wrapper to common/wrappers.
        #     test_env = RemoveTaskLabelsWrapper(test_env)

    def setup_wandb(self, method: Method) -> Run:
        """Call wandb.init, log the experiment configuration to the config dict.

        This assumes that `self.wandb` is not None. This happens when one of the wandb
        arguments is passed.

        Parameters
        ----------
        method : Method
            Method to be applied.

        Returns
        -------
        Run
            The wandb run that was created.
        """
        assert isinstance(self.wandb, WandbConfig)
        method_name: str = method.get_name()
        setting_name: str = self.get_name()
        # Not every Setting declares a `dataset` attribute, so never access it
        # directly.
        dataset = getattr(self, "dataset", None)

        if not self.wandb.run_name:
            # Set the default name for this run.
            run_name = f"{method_name}-{setting_name}"
            if isinstance(dataset, str):
                run_name += f"-{dataset}"
            if getattr(self, "nb_tasks", 0) > 1:
                run_name += f"_{self.nb_tasks}t"  # type: ignore
            self.wandb.run_name = run_name

        run: Run = self.wandb.wandb_init()
        run.config["setting"] = setting_name

        # Add the setting's options into the config:
        setting_config_dict: Dict[str, Any] = {}
        if isinstance(self, Serializable):
            setting_config_dict = self.to_dict()
        elif is_dataclass(self):
            setting_config_dict = asdict(self)
        run.config.update({f"setting.{k}": v for k, v in setting_config_dict.items()})

        run.config["method"] = method_name
        run.config["method_full_name"] = method.get_full_name()

        run.summary["setting"] = self.get_name()
        if isinstance(dataset, str):
            run.summary["dataset"] = dataset
        run.summary["method"] = method.get_name()
        assert wandb.run is run
        return run

    def log_results(self, method: Method, results: Results, prefix: str = "") -> None:
        """Logs the results to the console and, if a wandb run is active, to wandb.

        TODO: Create the tabs we need to show up in wandb:
        1. Final
            - Average "Current/Online" performance (scalar)
            - Average "Final" performance (scalar)
            - Runtime
        2. Test
            - Task i (evolution over time (x axis is the task id, if possible))

        Parameters
        ----------
        method : Method
            The Method that produced the results.
        results : Results
            The results to log.
        prefix : str, optional
            When non-empty, prepended (with a "/" separator) to the keys of the
            logged dictionaries.
        """
        logger.info(results.summary())

        if wandb.run:
            wandb.summary["method"] = method.get_name()
            wandb.summary["setting"] = self.get_name()
            dataset = getattr(self, "dataset", "")
            if dataset and isinstance(dataset, str):
                wandb.summary["dataset"] = dataset

            results_dict = results.to_log_dict()
            if prefix:
                results_dict = add_prefix(results_dict, prefix=prefix, sep="/")
            wandb.log(results_dict)

            # BUG: Sometimes logging a matplotlib figure causes a crash:
            # File "/home/fabrice/miniconda3/envs/sequoia/lib/python3.8/site-packages/plotly/matplotlylib/mplexporter/utils.py", line 246, in get_grid_style
            # if axis._gridOnMajor and len(gridlines) > 0:
            # AttributeError: 'XAxis' object has no attribute '_gridOnMajor'
            # Seems to be fixed by downgrading the matplotlib version to 3.2.2

            plots_dict = results.make_plots()
            if prefix:
                plots_dict = add_prefix(plots_dict, prefix=prefix, sep="/")
            wandb.log(plots_dict)

            # TODO: Finish the run here? Not sure this is right.
            # wandb.run.finish()

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        In the case of Continual and DiscreteTaskAgnostic, fit is only called once,
        with an environment that shifts between all the tasks. In Incremental, fit is
        called once per task, while in Traditional and MultiTask, fit is called once.
        """
        return 1
from gym.vector import VectorEnv
from sequoia.common.gym_wrappers.utils import EnvType
class TestEnvironment(gym.wrappers.Monitor, IterableWrapper[EnvType], ABC):
    """Wrapper around a 'test' environment, which limits the number of steps
    and keeps tracks of the performance.
    """

    def __init__(
        self,
        env: EnvType,
        directory: Path,
        step_limit: int = 1_000,  # TODO: Remove this, use a dedicated wrapper for that.
        no_rewards: bool = False,
        config: Optional[Config] = None,
        *args,
        **kwargs,
    ):
        """Wraps `env` and records statistics in `directory`.

        Parameters
        ----------
        env : EnvType
            The environment to wrap.
        directory : Path
            Directory passed to the `Monitor` wrapper.
        step_limit : int, optional
            Total number of steps after which the env reports `done` and gets
            closed. Defaults to 1000.
        no_rewards : bool, optional
            When True, `step` returns `None` in place of the reward.
        config : Config, optional
            When given, its `render` and `debug` attributes control rendering.
        *args, **kwargs
            Forwarded to the parent constructor.
        """
        super().__init__(env, directory, *args, **kwargs)
        # TODO: Need to stop re-creating the Monitor wrappers when we already have the list of envs
        # for each task!
        logger.info(f"Creating test env (Monitor) with log directory {self.directory}")
        self.step_limit = step_limit
        self.no_rewards = no_rewards
        self._steps = 0
        self.config = config
        # if wandb.run:
        #     wandb.gym.monitor()

    def reset(self, **kwargs):
        """Resets the wrapped environment, running the Monitor's reset hooks."""
        self._before_reset()
        # NOTE: The reset of `IterableWrapper` is invoked explicitly, in between the
        # Monitor's `_before_reset` / `_after_reset` hooks.
        observation = IterableWrapper.reset(self, **kwargs)
        self._after_reset(observation)
        return observation

    @abstractmethod
    def get_results(self) -> Results:
        """Return how well the Method was applied on this environment.

        In RL, this would be based on the mean rewards, while in supervised
        learning it could be the average accuracy, for instance.

        This default implementation returns the sum of the episode rewards divided
        by the total number of steps.

        Returns
        -------
        Results
            The results obtained on this environment.
        """
        # TODO: Total reward over a number of steps? Over a number of episodes?
        # Average reward? What's the metric we care about in RL?
        rewards = self.get_episode_rewards()
        total_steps = self.get_total_steps()
        return sum(rewards) / total_steps

    def step(self, action):
        """Takes a step in the wrapped environment and records its statistics.

        Parameters
        ----------
        action
            Either an `Actions` object (its `y_pred` is what gets recorded), a Tensor
            (converted to a numpy array before being passed to the env), or a raw
            action.

        Returns
        -------
        tuple
            `(observation, reward, done, info)`. `reward` is `None` when
            `no_rewards` is set. `done` is a single bool, and is True once the step
            limit is reached (in which case the env also gets closed).
        """
        # TODO: Its A bit uncomfortable that we have to 'unwrap' these here..
        # logger.debug(f"Step {self._steps}")
        action_for_stats = action.y_pred if isinstance(action, Actions) else action

        self._before_step(action_for_stats)

        if isinstance(action, Tensor):
            action = action.cpu().numpy()
        observation, reward, done, info = self.env.step(action)
        # Assumes the observations / rewards are objects with `x` / `y` attributes.
        observation_for_stats = observation.x
        reward_for_stats = reward.y

        # TODO: Always render when debugging? or only when the corresponding
        # flag is set in self.config?
        try:
            if self.config and self.config.render and self.config.debug:
                self.render("human")
        except NotImplementedError:
            # Rendering is best-effort: not all environments support it.
            pass

        # Collapse `done` to a single bool: with a vectorized env, the step only
        # counts as done when all the environments are done.
        if isinstance(self.env.unwrapped, VectorEnv):
            done = all(done)
        else:
            done = bool(done)

        done = self._after_step(observation_for_stats, reward_for_stats, done, info)

        if self.get_total_steps() >= self.step_limit:
            done = True
            self.close()

        # Remove the rewards if they aren't allowed.
        if self.no_rewards:
            reward = None

        return observation, reward, done, info


# Presumably prevents pytest from collecting this class as a test case because of
# its `Test` prefix.
TestEnvironment.__test__ = False
================================================
FILE: sequoia/settings/assumptions/discrete_results.py
================================================
import json
from dataclasses import dataclass
from io import StringIO
from typing import ClassVar, Dict, Generic, List
import matplotlib.pyplot as plt
from simple_parsing.helpers import list_field
from sequoia.common.metrics import Metrics
from sequoia.settings.base.results import Results
from .iid_results import MetricType, TaskResults
@dataclass
class TaskSequenceResults(Results, Generic[MetricType]):
    """Results obtained when evaluated on a sequence of (discrete) Tasks.

    This is a list with one `TaskResults` entry per task, plus helpers that
    aggregate over the tasks.
    """

    # The results for each task, in order.
    task_results: List[TaskResults[MetricType]] = list_field()

    # For now, all the 'concrete' objectives (mean reward / episode in RL, accuracy in
    # SL) have higher => better
    lower_is_better: ClassVar[bool] = False

    def __post_init__(self):
        """Converts the entries into `TaskResults` if they were given as dicts."""
        if not self.task_results:
            return
        if not isinstance(self.task_results[0], dict):
            return
        self.task_results = [
            TaskResults.from_dict(entry, drop_extra_fields=False) for entry in self.task_results
        ]

    @property
    def objective_name(self) -> str:
        """Name of the objective, taken from the average metrics."""
        return self.average_metrics.objective_name

    @property
    def num_tasks(self) -> int:
        """Returns the number of tasks.

        Returns
        -------
        int
            Number of tasks.
        """
        return len(self.task_results)

    @property
    def average_metrics(self) -> MetricType:
        """Sum of the per-task average metrics, starting from an empty `Metrics`."""
        total = Metrics()
        for task_average in self.average_metrics_per_task:
            total = total + task_average
        return total

    @property
    def average_metrics_per_task(self) -> List[MetricType]:
        """The average metrics of each task, in task order."""
        per_task = []
        for results_for_task in self.task_results:
            per_task.append(results_for_task.average_metrics)
        return per_task

    @property
    def objective(self) -> float:
        """Objective value of the average metrics."""
        return self.average_metrics.objective

    def to_log_dict(self, verbose: bool = False) -> Dict:
        """Returns a dict with one "Task i" entry per task and an "Average" entry."""
        log_dict = {
            f"Task {index}": results_for_task.to_log_dict(verbose=verbose)
            for index, results_for_task in enumerate(self.task_results)
        }
        log_dict["Average"] = self.average_metrics.to_log_dict(verbose=verbose)
        return log_dict

    def summary(self, verbose: bool = False):
        """Returns the log dict as tab-indented JSON, followed by a newline."""
        return json.dumps(self.to_log_dict(verbose=verbose), indent="\t") + "\n"

    def make_plots(self) -> Dict[str, plt.Figure]:
        """Returns the plots of each task, under a "Task i" key."""
        return {
            f"Task {index}": results_for_task.make_plots()
            for index, results_for_task in enumerate(self.task_results)
        }
================================================
FILE: sequoia/settings/assumptions/iid.py
================================================
""" IDEA: create the simple train loop for an IID setting (RL or CL).
"""
from dataclasses import dataclass
from sequoia.utils.utils import constant
from .task_incremental import TaskIncrementalAssumption
# TODO: Import and use the `TaskResults` here.
@dataclass
class TraditionalSetting(TaskIncrementalAssumption):
    """Assumption (mixin) for Settings where the data is stationary (only one
    task).
    """

    # The number of tasks: declared with `constant(1)`, since there is a single task.
    nb_tasks: int = constant(1)

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Always 1 here.
        """
        return 1
================================================
FILE: sequoia/settings/assumptions/iid_results.py
================================================
""" Results for an IID experiment. """
from dataclasses import dataclass, field
from typing import ClassVar, Dict, Generic, List, TypeVar
import matplotlib.pyplot as plt
from sequoia.common.metrics import Metrics
from sequoia.settings.base.results import Results
# Type variable for the kind of Metrics stored in the results: `Metrics` or a
# subclass of it.
MetricType = TypeVar("MetricType", bound=Metrics)
@dataclass
class TaskResults(Results, Generic[MetricType]):
    """Results within a given Task.

    This is just a List of a given Metrics type, with additional methods.
    """

    # For now, all the 'concrete' objectives (mean reward / episode in RL, accuracy in
    # SL) have higher => better
    lower_is_better: ClassVar[bool] = False

    # The Metrics objects gathered for this task.
    metrics: List[MetricType] = field(default_factory=list)
    # Plots to be returned by `make_plots`.
    plots_dict: Dict[str, plt.Figure] = field(default_factory=dict)

    def __post_init__(self):
        """Converts the entries of `metrics` into `Metrics` if they were given as dicts."""
        if self.metrics and isinstance(self.metrics[0], dict):
            self.metrics = [
                Metrics.from_dict(metrics, drop_extra_fields=False) for metrics in self.metrics
            ]

    def __repr__(self) -> str:
        return f"{type(self).__name__}(average(metrics)={self.average_metrics})"

    # NOTE: `__str__` used to be defined twice in this class. Only the last
    # definition was ever in effect, so that is the one that is kept.
    def __str__(self) -> str:
        return f"{type(self).__name__}({self.average_metrics})"

    @property
    def average_metrics(self) -> MetricType:
        """Returns the average 'Metrics' object for this task."""
        return sum(self.metrics, Metrics())

    @property
    def objective(self) -> float:
        """Returns the main 'objective' value (a float) for this task.

        This value could be the average accuracy in SL, or the mean reward / episode in
        RL, depending on the type of Metrics stored in `self`.

        Returns
        -------
        float
            A single float that describes how 'good' these results are.
        """
        return self.average_metrics.objective

    @property
    def objective_name(self) -> str:
        """Name of the objective, taken from the average metrics."""
        # TODO: Add this objective_name attribute on Metrics
        return self.average_metrics.objective_name

    def to_log_dict(self, verbose: bool = False) -> Dict:
        """Produce a dictionary that describes the results / metrics etc.

        Can be logged to console or to wandb using `wandb.log(results.to_log_dict())`.

        Parameters
        ----------
        verbose : bool, optional
            Whether to include very detailed information. Defaults to `False`.

        Returns
        -------
        Dict
            A dict mapping from str keys to either values or nested dicts of the same
            form.
        """
        return self.average_metrics.to_log_dict(verbose=verbose)

    def summary(self, verbose: bool = False) -> str:
        """Returns the log dict, formatted as a string.

        Parameters
        ----------
        verbose : bool, optional
            Forwarded to `to_log_dict`. Defaults to `False`.
        """
        return str(self.to_log_dict(verbose=verbose))

    def make_plots(self) -> Dict[str, plt.Figure]:
        """Produce a set of plots using the Metrics stored in this object.

        Returns
        -------
        Dict[str, plt.Figure]
            Dict mapping from strings to matplotlib plots.
        """
        # Could actually create plots here too.
        return self.plots_dict
================================================
FILE: sequoia/settings/assumptions/incremental.py
================================================
import itertools
import time
from abc import abstractmethod
from dataclasses import dataclass
from typing import ClassVar, Optional, Sequence, Type, Union
import tqdm
from gym.vector.utils.spaces import batch_space
from simple_parsing import field
from torch import Tensor
from wandb.wandb_run import Run
import wandb
from sequoia.common.gym_wrappers.step_callback_wrapper import StepCallbackWrapper
from sequoia.settings.base import Actions, Environment, Method, Results, Rewards, Setting
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import add_prefix, constant, flag
from .continual import ContinualAssumption, TestEnvironment
from .incremental_results import IncrementalResults, TaskSequenceResults
# Module-level logger, named after this module.
logger = get_logger(__name__)
@dataclass
class IncrementalAssumption(ContinualAssumption):
    """Mixin that defines methods that are common to all 'incremental'
    settings, where the data is separated into tasks, and where you may not
    always get the task labels.

    Concretely, this holds the train and test loops that are common to the
    ClassIncrementalSetting (highest node on the Passive side) and ContinualRL
    (highest node on the Active side), therefore this setting, while abstract,
    is quite important.
    """

    # Which dataset to use.
    # dataset: str

    Results: ClassVar[Type[Results]] = IncrementalResults

    @dataclass(frozen=True)
    class Observations(Setting.Observations):
        """Observations produced by an Incremental setting.

        Adds the 'task labels' to the base Observation.
        """

        task_labels: Union[Optional[Tensor], Sequence[Optional[Tensor]]] = None

    # Whether the transition between tasks is smooth, rather than having clear
    # boundaries between tasks.
    smooth_task_boundaries: bool = constant(False)  # constant for now.

    # Whether task labels are available at train time. Defaults to True.
    task_labels_at_train_time: bool = flag(default=True)
    # Whether task labels are available at test time.
    task_labels_at_test_time: bool = flag(default=False)

    # Whether we get informed when reaching the boundary between two tasks during
    # training. Only used when `smooth_task_boundaries` is False.
    # TODO: Setting constant for now, but we could add task boundary detection
    # later on!
    known_task_boundaries_at_train_time: bool = constant(True)
    # Whether we get informed when reaching the boundary between two tasks during
    # testing. Only used when `smooth_task_boundaries` is False.
    known_task_boundaries_at_test_time: bool = True

    # The number of tasks (5 by default). Also available under the `n_tasks` and
    # `num_tasks` aliases.
    nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])

    # Attributes (not parsed through the command-line):
    _current_task_id: int = field(default=0, init=False)

    def __post_init__(self):
        """Creates the (initially empty) attributes used by the train / test loops."""
        super().__post_init__()

        # Environments: unset at this point.
        self.train_env: Environment = None  # type: ignore
        self.val_env: Environment = None  # type: ignore
        self.test_env: TestEnvironment = None  # type: ignore

        # Weights & Biases run, if any.
        self.wandb_run: Optional[Run] = None

        # Start / end times of the main loop.
        self._start_time: Optional[float] = None
        self._end_time: Optional[float] = None
        self._setting_logged_to_wandb: bool = False

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks, but may be different, for instance in so-called
        Multi-Task Settings, this is set to 1.
        """
        return self.nb_tasks

    @property
    def current_task_id(self) -> Optional[int]:
        """Get the current task id.

        TODO: Do we want to return None if the task labels aren't currently
        available? (at either Train or Test time?) Or if we 'detect' if
        this is being called from the method?

        TODO: This property doesn't really make sense in the Multi-Task SL or RL
        settings.
        """
        return self._current_task_id

    @current_task_id.setter
    def current_task_id(self, value: int) -> None:
        """Sets the current task id."""
        self._current_task_id = value

    def task_boundary_reached(self, method: Method, task_id: int, training: bool):
        """Informs the Method of a task boundary, when the setting allows it.

        Nothing happens unless task boundaries are known (for the given phase, train
        or test). Otherwise `method.on_task_switch` is called with `None` when task
        labels aren't available, or with `task_id` when they are (except when there
        is a single phase, in which case it isn't called).

        Parameters
        ----------
        method : Method
            The Method to inform.
        task_id : int
            Index of the task that starts at this boundary.
        training : bool
            Whether the train-time or test-time options are to be used.
        """
        if training:
            boundaries_are_known = self.known_task_boundaries_at_train_time
            labels_are_available = self.task_labels_at_train_time
        else:
            boundaries_are_known = self.known_task_boundaries_at_test_time
            labels_are_available = self.task_labels_at_test_time

        if not boundaries_are_known:
            return

        # Inform the model of a task boundary. If the task labels are available, then
        # also give the id of the new task to the method.
        # TODO: Should we also inform the method of whether or not the task switch is
        # occurring during training or testing?
        if not hasattr(method, "on_task_switch"):
            logger.warning(
                UserWarning(
                    f"On a task boundary, but since your method doesn't "
                    f"have an `on_task_switch` method, it won't know about "
                    f"it! "
                )
            )
            return

        if not labels_are_available:
            method.on_task_switch(None)
            return

        if self.phases == 1:
            # NOTE: on_task_switch won't be called if there is only one task.
            return

        method.on_task_switch(task_id)

    def main_loop(self, method: Method) -> IncrementalResults:
        """Runs an incremental training loop, whether in RL or CL.

        For each phase, the Method is notified of the task boundary and fitted on the
        train / validation environments of the current task, then a test loop is run
        and its results are appended as a new row of the transfer matrix.

        Parameters
        ----------
        method : Method
            The Method to train and evaluate.

        Returns
        -------
        IncrementalResults
            One set of test results per phase, along with the runtime.
        """
        # TODO: Add ways of restoring state to continue a given run?
        # NOTE: We could also just store a single metric for each test task, but then
        # we'd lose the ability to create plots showing the performance within a test
        # task.
        # IDEA: We could use a list of IIDResults! (but that might cause some circular
        # import issues)
        results = self.Results()
        if self.monitor_training_performance:
            results._online_training_performance = []

        if self.wandb and self.wandb.project:
            # Init wandb, and then log the setting's options.
            self.wandb_run = self.setup_wandb(method)
            method.setup_wandb(self.wandb_run)

        # TODO: Fix this up, need to set the '_objective_scaling_factor' to a different
        # value depending on the 'dataset' / environment.
        results._objective_scaling_factor = self._get_objective_scaling_factor()

        method.set_training()
        self._start_time = time.process_time()

        for task_id in range(self.phases):
            suffix = f" on task {task_id}." if self.nb_tasks > 1 else "."
            logger.info("Starting training" + suffix)
            self.current_task_id = task_id
            self.task_boundary_reached(method, task_id=task_id, training=True)

            # Creating the dataloaders ourselves (rather than passing 'self' as the
            # datamodule):
            task_train_env = self.train_dataloader()
            task_valid_env = self.val_dataloader()

            method.fit(train_env=task_train_env, valid_env=task_valid_env)

            task_train_env.close()
            task_valid_env.close()

            if self.monitor_training_performance:
                online_performance = task_train_env.get_online_performance()
                results._online_training_performance.append(online_performance)

            logger.info(f"Finished Training on task {task_id}.")
            task_test_results: TaskSequenceResults = self.test_loop(method)

            # Add a row to the transfer matrix.
            results.task_sequence_results.append(task_test_results)
            logger.info(f"Resulting objective of Test Loop: {task_test_results.objective}")

            if wandb.run:
                log_dict = add_prefix(task_test_results.to_log_dict(), prefix="Test", sep="/")
                log_dict["current_task"] = task_id
                wandb.log(log_dict)

        self._end_time = time.process_time()
        runtime = self._end_time - self._start_time
        results._runtime = runtime
        logger.info(f"Finished main loop in {runtime} seconds.")

        self.log_results(method, results)
        return results

    def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
        """(WIP): Runs an incremental test loop and returns the Results.

        The idea is that this loop should be exactly the same, regardless of if
        you're on the RL or the CL side of the tree.

        NOTE: If `self.known_task_boundaries_at_test_time` is `True` and there is
        more than one task, then a callback wrapper is added around the test
        environment. At each task boundary, it invokes the method's `on_task_switch`
        (when the method has one) and passes it the task id (or `None` if
        `not self.task_labels_at_test_time`).

        This `on_task_switch` 'callback' wrapper gets added the same way for
        Supervised or Reinforcement learning settings.
        """
        test_env = self.test_dataloader()
        test_env: TestEnvironment

        was_training = method.training
        method.set_testing()

        if self.known_task_boundaries_at_test_time and self.nb_tasks > 1:

            def _on_task_switch(step: int, *arg) -> None:
                # TODO: This attribute isn't on IncrementalAssumption itself, it's
                # defined on ContinualRLSetting.
                if step not in test_env.boundary_steps:
                    return

                if not hasattr(method, "on_task_switch"):
                    logger.warning(
                        UserWarning(
                            f"On a task boundary, but since your method doesn't "
                            f"have an `on_task_switch` method, it won't know about "
                            f"it! "
                        )
                    )
                    return

                if not self.task_labels_at_test_time:
                    logger.debug(
                        f"Calling `method.on_task_switch(None)` "
                        f"since task labels aren't available at "
                        f"test-time, but task boundaries are known."
                    )
                    method.on_task_switch(None)
                    return

                # TODO: Should this 'test boundary' step depend on the batch size?
                ordered_boundaries = sorted(test_env.boundary_steps)
                # TODO: If the ordering of tasks were different (shuffled tasks for
                # example), then this wouldn't work, we'd need a list of the task ids
                # or something like that.
                task_id = ordered_boundaries.index(step)
                logger.debug(
                    f"Calling `method.on_task_switch({task_id})` "
                    f"since task labels are available at test-time."
                )
                method.on_task_switch(task_id)

            test_env = StepCallbackWrapper(test_env, callbacks=[_on_task_switch])

        # If the Method has `test` defined, use it.
        method.test(test_env)
        test_env: TestEnvironment
        # Get the metrics from the test environment
        test_results: TaskSequenceResults = test_env.get_results()

        # Restore 'training' mode, if it was set at the start.
        if was_training:
            method.set_training()

        return test_results
        # return test_results
        # if not self.task_labels_at_test_time:
        #     # TODO: move this wrapper to common/wrappers.
        #     test_env = RemoveTaskLabelsWrapper(test_env)

    @abstractmethod
    def train_dataloader(
        self, *args, **kwargs
    ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]:
        """Returns the DataLoader/Environment for the current train task."""
        return super().train_dataloader(*args, **kwargs)

    @abstractmethod
    def val_dataloader(
        self, *args, **kwargs
    ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]:
        """Returns the DataLoader/Environment used for validation on the
        current task.
        """
        return super().val_dataloader(*args, **kwargs)

    @abstractmethod
    def test_dataloader(
        self, *args, **kwargs
    ) -> Environment["IncrementalAssumption.Observations", Actions, Rewards]:
        """Returns the Test Environment (for all the tasks)."""
        return super().test_dataloader(*args, **kwargs)

    def _get_objective_scaling_factor(self) -> float:
        """Returns the factor stored on the results as `_objective_scaling_factor`.

        Always 1.0 here.
        """
        return 1.0
================================================
FILE: sequoia/settings/assumptions/incremental_results.py
================================================
""" Results of an Incremental setting. """
import json
import warnings
from dataclasses import dataclass
from io import StringIO
from typing import ClassVar, Dict, Generic, List, Optional, Union
import matplotlib.pyplot as plt
import numpy as np
from gym.utils import colorize
from simple_parsing.helpers import list_field
from simple_parsing.helpers.serialization import encode
import wandb
from sequoia.common.metrics import Metrics
from sequoia.settings.base.results import Results
from .discrete_results import TaskSequenceResults
from .iid_results import MetricType, TaskResults
@dataclass
class IncrementalResults(Results, Generic[MetricType]):
"""Results for a whole train loop (transfer matrix).
This class is basically just a 2d list of TaskResults objects, with some convenience
methods and properties.
We get one TaskSequenceResults (a 1d list of TaskResults objects) as a result of
every test loop, which, in the Incremental Settings, happens after training on each
task, hence why we get a nb_tasks x nb_tasks matrix of results.
"""
task_sequence_results: List[TaskSequenceResults[MetricType]] = list_field()
min_runtime_hours: ClassVar[float] = 0.0
max_runtime_hours: ClassVar[float] = 12.0
def __post_init__(self):
self._runtime: Optional[float] = None
self._online_training_performance: Optional[List[Dict[int, Metrics]]] = None
# Factor used to scale the 'objective' to a 'score' between 0 and 1.
self._objective_scaling_factor: float = 1.0
@property
def runtime_minutes(self) -> Optional[float]:
return self._runtime / 60 if self._runtime is not None else None
@property
def runtime_hours(self) -> Optional[float]:
return self._runtime / 3600 if self._runtime is not None else None
@property
def transfer_matrix(self) -> List[List[TaskResults]]:
return [
task_sequence_result.task_results for task_sequence_result in self.task_sequence_results
]
@property
def metrics_matrix(self) -> List[List[MetricType]]:
"""Returns the 'transfer matrix' but with the average metrics for each task
in each cell.
NOTE: This is different from `transfer_matrix` since it returns the matrix of
`TaskResults` objects (which are themselves lists of Metrics)
Returns
-------
List[List[MetricType]]
2d grid of average metrics for each task.
"""
return [
[task_results.average_metrics for task_results in task_sequence_result]
for task_sequence_result in self
]
@property
def objective_matrix(self) -> List[List[float]]:
"""Return transfer matrix containing the value of the 'objective' for each task.
The value at the index (i, j) gives the test performance on task j after having
learned tasks 0-i.
Returns
-------
List[List[float]]
The 2d matrix of objectives (floats).
"""
return [
[task_result.objective for task_result in task_sequence_result]
for task_sequence_result in self.transfer_matrix
]
@property
def cl_score(self) -> float:
    """CL Score, as a weighted sum of three objectives:

    - The average 'online' performance over all tasks (weight 0.30)
    - The average final performance over all tasks (weight 0.40)
    - Runtime (weight 0.30)

    TODO: @optimass Determine the weights for each factor.

    Returns
    -------
    float
        The weighted sum of the three partial scores.
    """
    # TODO: Determine the function to use to get a runtime score between 0 and 1.
    online_term = 0.30 * self._online_performance_score()
    final_term = 0.40 * self._final_performance_score()
    runtime_term = 0.30 * self._runtime_score()
    return online_term + final_term + runtime_term
def _runtime_score(self) -> float:
# TODO: function that takes the total runtime in seconds and returns a
# normalized float score between 0 and 1.
runtime_seconds = self._runtime
if self._runtime is None:
warnings.warn(
RuntimeWarning(
colorize(
"Runtime is None! Returning runtime score of 0.\n (Make sure the "
"Setting had its `monitor_training_performance` attr set to True!",
color="red",
)
)
)
return 0
runtime_hours = runtime_seconds / 3600
# Get the maximum runtime for this type of Results (and Setting)
min_runtime_hours = type(self).min_runtime_hours
max_runtime_hours = type(self).max_runtime_hours
assert 0 <= min_runtime_hours < max_runtime_hours
assert 0 < runtime_hours
if runtime_hours <= min_runtime_hours:
return 1.0
if max_runtime_hours <= runtime_hours:
return 0.0
return 1 - ((runtime_hours - min_runtime_hours) / (max_runtime_hours - min_runtime_hours))
def _online_performance_score(self) -> float:
    """Score derived from the online (training-time) performance.

    Takes the 'objective' of the online performance Metrics of each task,
    averages them, and multiplies the mean by `_objective_scaling_factor`.
    """
    objectives: List[float] = []
    for task_online_metric in self.online_performance_metrics:
        objectives.append(task_online_metric.objective)
    mean_objective = np.mean(objectives)
    return self._objective_scaling_factor * mean_objective
def _final_performance_score(self) -> float:
"""Function that takes the 'objective' of the Metrics from the average
final performance, and returns a normalized float score between 0 and 1.
"""
objectives: List[float] = [
task_metric.objective for task_metric in self.final_performance_metrics
]
return self._objective_scaling_factor * np.mean(objectives)
# return self._objective_scaling_factor * self.average_final_performance.objective
@property
def objective(self) -> float:
    """The 'objective' of `average_final_performance` (not `cl_score`)."""
    final_average = self.average_final_performance
    return final_average.objective
@property
def num_tasks(self) -> int:
return len(self.task_sequence_results)
@property
def online_performance(self) -> List[Dict[int, MetricType]]:
"""Returns the online training performance for each task. i.e. the diagonal of
the transfer matrix.
In SL, this is only recorded over the first epoch.
Returns
-------
List[Dict[int, MetricType]]
A List containing, for each task, a dictionary mapping from step number to
the Metrics object produced at that step.
"""
if not self._online_training_performance:
return [{} for _ in range(self.num_tasks)]
return self._online_training_performance
# return [self[i][i] for i in range(self.num_tasks)]
@property
def online_performance_metrics(self) -> List[MetricType]:
    """One Metrics object per task: the sum of that task's online metrics.

    The per-step Metrics of each task are added together, starting from an
    empty `Metrics()`.
    """
    per_task_metrics: List[MetricType] = []
    for step_to_metrics in self.online_performance:
        per_task_metrics.append(sum(step_to_metrics.values(), Metrics()))
    return per_task_metrics
@property
def final_performance(self) -> List[TaskResults[MetricType]]:
return self.transfer_matrix[-1]
@property
def final_performance_metrics(self) -> List[MetricType]:
return [task_result.average_metrics for task_result in self.final_performance]
@property
def average_online_performance(self) -> MetricType:
    """Sum of `online_performance_metrics`, starting from an empty `Metrics()`.

    NOTE(review): the name says 'average'; presumably adding Metrics objects
    yields averaged values - confirm against the Metrics class.
    """
    combined = sum(self.online_performance_metrics, Metrics())
    return combined
@property
def average_final_performance(self) -> MetricType:
    """Sum of `final_performance_metrics`, starting from an empty `Metrics()`.

    NOTE(review): the name says 'average'; presumably adding Metrics objects
    yields averaged values - confirm against the Metrics class.
    """
    combined = sum(self.final_performance_metrics, Metrics())
    return combined
def to_log_dict(self, verbose: bool = False) -> Dict:
"""Build a (nested) dictionary of values to log for these results.
The dictionary holds one "Task {i}" entry per stored TaskSequenceResults, an
"Online Performance" entry when online training performance was recorded, and
the "Final/..." summary entries (scores, runtime in seconds and CL score).
Parameters
----------
verbose : bool, optional
Forwarded to the `to_log_dict` method of the nested results and metrics.
Returns
-------
Dict
The dictionary of values to log.
"""
log_dict = {}
# TODO: This assumes that the metrics were stored in the right index for their
# corresponding task.
for task_id, task_sequence_result in enumerate(self.task_sequence_results):
log_dict[f"Task {task_id}"] = task_sequence_result.to_log_dict(verbose=verbose)
# Only report the per-task online metrics when some were actually recorded.
if self._online_training_performance:
log_dict["Online Performance"] = {
f"Task {task_id}": task_online_metrics.to_log_dict(verbose=verbose)
for task_id, task_online_metrics in enumerate(self.online_performance_metrics)
}
log_dict.update(
{
"Final/Average Online Performance": self._online_performance_score(),
"Final/Average Final Performance": self._final_performance_score(),
"Final/Runtime (seconds)": self._runtime,
"Final/CL Score": self.cl_score,
}
)
return log_dict
def summary(self, verbose: bool = False):
    """Return the log dictionary as tab-indented JSON text.

    The text ends with a newline. Values that `json` cannot serialize by
    itself are converted with `encode`.
    """
    serialized = json.dumps(
        self.to_log_dict(verbose=verbose), indent="\t", default=encode
    )
    return serialized + "\n"
def make_plots(self) -> Dict[str, Union[plt.Figure, Dict]]:
    """Create the plots for these results.

    Returns
    -------
    Dict[str, Union[plt.Figure, Dict]]
        A dictionary with one "Task {i}" entry per stored TaskSequenceResults
        (the output of its `make_plots` method). When a wandb run is active,
        it also contains a "Transfer matrix" heatmap of the objectives and a
        "Test Performance" line plot of the average objective per test loop.
    """
    plots = {
        f"Task {task_id}": task_sequence_result.make_plots()
        for task_id, task_sequence_result in enumerate(self.task_sequence_results)
    }
    axis_labels = [f"Task {task_id}" for task_id in range(self.num_tasks)]
    # NOTE(review): all the wandb-specific plots are created only when a run is
    # active - confirm that this matches the intended nesting.
    if wandb.run:
        plots["Transfer matrix"] = wandb.plots.HeatMap(
            x_labels=axis_labels,
            y_labels=axis_labels,
            matrix_values=self.objective_matrix,
            show_text=True,
        )
        # `np.asfarray` was removed in NumPy 2.0; `np.asarray(..., dtype=float)`
        # produces the same float array on both old and new versions.
        objective_array = np.asarray(self.objective_matrix, dtype=float)
        # Average over the tasks (last axis): one value per test loop.
        perf_per_step = objective_array.mean(-1)
        table = wandb.Table(
            data=[[i + 1, perf] for i, perf in enumerate(perf_per_step)],
            columns=["# of learned tasks", "Average Test performance on all tasks"],
        )
        plots["Test Performance"] = wandb.plot.line(
            table,
            x="# of learned tasks",
            y="Average Test performance on all tasks",
            title="Test Performance vs # of Learned tasks",
        )
    return plots
def __str__(self) -> str:
    """Return the same text as `summary()` with its default arguments."""
    text = self.summary()
    return text
================================================
FILE: sequoia/settings/assumptions/incremental_test.py
================================================
from typing import List, Optional
import gym
import numpy as np
from gym import Space
from gym.vector.utils.spaces import batch_space
from sequoia.methods import Method
from sequoia.settings import Actions, Environment, Observations
from .incremental import IncrementalAssumption, TestEnvironment
class DummyMethod(Method, target_setting=IncrementalAssumption):
    """Dummy method that takes random actions and records how it gets used.

    Used to check that the Setting calls `on_task_switch` with the right
    arguments: every call is counted and its arguments are stored.
    """

    def __init__(self):
        # Call counters.
        self.n_task_switches = 0
        self.n_fit_calls = 0
        # Arguments / state observed at every `on_task_switch` call.
        self.received_task_ids: List[Optional[int]] = []
        self.received_while_training: List[bool] = []
        # One entry per `fit` call.
        self.train_steps_per_task: List[int] = []
        self.train_episodes_per_task: List[int] = []

    def fit(self, train_env: gym.Env = None, valid_env: gym.Env = None):
        """Take random actions in `train_env` for at most 100 steps.

        Stops early as soon as a step reports `done`, and counts the steps
        and finished episodes of this call.
        """
        self.n_fit_calls += 1
        self.train_steps_per_task.append(0)
        self.train_episodes_per_task.append(0)
        train_env.reset()
        max_steps = 100
        for _ in range(max_steps):
            random_action = train_env.action_space.sample()
            _, _, done, _ = train_env.step(random_action)
            self.train_steps_per_task[-1] += 1
            if done:
                self.train_episodes_per_task[-1] += 1
                break

    def test(self, test_env: TestEnvironment):
        """Run random-action episodes until the test environment is closed."""
        while not test_env.is_closed():
            test_env.reset()
            episode_over = False
            while not episode_over:
                random_action = test_env.action_space.sample()
                _, _, episode_over, _ = test_env.step(random_action)

    def get_actions(
        self, observations: IncrementalAssumption.Observations, action_space: gym.Space
    ):
        """Return an array of ones with the shape of the action space."""
        return np.ones(action_space.shape)

    def on_task_switch(self, task_id: int = None):
        """Record the task id and whether the method is in its training phase."""
        self.n_task_switches += 1
        self.received_task_ids.append(task_id)
        self.received_while_training.append(self.training)
class OtherDummyMethod(Method, target_setting=IncrementalAssumption):
"""Dummy method that sends random actions and records the batch sizes it sees.
The batch size of the observations passed to `get_actions` is appended to
`batch_sizes` at every call (0 when the observations have no shape).
"""
def __init__(self):
# Batch size observed at each `get_actions` call.
self.batch_sizes: List[int] = []
def fit(self, train_env: Environment, valid_env: Environment):
"""Iterate over `train_env`, sending random actions when no rewards are given."""
for i, batch in enumerate(train_env):
# A batch is either just the observations, or an (observations, rewards) pair.
if isinstance(batch, Observations):
observations, rewards = batch, None
else:
assert isinstance(batch, tuple) and len(batch) == 2
observations, rewards = batch
y_preds = train_env.action_space.sample()
if rewards is None:
action_space = train_env.action_space
if train_env.action_space.shape:
# This is a bit complicated, but it's needed because the last batch
# might have a different batch dimension than the env's action
# space, (only happens on the last batch in supervised learning).
# TODO: Should we perhaps drop the last batch?
action_space = train_env.action_space
# Fall back from `num_envs` to `batch_size`, then to 0 when neither exists.
batch_size = getattr(train_env, "num_envs", getattr(train_env, "batch_size", 0))
env_is_batched = batch_size is not None and batch_size >= 1
if env_is_batched:
# NOTE: Need to pass an action space that actually reflects the batch
# size, even for the last batch!
obs_batch_size = observations.x.shape[0] if observations.x.shape else None
action_space_batch_size = (
train_env.action_space.shape[0]
if train_env.action_space.shape
else None
)
if obs_batch_size is not None and obs_batch_size != action_space_batch_size:
# Rebuild a batched action space that matches this batch's size.
action_space = batch_space(
train_env.single_action_space, obs_batch_size
)
y_preds = action_space.sample()
rewards = train_env.send(Actions(y_pred=y_preds))
def get_actions(self, observations: Observations, action_space: Space) -> Actions:
"""Record the batch size of `observations` and return a random action."""
# This won't work on weirder spaces.
if action_space.shape:
assert observations.x.shape[0] == action_space.shape[0]
if getattr(observations.x, "shape", None):
batch_size = 1
if observations.x.ndim > 1:
batch_size = observations.x.shape[0]
self.batch_sizes.append(batch_size)
else:
self.batch_sizes.append(0) # X isn't batched.
return action_space.sample()
================================================
FILE: sequoia/settings/assumptions/task_incremental.py
================================================
from dataclasses import dataclass
from sequoia.utils.utils import constant
from .context_visibility import FullyObservableContextAssumption
from .incremental import IncrementalAssumption
@dataclass
class TaskIncrementalAssumption(FullyObservableContextAssumption, IncrementalAssumption):
"""Assumption (mixin) for Settings where the task labels are available at
both train and test time.
"""
# Both flags are declared with `constant(True)`.
# NOTE(review): presumably `constant` prevents them from being changed - confirm.
task_labels_at_train_time: bool = constant(True)
task_labels_at_test_time: bool = constant(True)
================================================
FILE: sequoia/settings/assumptions/task_type.py
================================================
from dataclasses import dataclass
from typing import Union
from torch import LongTensor, Tensor
from sequoia.settings.base import Actions
@dataclass(frozen=True)
class ClassificationActions(Actions):
    """Typed dict-like class that represents the 'forward pass'/output of a
    classification head, which correspond to the 'actions' to be sent to the
    environment, in the general formulation.
    """

    # Predicted class index (or indices, one per sample).
    y_pred: Union[LongTensor, Tensor]
    # Unnormalized class scores; the classes are on the last dimension.
    logits: Tensor

    @property
    def action(self) -> LongTensor:
        """Alias for `y_pred`."""
        return self.y_pred

    @property
    def y_pred_log_prob(self) -> Tensor:
        """Returns the log probabilities for the chosen actions/predictions.

        The logits are normalized with a log-softmax over the last dimension,
        then the entry of the predicted class is selected for each sample, so
        the result has the same shape as `y_pred`.
        """
        log_probabilities = self.logits.log_softmax(-1)
        # `gather` picks one value per sample along the class dimension
        # (plain indexing with `y_pred` would mix up samples and classes).
        predicted_index = self.y_pred.long().unsqueeze(-1)
        return log_probabilities.gather(-1, predicted_index).squeeze(-1)

    @property
    def y_pred_prob(self) -> Tensor:
        """Returns the probabilities for the chosen actions/predictions.

        The result has the same shape as `y_pred`.
        """
        predicted_index = self.y_pred.long().unsqueeze(-1)
        return self.probabilities.gather(-1, predicted_index).squeeze(-1)

    @property
    def probabilities(self) -> Tensor:
        """Returns the normalized probabilities for each class, i.e. the
        softmax-ed version of `self.logits`.
        """
        return self.logits.softmax(-1)
================================================
FILE: sequoia/settings/base/__init__.py
================================================
from .bases import Method, SettingABC
from .environment import Environment
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .results import Results
from .setting import Setting, SettingType
================================================
FILE: sequoia/settings/base/base.puml
================================================
@startuml base
!include gym.puml
remove gym.spaces
remove Wrapper
hide empty members
package sequoia as settings.base {
' namespace base.objects {
together {
together {
abstract class Observations extends Batch {
+ x: Tensor
}
abstract class Actions extends Batch {
+ y_pred: Tensor
}
abstract class Rewards extends Batch {
+ y: Tensor
}
}
Environment --* Observations: yields
Environment --* Actions: receives
Environment --* Rewards: returns
interface Environment extends gym.Env, torch.DataLoader {
+ observation_space: Space
+ action_space: Space
+ reward_space: Space
+ step(Actions actions) -> Tuple[Observations, Rewards, bool, Dict]
+ reset() -> Observations
}
abstract class Results {
+ objective: float
}
interface SettingABC {
-- static (class) attributes --
+ {static} Results: Type[Results]
+ {static} Observations: Type[Observations]
+ {static} Actions: Type[Actions]
+ {static} Rewards: Type[Rewards]
--
{abstract} + apply(Method): Results
}
' TODO: Here we just show the most basic interface.
abstract class Setting extends SettingABC, pytorch_lightning.LightningDataModule {
-- static (class) attributes --
+ {static} Results: Type[Results]
+ {static} Observations: Type[Observations]
+ {static} Actions: Type[Actions]
+ {static} Rewards: Type[Rewards]
' TODO: should we move this to `Setting` rather than SettingABC?
-- inherited from LightningDataModule --
{abstract} + prepare_data()
{abstract} + setup()
{abstract} + train_dataloader() -> Environment
{abstract} + val_dataloader() -> Environment
{abstract} + test_dataloader() -> Environment
== Abstract Method ==
{abstract} + apply(Method) -> Results
}
' NOTE: Choose either of the following code blocks:
' -------------
remove Setting
remove pytorch_lightning
SettingABC -.left-> Environment : creates
SettingABC -.-> Results : produces
SettingABC -.-> Method : applies
SettingABC <-.- Method : targets
' ----- OR -----
' remove SettingABC
' Setting -.left-> Environment : creates
' Setting -.-> Results : produces
' Setting -.-> Method : applies
' Setting <-.- Method : targets
' -------------
}
Method <-.-> Environment : interacts with
abstract class Method {
.. abstract static attributes ..
{static} {abstract} target_setting: Type[S]
.. abstract (required) methods ..
{abstract} + fit(train_env: Environment, valid_env: Environment)
{abstract} + get_actions(observations: Observations, action_space: Space)
.. optional methods ..
+ configure(setting: S)
+ on_task_switch(task_id: Optional[int])
+ test(test_env: Environment)
' - is_applicable(setting: SettingABC): bool
}
abstract class Model {
+ forward(input: Observations) -> Actions
}
Method -.- Model : ( can use )
}
remove Batch
@enduml
================================================
FILE: sequoia/settings/base/bases.py
================================================
""" This module defines the base classes for Settings and Methods.
"""
import json
import traceback
import typing
from abc import ABC, abstractmethod
from functools import partial
from io import StringIO
from pathlib import Path
from typing import (
Any,
ClassVar,
Dict,
Generic,
Iterable,
List,
Mapping,
Optional,
Set,
Tuple,
Type,
TypeVar,
Union,
)
import gym
from gym.utils import colorize
from pytorch_lightning import LightningDataModule
from wandb.wandb_run import Run
import wandb
if typing.TYPE_CHECKING:
from sequoia.common.config.config import Config
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import Actions, Observations, Rewards
from sequoia.settings.base.results import Results
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.parseable import Parseable
from sequoia.utils.utils import (
camel_case,
compute_identity,
flatten_dict,
get_path_to_source_file,
remove_suffix,
)
# Module-level logger, named after this module.
logger = get_logger(__name__)
class SettingABC:
"""Abstract base class for a Setting.
This just shows the minimal API. For more info, see the `Setting` class,
which is the concrete implementation of this class, and the 'root' of the
tree.
Abstract (required) methods:
- **apply** Applies a given Method on this setting to produce Results.
"Abstract"-ish (required) class attributes:
- `Results`: The class of Results that are created when applying a Method on
this setting.
- `Observations`: The type of Observations that will be produced in this
setting.
- `Actions`: The type of Actions that are expected from this setting.
- `Rewards`: The type of Rewards that this setting will (potentially) return
upon receiving an action from the method.
"""
# Defaults: the base types imported at the top of this module.
Results: ClassVar[Type[Results]] = Results
Observations: ClassVar[Type[Observations]] = Observations
Actions: ClassVar[Type[Actions]] = Actions
Rewards: ClassVar[Type[Rewards]] = Rewards
@abstractmethod
def apply(self, method: "Method", config: "Config" = None) -> "SettingABC.Results":
"""Applies a Method on this experimental Setting to produce Results.
Defines the training/evaluation procedure specific to this Setting.
The training/evaluation loop can be defined however you want, as long as
it respects the following constraints:
1. This method should always return either a float or a Results object
that indicates the "performance" of this method on this setting.
2. More importantly: You **have** to make sure that you do not break
compatibility with more general methods targeting a parent setting!
It should always be the case that all methods designed for any of
this Setting's parents should also be applicable via polymorphism,
i.e., anything that is defined to work on the class `Animal` should
also work on the class `Cat`!
3. While not enforced, it is strongly encouraged that you define your
training/evaluation routines at a pretty high level, so that Methods
that get applied to your Setting can make use of pytorch-lightning's
`Trainer` & `LightningDataModule` API to be neat and fast.
Parameters
----------
method : Method
A Method to apply on this Setting.
config : Optional[Config]
Optional configuration object with things like the log dir, the data
dir, cuda, wandb config, etc. When None, will be parsed from the
current command-line arguments.
Returns
-------
Results
An object that is used to measure or quantify the performance of the
Method on this experimental Setting.
Raises
------
NotImplementedError
Always, in this base implementation.
"""
raise NotImplementedError()
@abstractmethod
def prepare_data(self, *args, **kwargs):
"""Abstract hook; the base implementation does nothing."""
pass
@abstractmethod
def setup(self, stage: Optional[str] = None):
"""Abstract hook; the base implementation does nothing."""
pass
@abstractmethod
def train_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]:
"""Abstract hook, annotated as returning the training Environment."""
pass
@abstractmethod
def val_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]:
"""Abstract hook, annotated as returning the validation Environment."""
pass
@abstractmethod
def test_dataloader(self, *args, **kwargs) -> Environment[Observations, Actions, Rewards]:
"""Abstract hook, annotated as returning the test Environment."""
pass
@classmethod
@abstractmethod
def get_available_datasets(cls) -> Iterable[str]:
"""Returns an iterable of the names of available datasets."""
# --- Below this are some class attributes and methods related to the Tree. ---
# These are some "private" class attributes.
# For any new Setting subclass, its parent setting.
_parent: ClassVar[Type["SettingABC"]] = None
# A set of all the direct children of this setting.
_children: ClassVar[Set[Type["SettingABC"]]] = set()
# Set of all methods that directly target this Setting.
_targeted_methods: ClassVar[Set[Type["Method"]]] = set()
def __init_subclass__(cls, **kwargs):
    """Register every newly declared subclass in the tree of settings.

    The new class gets its own (empty) sets of children and targeted methods,
    and is added to the children of each of its immediate parents.
    """
    # Fresh containers, so that the new class does not share (and mutate) the
    # sets of its parents.
    cls._children, cls._targeted_methods = set(), set()
    # Let the immediate parents in the tree know about their new child.
    for parent_setting in cls.get_immediate_parents():
        parent_setting._children.add(cls)
    super().__init_subclass__(**kwargs)
@classmethod
def get_applicable_methods(cls) -> List[Type["Method"]]:
    """Return every known Method type that is applicable on this Setting.

    A method type is kept when its `is_applicable(cls)` returns a true value.
    """
    # Imported here rather than at module level.
    from sequoia.methods import get_all_methods

    return [
        method_type for method_type in get_all_methods() if method_type.is_applicable(cls)
    ]
@classmethod
def register_method(cls, method: Type["Method"]):
    """Add `method` to the set of methods that directly target this Setting."""
    targeted = cls._targeted_methods
    targeted.add(method)
@classmethod
def get_name(cls) -> str:
    """Gets the name of this Setting.

    Returns
    -------
    str
        The `name` class attribute when it is a string. Otherwise, the
        camel_case'd qualified name of the class, without any "_setting"
        suffix.
    """
    # LightningDataModule has a `name` class attribute of `...`! Only use the
    # attribute when it holds an actual string: a missing attribute, `None` or
    # the `...` placeholder all fall back to the name derived from the class.
    explicit_name = getattr(cls, "name", None)
    if isinstance(explicit_name, str):
        return explicit_name
    name = camel_case(cls.__qualname__)
    return remove_suffix(name, "_setting")
@classmethod
def immediate_children(cls) -> Iterable[Type["SettingABC"]]:
    """Yield the direct children of this Setting in the hierarchy.

    In most cases, there will be only one.
    """
    for child in cls._children:
        yield child
@classmethod
def get_immediate_children(cls) -> List[Type["SettingABC"]]:
    """Return the direct children of this Setting as a list."""
    return [*cls.immediate_children()]
@classmethod
def children(cls) -> Iterable[Type["SettingABC"]]:
    """Yield all the descendants of this Setting, depth-first.

    Each direct child is yielded first, followed by its own descendants.
    """
    for direct_child in cls._children:
        yield direct_child
        # Then recurse into the subtree below that child.
        for descendant in direct_child.children():
            yield descendant
@classmethod
def get_children(cls) -> List[Type["SettingABC"]]:
    """Return all the descendants of this Setting as a list."""
    return [*cls.children()]
@classmethod
def immediate_parents(cls) -> List[Type["SettingABC"]]:
    """Return the direct base classes of this Setting that are Settings.

    In most cases, this will be a list with only one value.
    """
    setting_bases: List[Type["SettingABC"]] = []
    for base in cls.__bases__:
        if issubclass(base, SettingABC):
            setting_bases.append(base)
    return setting_bases
@classmethod
def get_immediate_parents(cls) -> List[Type["SettingABC"]]:
    """Return the immediate parent Setting(s); same as `immediate_parents`.

    In most cases, this will be a list with only one value.
    """
    direct_parents = cls.immediate_parents()
    return direct_parents
@classmethod
def parents(cls) -> Iterable[Type["SettingABC"]]:
    """Return the lineage of this Setting as a list, from bottom to top.

    NOTE: In the case of Settings having multiple parents (such as
    TraditionalSLSetting), this is still just a list that reflects the method
    resolution order for that setting.
    """
    lineage = []
    # Skip the first entry of the MRO, which is `cls` itself.
    for ancestor in cls.mro()[1:]:
        if issubclass(ancestor, SettingABC):
            lineage.append(ancestor)
    return lineage
@classmethod
def get_parents(cls) -> List[Type["SettingABC"]]:
    """Return the lineage given by `parents()` as a new list."""
    return [*cls.parents()]
@classmethod
def get_path_to_source_file(cls: Type) -> Path:
    """Return the path to the source file for this class.

    Delegates to `sequoia.utils.utils.get_path_to_source_file`.
    """
    from sequoia.utils.utils import get_path_to_source_file as locate_source_file

    source_path = locate_source_file(cls)
    return source_path
@classmethod
def get_tree_string(
    cls,
    formatting: str = "command_line",
    with_methods: bool = False,
    with_assumptions: bool = False,
    with_docstrings: bool = False,
) -> str:
    """Return a string representation of the tree, from this node downwards.

    Parameters
    ----------
    formatting : str
        Either "command_line" or "markdown".
    with_methods, with_assumptions, with_docstrings : bool
        Forwarded as-is to the selected formatting function.

    Raises
    ------
    RuntimeError
        When `formatting` is not one of the supported values.
    """
    from sequoia.utils.readme import get_tree_string, get_tree_string_markdown

    formatting_functions = {
        "command_line": get_tree_string,
        "markdown": get_tree_string_markdown,
    }
    formatter = formatting_functions.get(formatting)
    if formatter is None:
        raise RuntimeError(
            f"formatting must be one of {','.join(formatting_functions)}, " f"got {formatting}"
        )
    return formatter(
        cls,
        with_methods=with_methods,
        with_assumptions=with_assumptions,
        with_docstrings=with_docstrings,
    )
# Type variable for "some kind of Setting" (bounded by SettingABC).
SettingType = TypeVar("SettingType", bound=SettingABC)
class Method(Generic[SettingType], Parseable, ABC):
"""ABC for a Method, which is a solution to a research problem (a Setting)."""
# Class attribute that holds the setting this method was designed to target.
# Needs to either be passed to the class statement or set as a class
# attribute.
target_setting: ClassVar[Type[SettingType]] = None
# Written by `set_training()` / `set_testing()`, read by the `training` property.
_training: bool
def configure(self, setting: SettingType) -> None:
"""Configures this method before it gets applied on the given Setting.
The base implementation does nothing.
Args:
setting (SettingType): The setting the method will be evaluated on.
"""
@abstractmethod
def get_actions(
self, observations: Observations, action_space: gym.Space
) -> Union[Actions, Any]:
"""Get a batch of predictions (actions) for the given observations.
returned actions must fit the action space.
"""
@abstractmethod
def fit(
self,
train_env: Environment[Observations, Actions, Rewards],
valid_env: Environment[Observations, Actions, Rewards],
):
"""Called by the Setting to give the method data to train with.
Might be called more than once before training is 'complete'.
"""
def test(self, test_env: Environment[Observations, Actions, Optional[Rewards]]):
"""(WIP) Optional method which could be called by the setting to give
your Method more flexibility about how it wants to arrange the test env.
The default implementation runs episodes, with the actions given by
`get_actions`, until the test environment reports that it is closed, while
showing the progress with a tqdm progress bar.
Parameters
----------
test_env : Environment[Observations, Actions, Optional[Rewards]]
Test environment which monitors your actions, and in which you are
only allowed a limited number of steps.
"""
import tqdm
pbar = tqdm.tqdm(desc="Testing")
postfix = {}
# Totals over the whole test loop.
steps = 0
episodes = 0
while not test_env.is_closed():
observations = test_env.reset()
done = False
episode_steps = 0
# The env can also get closed in the middle of an episode.
while not (done or test_env.is_closed()):
actions = self.get_actions(observations, action_space=test_env.action_space)
observations, rewards, done, info = test_env.step(actions)
steps += 1
episode_steps += 1
postfix.update(steps=steps, episode_steps=episode_steps)
pbar.set_postfix(postfix)
pbar.update()
episodes += 1
postfix.update(episodes=episodes)
pbar.close()
def receive_results(self, setting: SettingType, results: Results) -> None:
    """Receive the Results of applying this method on the given Setting.

    This method is optional.

    This will be called after the method has been successfully applied to
    a Setting, and could be used to log or persist the results somehow.

    The default implementation writes `results.json`, `setting.yaml` and
    `method.yaml` in a results directory (the wandb run directory when a wandb
    run is active, otherwise a new `run_{i}` directory under
    `<log_dir>/<setting name>/<method name>[/<dataset>]`), and uploads the
    files that exist to wandb when a run is active. A failure to write one of
    the files is printed and does not stop the others from being saved.

    Parameters
    ----------
    setting : SettingType
        The Setting that this Method was applied to.
    results : Results
        The `Results` object constructed by `setting`, as a result of applying
        this Method to it.

    Raises
    ------
    RuntimeError
        When no wandb run is active and the 100 candidate `run_{i}`
        directories all exist already.
    """
    setting_name = setting.get_name()
    method_name = self.get_name()
    base_results_dir: Path = setting.config.log_dir / setting_name / method_name
    dataset_name = getattr(setting, "dataset", None)
    if isinstance(dataset_name, str):
        base_results_dir /= dataset_name

    if wandb.run and wandb.run.id:
        assert isinstance(wandb.run.id, str)
        # TODO: Fix this: the results could go in `base_results_dir / run_id`.
        results_dir = Path(wandb.run.dir)
    else:
        # Use the first `run_{i}` directory that does not exist yet.
        for run_index in range(100):
            results_dir = base_results_dir / f"run_{run_index}"
            try:
                results_dir.mkdir(exist_ok=False, parents=True)
            except FileExistsError:
                continue
            break
        else:
            raise RuntimeError(
                f"Unable to create a unique results dir under {base_results_dir} "
            )
    logger.info(f"Saving results in directory {results_dir}")

    # Saving is best-effort: report a failure and carry on with the next file.
    results_json_path = results_dir / "results.json"
    try:
        with open(results_json_path, "w") as f:
            json.dump(results.to_log_dict(), f)
    except Exception as e:
        print(f"Unable to save the results: {e}")

    setting_path = results_dir / "setting.yaml"
    try:
        setting.save(setting_path)
    except Exception as e:
        print(f"Unable to save the Setting: {e}")

    method_path = results_dir / "method.yaml"
    try:
        self.save(method_path)
    except Exception as e:
        print(f"Unable to save the Method: {e}")

    if wandb.run:
        # Only upload the files that were actually written.
        for saved_path in (results_json_path, setting_path, method_path):
            if saved_path.exists():
                wandb.save(str(saved_path))
def setup_wandb(self, run: Run) -> None:
"""Called by the Setting when using Weights & Biases, after `wandb.init`.
This method is here to provide Methods with the opportunity to log some of their
configuration options or hyper-parameters to wandb.
The base implementation does nothing.
NOTE: The Setting has already set the `"setting"` entry in the `wandb.config` by
this point.
Parameters
----------
run : wandb.Run
Current wandb Run.
"""
def set_training(self) -> None:
    """Called by the Setting to let the Method know it is in the "training" phase.

    Sets the `_training` flag, then calls `train()` on every instance
    attribute that is an `nn.Module`. Any error raised while doing so is
    logged as a warning instead of being propagated.
    """
    self._training = True
    try:
        from torch import nn

        for attribute, value in vars(self).items():
            if not isinstance(value, nn.Module):
                continue
            logger.debug(f"Calling 'train()' on the Method's {attribute} attribute.")
            value.train()
    except Exception as exc:
        logger.warning(f"Unable to call `train()` on nn.Modules of the Method: {exc}")
def set_testing(self) -> None:
    """Called by the Setting to let the Method know when it is in "testing" phase.

    Clears the `_training` flag, then calls `eval()` on every instance
    attribute that is an `nn.Module`. Any error raised while doing so is
    logged as a warning instead of being propagated.
    """
    self._training = False
    try:
        from torch import nn

        for attribute, value in vars(self).items():
            if not isinstance(value, nn.Module):
                continue
            logger.debug(f"Calling 'eval()' on the Method's {attribute} attribute.")
            value.eval()
    except Exception as exc:
        logger.warning(f"Unable to call `eval()` on nn.Modules of the Method: {exc}")
@property
def training(self) -> bool:
    """Whether we're currently in the 'training' phase.

    Returns
    -------
    bool
        The value of the `_training` flag, or True when it was never set.
    """
    try:
        return self._training
    except AttributeError:
        return True
@property
def testing(self) -> bool:
    """Whether we're currently in the 'testing' phase.

    Returns
    -------
    bool
        The negation of `training`.
    """
    in_training_phase = self.training
    return not in_training_phase
# --------
# Below this are some class attributes and methods related to the Tree
# structure and for launching Experiments using this method.
# --------
@classmethod
def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results:
    """Run an Experiment from the command-line using this method.

    (TODO: @lebrice Finish writing a good docstring here that explains how this works
    and how to use it.)

    The Method is created first from the command-line arguments, then the
    Experiment is created from the same arguments (which is where the setting
    gets selected), the method is attached to it, and it is launched.

    Returns
    -------
    Results
        The value returned by `experiment.launch(argv)`.
    """
    from sequoia.main import Experiment

    # Both parsers receive the same arguments, in non-strict mode.
    method = cls.from_args(argv, strict=False)
    experiment: Experiment = Experiment.from_args(argv, strict=False)
    # Use the method parsed above, rather than the experiment's own.
    experiment.method = method
    return experiment.launch(argv)
@classmethod
def is_applicable(cls, setting: Union[SettingType, Type[SettingType]]) -> bool:
    """Returns whether this Method is applicable to the given setting.

    A method is applicable on a given setting if and only if the setting is
    the method's target setting, or if it is a descendant of the method's
    target setting (below the target setting in the tree).

    Concretely, since the tree is implemented as an inheritance hierarchy,
    a method is applicable to any setting which is an instance (or subclass)
    of its target setting.

    Args:
        setting (SettingABC): a Setting (either an instance or a class).

    Returns:
        bool: Whether or not this method is applicable on the given setting.
    """
    # If given an object (of any kind), get its type: the `issubclass` checks
    # below only accept classes.
    if not isinstance(setting, type):
        setting = type(setting)
    if not issubclass(setting, SettingABC) and issubclass(setting, LightningDataModule):
        # TODO: If we're trying to check if this method would be compatible
        # with a LightningDataModule, rather than a Setting, then we treat
        # that LightningModule the same way we would an TraditionalSLSetting.
        # i.e., if we're trying to apply a Method on something that isn't in
        # the tree, then we consider that datamodule as the TraditionalSLSetting node.
        from sequoia.settings import TraditionalSLSetting

        setting = TraditionalSLSetting
    return issubclass(setting, cls.target_setting)
@classmethod
def get_applicable_settings(cls) -> List[Type[SettingType]]:
    """Returns all settings on which this method is applicable.

    NOTE: Only the entries of `sequoia.settings.all_settings` are considered.
    """
    from sequoia.settings import all_settings

    # NOTE: `[cls.target_setting, *cls.target_setting.children()]` would return
    # ALL the settings instead.
    return [candidate for candidate in all_settings if cls.is_applicable(candidate)]
@classmethod
def all_evaluation_settings(cls, **kwargs) -> Iterable[SettingType]:
    """Lazily yields one Setting per (applicable setting type, dataset) pair.

    The setting types come from `get_applicable_settings`, and the datasets from
    each type's `get_available_datasets`. Keyword arguments, if any, are forwarded
    to the constructor of every setting.
    """
    for applicable_type in cls.get_applicable_settings():
        for dataset_name in applicable_type.get_available_datasets():
            yield applicable_type(dataset=dataset_name, **kwargs)
@classmethod
def get_name(cls) -> str:
    """Gets the name of this method class.

    Uses the `name` class attribute when it is set (not None). Otherwise the name
    is derived from the class' qualified name, passed through `camel_case` and
    stripped of a trailing "_method".
    """
    explicit_name = getattr(cls, "name", None)
    if explicit_name is not None:
        return explicit_name
    derived_name = camel_case(cls.__qualname__)
    return remove_suffix(derived_name, "_method")
@classmethod
def get_family(cls) -> Optional[str]:
    """Gets the name of the 'family' of Methods which contains this method class.

    The family differentiates methods that share a name, for instance sb3/DQN
    versus pl_bolts/DQN, sequoia/EWC vs avalanche/EWC, etc. Returns `None` when the
    class has no `family` attribute.
    """
    try:
        return cls.family
    except AttributeError:
        return None
@classmethod
def get_full_name(cls) -> str:
    """Gets the 'full name' of a method.

    This is "{family}.{name}" when the family is set, and just the name otherwise.
    The full name is used as the option on the command-line.
    """
    name = cls.get_name()
    family = cls.get_family()
    if family is None:
        return name
    return f"{family}.{name}"
def __init_subclass__(cls, target_setting: Type[SettingType] = None, **kwargs) -> None:
    """Called when creating a new subclass of Method.

    Resolves the target setting of the new class, then registers the class on that
    setting through `target_setting.register_method`.

    Args:
        target_setting (Type[Setting], optional): The target setting.
            Defaults to None, in which case the `target_setting` attribute already
            visible on the class (e.g. inherited from its parent) is used.

    Raises:
        RuntimeError: If no target setting is passed and none can be found on the
            class.
    """
    if not target_setting:
        # Fall back to the class variable, declared on the class or inherited.
        existing_target = getattr(cls, "target_setting", None)
        if not existing_target:
            raise RuntimeError(
                f"You must either pass a `target_setting` argument to the "
                f"class statement or have a `target_setting` class variable "
                f"when creating a new subclass of {__class__}."
            )
        target_setting = existing_target
    else:
        cls.target_setting = target_setting

    # Register this new method on the Setting.
    target_setting.register_method(cls)
    return super().__init_subclass__(**kwargs)
@classmethod
def get_path_to_source_file(cls) -> Path:
    """Returns the result of the module-level `get_path_to_source_file` for this class."""
    source_path = get_path_to_source_file(cls)
    return source_path
def get_experiment_name(self, setting: SettingABC, experiment_id: str = None) -> str:
    """Gets a unique name for the experiment where `self` is applied to `setting`.

    This experiment name will be passed to `orion` when performing a run of
    Hyper-Parameter Optimization.

    Parameters
    ----------
    - setting : Setting

        The `Setting` onto which this method will be applied. Its `dataset`
        attribute is expected to be a string.

    - experiment_id: str, optional

        A custom hash to append to the experiment name. When `None` (default) or
        empty, a hash of size 5 is computed from the flattened dict of the
        Setting's fields.

    Returns
    -------
    str
        The name for the experiment, of the form
        "{method name}-{setting name}_{dataset}_{experiment id}".
    """
    if not experiment_id:
        # BUG: Some settings have non-string keys/value or something?
        flat_fields = flatten_dict(setting.to_dict())
        experiment_id = compute_identity(size=5, **flat_fields)

    assert isinstance(setting.dataset, str), "assuming that dataset is a str for now."

    method_name = self.get_name()
    setting_name = setting.get_name()
    return f"{method_name}-{setting_name}_{setting.dataset}_{experiment_id}"
def get_search_space(self, setting: SettingABC) -> Mapping[str, Union[str, Dict]]:
    """Returns the search space to use for HPO in the given Setting.

    This base implementation always raises: it has to be overridden in order to
    enable HPO sweeps.

    Parameters
    ----------
    setting : Setting
        The Setting on which the run of HPO will take place.

    Returns
    -------
    Mapping[str, Union[str, Dict]]
        An orion-formatted search space dictionary, mapping from hyper-parameter
        names (str) to their priors (str), or to nested dicts of the same form.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    message = (
        "You need to provide an implementation for the `get_search_space` method "
        "in order to enable HPO sweeps."
    )
    raise NotImplementedError(message)
def adapt_to_new_hparams(self, new_hparams: Dict[str, Any]) -> None:
    """Adapts the Method when it receives new Hyper-Parameters to try for a new run.

    This base implementation always raises: it has to be overridden if you want to
    perform HPO sweeps with Orion.

    NOTE: It is very strongly recommended that you always re-create your model and
    any modules / components that depend on these hyper-parameters inside the
    `configure` method! (Otherwise these new hyper-parameters will not be used in
    the next run)

    Parameters
    ----------
    new_hparams : Dict[str, Any]
        The new hyper-parameters being recommended by the HPO algorithm. These will
        have the same structure as the search space.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    message = (
        "You need to provide an implementation for the `adapt_to_new_hparams` "
        "method in order to enable HPO sweeps."
    )
    raise NotImplementedError(message)
def hparam_sweep(
    self,
    setting: SettingABC,
    search_space: Dict[str, Union[str, Dict]] = None,
    experiment_id: str = None,
    database_path: Union[str, Path] = None,
    max_runs: int = None,
    hpo_algorithm: Union[str, Dict] = "BayesianOptimizer",
    debug: bool = False,
) -> Tuple[Dict, float]:
    """Performs a Hyper-Parameter Optimization sweep using orion.

    Repeatedly asks Orion for a suggestion, passes the suggested values to
    `adapt_to_new_hparams`, applies the method on `setting`, and reports the
    resulting objective back to Orion. The loop ends when the Orion experiment is
    done, or once 3 trials have failed in this call.

    Parameters
    ----------
    setting : Setting
        Setting to run the sweep on.

    search_space : Dict[str, Union[str, Dict]], optional
        Search space of the hyper-parameter optimization algorithm. Defaults to
        `None`, in which case the result of the `get_search_space` method is used.

    experiment_id : str, optional
        Unique Id to use when creating the experiment in Orion. Defaults to `None`,
        in which case a hash of the `setting`'s fields is used.

    database_path : Union[str, Path], optional
        Path to a pickle file to be used by Orion to store the hyper-parameters and
        their corresponding values. Default to `None`, in which case the database is
        created at path `./orion_db.pkl`.

    max_runs : int, optional
        Maximum number of runs to perform (passed to Orion as `max_trials`).
        Defaults to `None`.

    hpo_algorithm : Union[str, Dict], optional
        The hyper-parameter optimization algorithms to use.

    debug : bool, optional
        Whether to run Orion in debug-mode, where the database is an EphemeralDb,
        meaning it gets created for the sweep and destroyed at the end of the sweep.

    Returns
    -------
    Tuple[Dict, float]
        The `params` and the `objective` of the best trial, as reported by Orion.

    Raises
    ------
    RuntimeError
        If the optional HPO dependencies (orion) can't be imported, or if the
        experiment statistics don't contain a best trial at the end of the sweep.
    """
    try:
        from orion.client import build_experiment
        from orion.core.worker.trial import Trial
    except ImportError as e:
        raise RuntimeError(
            f"Need to install the optional dependencies for HPO, using "
            f"`pip install -e .[hpo]` (error: {e})"
        ) from e

    search_space = search_space or self.get_search_space(setting)
    logger.info("HPO Search space:\n" + json.dumps(search_space, indent="\t"))

    database_path: Path = Path(database_path or "./orion_db.pkl")
    logger.info(f"Will use database at path '{database_path}'.")
    experiment_name = self.get_experiment_name(setting, experiment_id=experiment_id)

    experiment = build_experiment(
        name=experiment_name,
        space=search_space,
        debug=debug,
        algorithms=hpo_algorithm,
        max_trials=max_runs,
        storage={
            "type": "legacy",
            "database": {"type": "pickleddb", "host": str(database_path)},
        },
    )

    previous_trials: List[Trial] = experiment.fetch_trials_by_status("completed")
    # Orion works in a 'lower is better' fashion, so if the `objective` of the
    # Results class for the given Setting is "higher is better", the objective is
    # negated (multiplied by `sign`) before being submitted to Orion.
    lower_is_better = setting.Results.lower_is_better
    sign = 1 if lower_is_better else -1
    if previous_trials:
        logger.info(
            f"Using existing Experiment {experiment} which has "
            f"{len(previous_trials)} existing trials."
        )
    else:
        logger.info(f"Created new experiment with name {experiment_name}")

    # Counters for the trials attempted during this call only.
    trials_performed = 0
    failed_trials = 0

    # Helpers used to color the log messages.
    red = partial(colorize, color="red")
    green = partial(colorize, color="green")

    # Stop when Orion says the experiment is done, or after 3 failed trials.
    while not (experiment.is_done or failed_trials == 3):
        # Get a new suggestion of hparams to try:
        trial: Trial = experiment.suggest()

        # ---------
        # (Re)create the Model with the suggested Hparams values.
        # ---------

        new_hparams: Dict = trial.params
        # TODO: We should probably also change some values in the Config (e.g.
        # log_dir, checkpoint_dir, etc) between runs.
        logger.info("Suggested values for this run:\n" + json.dumps(new_hparams, indent="\t"))
        self.adapt_to_new_hparams(new_hparams)

        # ---------
        # Evaluate the (adapted) method on the setting:
        # ---------
        try:
            result: Results = setting.apply(self)
        except Exception:
            # Log the full traceback, count the failure and release the trial.
            logger.error(red("Encountered an error, this trial will be dropped:"))
            logger.error(red("-" * 60))
            with StringIO() as s:
                traceback.print_exc(file=s)
                s.seek(0)
                logger.error(red(s.read()))
            logger.error(red("-" * 60))
            failed_trials += 1
            logger.error(red(f"({failed_trials} failed trials so far). "))

            experiment.release(trial)
        else:
            # Report the results to Orion (in its 'lower is better' convention):
            orion_result = dict(
                name=result.objective_name,
                type="objective",
                value=sign * result.objective,
            )
            experiment.observe(trial, [orion_result])
            trials_performed += 1
            logger.info(
                green(
                    f"Trial #{trials_performed}: {result.objective_name} = {result.objective}"
                )
            )
            # Receive the results, maybe log to wandb, whatever you wanna do.
            self.receive_results(setting, result)

    logger.info(
        "Experiment statistics: \n"
        + "\n".join(f"\t{key}: {value}" for key, value in experiment.stats.items())
    )
    logger.info(f"Number of previous trials: {len(previous_trials)}")
    logger.info(f"Trials successfully completed by this worker: {trials_performed}")
    logger.info(f"Failed Trials attempted by this worker: {failed_trials}")

    if "best_trials_id" not in experiment.stats:
        raise RuntimeError("Can't find the best trial, experiment might be broken!")

    best_trial: Trial = experiment.get_trial(uid=experiment.stats["best_trials_id"])
    best_hparams = best_trial.params
    # NOTE(review): the values submitted above are `sign * result.objective`, so
    # this objective is presumably still in Orion's 'lower is better' convention
    # (and may be an Orion result object rather than a float) - TODO confirm.
    best_objective = best_trial.objective
    return best_hparams, best_objective
================================================
FILE: sequoia/settings/base/environment.py
================================================
"""Defines the Abstract Base class for an "Environment".
NOTE (@lebrice): This 'Environment' abstraction isn't super useful at the moment
because there's only the `ActiveDataLoader` that fits this interface (since we
can't send anything to the usual DataLoader).
"""
from abc import ABC
from typing import Generic
import gym
from sequoia.utils.logging_utils import get_logger
from .objects import ActionType, ObservationType, RewardType
logger = get_logger(__name__)
from abc import abstractmethod
class Environment(
    gym.Env,
    Generic[ObservationType, ActionType, RewardType],
    ABC,
):
    """ABC for a learning 'environment' in *both* Supervised and Reinforcement Learning.

    Different settings can implement this interface however they want.
    """

    # Space of the rewards, next to the spaces already declared by `gym.Env`.
    reward_space: gym.Space

    # @abstractmethod
    def is_closed(self) -> bool:
        """Returns whether this environment is closed.

        Delegates to `self.env.is_closed()` when this object has an `env` attribute
        which itself has an `is_closed` attribute.

        Raises:
            NotImplementedError: When there is no such wrapped `env` to delegate to.
        """
        if not hasattr(self, "env") or not hasattr(self.env, "is_closed"):
            raise NotImplementedError(self)
        return self.env.is_closed()
================================================
FILE: sequoia/settings/base/objects.py
================================================
from dataclasses import dataclass
from typing import Generic, TypeVar
import numpy as np
from torch import Tensor
from sequoia.common import Batch
@dataclass(frozen=True)
class Observations(Batch):
    """A batch of "observations" coming from an Environment.

    Frozen dataclass with a single field, `x`.
    """

    # The observation data of the batch.
    x: Tensor

    @property
    def state(self) -> Tensor:
        """Alias for the `x` field."""
        return self.x

    def __len__(self) -> int:
        """Returns the `batch_size` attribute of this batch."""
        return self.batch_size
@dataclass(frozen=True)
class Actions(Batch):
    """A batch of "actions" coming from an Environment.

    For example, in a supervised setting, this would be the predicted labels,
    while in an RL setting, this would be the next 'actions' to take in the
    Environment.
    """

    # The predictions / actions of the batch.
    y_pred: Tensor

    @property
    def actions(self) -> Tensor:
        """Alias for the `y_pred` field."""
        return self.y_pred

    @property
    def actions_np(self) -> np.ndarray:
        """Returns the prediction/action as a numpy array.

        Tensors are detached and moved to the CPU first; any other value goes
        through `np.asarray`.
        """
        prediction = self.y_pred
        if not isinstance(prediction, Tensor):
            return np.asarray(prediction)
        return prediction.detach().cpu().numpy()

    @property
    def predictions(self) -> Tensor:
        """Alias for the `y_pred` field."""
        return self.y_pred
# Generic type of the value stored in the `y` field of `Rewards`.
T = TypeVar("T")
@dataclass(frozen=True)
class Rewards(Batch, Generic[T]):
    """A batch of "rewards" coming from an Environment.

    For example, in a supervised setting, this would be the true labels, while
    in an RL setting, this would be the 'reward' for a state-action pair.

    TODO: Maybe add the task labels as a part of the 'Reward', to help with the
    training of task-inference methods later on when we add those.
    """

    # The labels / rewards of the batch.
    # TODO: Rename this to 'reward', and add a 'y' field in the 'DenseRewards' class.
    y: T

    @property
    def labels(self) -> T:
        """Alias for the `y` field."""
        return self.y

    @property
    def reward(self) -> T:
        """Alias for the `y` field."""
        return self.y
# Type variables bound to the base `Observations`, `Actions` and `Rewards` classes.
ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)
================================================
FILE: sequoia/settings/base/results.py
================================================
"""In the current setup, `Results` objects are created by a Setting when a
method is applied to them. Each setting can define its own type of `Results` to
customize what the ‘objective’ is in that particular setting.
For instance, the TaskIncrementalSLSetting class also defines a
TaskIncrementalResults class, where the average accuracy across all tasks is the
objective.
We currently have a unit testing setup that, for a given Method class, performs
a quick run of training / testing (using the --fast_dev_run option from
Pytorch-Lightning).
In those tests, there is also a `validate_results` function, which is basically
used to make sure that the results make sense, for the given method and setting.
For instance, when testing a RandomBaselineMethod on a TraditionalSLSetting, the accuracy
should be close to chance level. Likewise, in the `baseline_test.py` file, we
make sure that the BaseMethod (just a classifier, no CL adjustments) also
exhibits catastrophic forgetting when applied on a Class or Task Incremental
Setting.
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from functools import total_ordering
from pathlib import Path
from typing import Any, ClassVar, Dict, TypeVar, Union
import matplotlib.pyplot as plt
from simple_parsing import Serializable
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
@dataclass
@total_ordering
class Results(Serializable, ABC):
    """Represents the results of an experiment.

    Here you can define what the quantity to maximize/minimize is. This class
    should also be used to create the plots that will be helpful to understand
    and compare different results.

    Results compare (`==`, `>`, and the other operators derived by
    `total_ordering`) through their `objective`, either with other `Results` or
    with plain floats.

    TODO: Add wandb logging here somehow.
    """

    # Whether a lower `objective` means a better result.
    lower_is_better: ClassVar[bool] = False

    # Name for the 'objective'.
    objective_name: ClassVar[str] = "Objective"

    @property
    @abstractmethod
    def objective(self) -> float:
        """Returns a float value that indicates how "good" this result is.

        If the `lower_is_better` class variable is set to `False` (default),
        then a higher value means a better result. Otherwise, a lower value means
        a better result.
        """
        raise NotImplementedError("Each Result subclass should implement this.")

    @abstractmethod
    def summary(self) -> str:
        """Gives a string describing the results, in a way that is easy to understand.

        :return: A summary of the results.
        :rtype: str
        """

    @abstractmethod
    def make_plots(self) -> Dict[str, plt.Figure]:
        """Generates the plots that are useful for understanding/interpreting or
        comparing this kind of results.

        :return: A dictionary mapping from plot name to the matplotlib figure.
        :rtype: Dict[str, plt.Figure]
        """

    @abstractmethod
    def to_log_dict(self, verbose: bool = False) -> Dict[str, Any]:
        """Create a dict version of the results, to be logged to wandb"""
        return {self.objective_name: self.objective}

    def save(self, path: Union[str, Path], dump_fn=None, **kwargs) -> None:
        """Saves the results at `path`, creating the parent directories if needed.

        The actual serialization is delegated to the `save` method of the parent
        class, which also receives `dump_fn` and the extra keyword arguments.
        """
        path = Path(path)
        path.parent.mkdir(exist_ok=True, parents=True)
        return super().save(path, dump_fn=dump_fn, **kwargs)

    def save_to_dir(self, save_dir: Union[str, Path], filename: str = "results.json") -> None:
        """Prints the summary, then saves the results and their plots in `save_dir`.

        The results are dumped to `save_dir / filename`, and every figure returned
        by `make_plots` is saved as a ".jpg" file named after its key.

        :param save_dir: Directory where everything is saved. Created if needed.
        :param filename: Name of the file where the results are dumped.
        """
        save_dir = Path(save_dir)
        save_dir.mkdir(exist_ok=True, parents=True)

        print("Results summary:")
        # `summary` is a method: it needs to be called for anything to be shown.
        print(self.summary())

        results_dump_file = save_dir / filename
        self.save(results_dump_file)
        print(f"Saved a copy of the results to {results_dump_file}")

        plots: Dict[str, plt.Figure] = self.make_plots()
        plot_paths: Dict[str, Path] = {}
        for fig_name, figure in plots.items():
            print(f"fig_name: {fig_name}")
            path = (save_dir / fig_name).with_suffix(".jpg")
            # The figure name may contain sub-directories.
            path.parent.mkdir(exist_ok=True, parents=True)
            figure.savefig(path)
            plot_paths[fig_name] = path
        print(f"\nSaved Plots to: {plot_paths}\n")

    def __eq__(self, other: Any) -> bool:
        """Compares the objective with another `Results` or with a float."""
        if isinstance(other, Results):
            return self.objective == other.objective
        elif isinstance(other, float):
            return self.objective == other
        return NotImplemented

    def __gt__(self, other: Any) -> bool:
        """Orders by objective, against another `Results` or against a float."""
        if isinstance(other, Results):
            return self.objective > other.objective
        elif isinstance(other, float):
            return self.objective > other
        return NotImplemented
# Type variable bound to the `Results` base class.
ResultsType = TypeVar("ResultsType", bound=Results)
================================================
FILE: sequoia/settings/base/setting.py
================================================
""" This module defines the `Setting` class, an ML "problem" to solve.
The `Setting` class is an abstract base class which should represent the most
general learning setting imaginable, i.e. with the fewest assumptions about the
data, the environment, the agent, etc.
The Setting class is currently loosely based on the `LightningDataModule` class
from pytorch-lightning, with the goal of having a `TraditionalSLSetting` node somewhere
in the tree, which would be totally interchangeable with existing datamodules
from pytorch-lightning.
The hope is that by staying close to that API, we can make it easier for people
to adopt the repo, and also, if possible, directly reuse existing models from
pytorch-lightning.
See: [Pytorch-Lightning](https://pytorch-lightning.readthedocs.io/en/latest/)
See: [LightningDataModule](https://pytorch-lightning.readthedocs.io/en/latest/datamodules.html)
"""
import itertools
import sys
import typing
from abc import abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any, ClassVar, Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union
import gym
import numpy as np
import torch
from gym import spaces
from pytorch_lightning import LightningDataModule
from simple_parsing import Serializable, field
from torch import Tensor
from sequoia.common.config import Config, WandbConfig
from sequoia.common.metrics import Metrics
if typing.TYPE_CHECKING:
from sequoia.common.transforms import Compose
from sequoia.common.transforms.transform_enum import Transforms
from sequoia.settings.base.bases import Method, SettingABC
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import Actions, Observations, Rewards
from sequoia.settings.base.results import Results, ResultsType
from sequoia.settings.base.setting_meta import SettingMeta
from sequoia.settings.presets import setting_presets
from sequoia.utils import Parseable, get_logger
from sequoia.utils.utils import take
logger = get_logger(__name__)
# Type variables bound to the `Setting` class (forward reference) and to `Environment`.
SettingType = TypeVar("SettingType", bound="Setting")
EnvironmentType = TypeVar("EnvironmentType", bound=Environment)
@dataclass
class Setting(
SettingABC,
Parseable,
Serializable,
LightningDataModule,
Generic[EnvironmentType],
metaclass=SettingMeta,
):
"""Base class for all research settings in ML: Root node of the tree.
A 'setting' is loosely defined here as a learning problem with a specific
set of assumptions, restrictions, and an evaluation procedure.
For example, Reinforcement Learning is a type of Setting in which we assume
that an Agent is able to observe an environment, take actions upon it, and
receive rewards back from the environment. Some of the assumptions include
that the reward is dependant on the action taken, and that the actions have
an impact on the environment's state (and on the next observations the agent
will receive). The evaluation procedure consists in trying to maximize the
reward obtained from an environment over a given number of steps.
This 'Setting' class should ideally represent the most general learning
problem imaginable, with almost no assumptions about the data or evaluation
procedure.
This is a dataclass. Its attributes can also be used as command-line
arguments using `simple_parsing`.
Abstract (required) methods:
- **apply** Applies a given Method on this setting to produce Results.
- **prepare_data** (things to do on 1 GPU/TPU not on every GPU/TPU in distributed mode).
- **setup** (things to do on every accelerator in distributed mode).
- **train_dataloader** the training environment/dataloader.
- **val_dataloader** the val environments/dataloader(s).
- **test_dataloader** the test environments/dataloader(s).
"Abstract"-ish (required) class attributes:
- `Results`: The class of Results that are created when applying a Method on
this setting.
- `Observations`: The type of Observations that will be produced in this
setting.
- `Actions`: The type of Actions that are expected from this setting.
- `Rewards`: The type of Rewards that this setting will (potentially) return
upon receiving an action from the method.
"""
# ---------- Class Variables -------------
# Fields in this block are class attributes. They don't create command-line
# arguments.
# Type of Observations that the dataloaders (a.k.a. "environments") will
# produce for this type of Setting.
Observations: ClassVar[Type[Observations]] = Observations
# Type of Actions that the dataloaders (a.k.a. "environments") will receive
# through their `send` method, for this type of Setting.
Actions: ClassVar[Type[Actions]] = Actions
# Type of Rewards that the dataloaders (a.k.a. "environments") will return
# after receiving an action, for this type of Setting.
Rewards: ClassVar[Type[Rewards]] = Rewards
# The type of Results that are given back when a method is applied on this
# Setting. The `Results` class basically defines the 'evaluation metric' for
# a given type of setting. See the `Results` class for more info.
Results: ClassVar[Type[Results]] = Results
available_datasets: ClassVar[Dict[str, Any]] = {}
# Transforms to be applied to the observations of the train/valid/test
# environments.
transforms: Optional[List[Transforms]] = None
# Transforms to be applied to the training datasets.
train_transforms: Optional[List[Transforms]] = None
# Transforms to be applied to the validation datasets.
val_transforms: Optional[List[Transforms]] = None
# Transforms to be applied to the testing datasets.
test_transforms: Optional[List[Transforms]] = None
# Fraction of training data to use to create the validation set.
# (Only applicable in Passive settings.)
val_fraction: float = 0.2
# TODO: Still not sure where exactly we should be adding the 'batch_size'
# and 'num_workers' arguments. Adding it here for now with cmd=False, so
# that they can be passed to the constructor of the Setting.
batch_size: Optional[int] = field(default=None, cmd=False)
num_workers: Optional[int] = field(default=None, cmd=False)
# # TODO: Add support for semi-supervised training.
# # Fraction of the dataset that is labeled.
# labeled_data_fraction: int = 1.0
# # Number of labeled examples.
# n_labeled_examples: Optional[int] = None
# Options related to Weights & Biases (wandb). Turned Off by default. Passing any of
# its arguments will enable wandb.
# NOTE: Adding `cmd=False` here, so we only create the args in `Experiment`.
# TODO: Fix this up.
wandb: Optional[WandbConfig] = field(default=None, compare=False, cmd=False)
# Group of configuration options like log_dir, data dir, etc.
# TODO: It's a bit confusing to also have a `config` attribute on the
# Setting. Might want to change this a bit.
config: Optional[Config] = field(default=None, cmd=False)
def __post_init__(
    self,
    observation_space: gym.Space = None,
    action_space: gym.Space = None,
    reward_space: gym.Space = None,
):
    """Initializes the fields of the setting that weren't set from the
    command-line.

    Normalizes the train / val / test transforms into `Compose` objects (using
    `self.transforms` as their default when none of them is set), initializes the
    `LightningDataModule` part of the setting, and stores the given spaces.

    Args:
        observation_space: Initial value for the observation space.
        action_space: Initial value for the action space.
        reward_space: Initial value for the reward space.
    """
    from sequoia.common.transforms import Compose

    logger.debug("__post_init__ of Setting")

    split_fields = ("train_transforms", "val_transforms", "test_transforms")

    # BUG: simple-parsing sometimes parses a list with a single item, itself the
    # list of transforms. Not sure if this still happens.
    for field_name in split_fields:
        parsed = getattr(self, field_name)
        if isinstance(parsed, list) and len(parsed) == 1 and isinstance(parsed[0], list):
            setattr(self, field_name, parsed[0])

    # NOTE: There used to be default transforms here (to_tensor + three_channels)
    # for when no transforms are passed at all. This is currently disabled.
    # TODO: Remove this after the competition perhaps.

    # TODO: Should change this, so that these transform fields are only the
    # additional transforms compared to `self.transforms` (the 'base' transforms)
    # If the constructor is called with just the `transforms` argument, like this:
    # (dataset="bob", transforms=foo_transform)
    # Then we use this value as the default for the train, val and test transforms.
    if self.transforms and not any(getattr(self, field_name) for field_name in split_fields):
        if not isinstance(self.transforms, list):
            self.transforms = Compose([self.transforms])
        for field_name in split_fields:
            setattr(self, field_name, self.transforms.copy())

    # Wrap single transforms in a list, then compose each list of Transforms or
    # callables into a single transform.
    for field_name in split_fields:
        current = getattr(self, field_name)
        if current is not None and not isinstance(current, list):
            current = [current]
        setattr(self, field_name, Compose(current or []))

    LightningDataModule.__init__(
        self,
        train_transforms=self.train_transforms,
        val_transforms=self.val_transforms,
        test_transforms=self.test_transforms,
    )

    self._observation_space = observation_space
    self._action_space = action_space
    self._reward_space = reward_space

    # The environments start out unset.
    self.train_env: Environment = None  # type: ignore
    self.val_env: Environment = None  # type: ignore
    self.test_env: Environment = None  # type: ignore
@abstractmethod
def apply(self, method: Method, config: Config = None) -> "Setting.Results":
    """Applies the given Method on this setting to produce Results.

    This base implementation always raises `NotImplementedError`. The statements
    after the `raise` are never executed: they are only a sketch of what the
    train / test loop of a more specific setting could look like.
    """
    # NOTE: The actual train/test loop should be defined in a more specific
    # setting. This is just here as an illustration of what that could look
    # like.
    raise NotImplementedError("this is just here for illustration purposes. ")

    # ----- Unreachable, illustrative code below. -----
    method.fit(
        train_env=self.train_dataloader(),
        valid_env=self.val_dataloader(),
    )

    # Test loop:
    test_env = self.test_dataloader()
    test_metrics = []

    # Number of episodes to test on:
    n_test_episodes = 1

    # Perform a set number of episodes in the test environment.
    for episode in range(n_test_episodes):
        # Get initial observations.
        observations = test_env.reset()

        for i in itertools.count():
            # Get the predictions/actions for a batch of observations.
            actions = method.get_actions(observations, test_env.action_space)
            observations, rewards, done, info = test_env.step(actions)
            # Calculate the 'metrics' (TODO: This should be done be in the env!)
            batch_metrics = ...
            test_metrics.append(batch_metrics)
            if done:
                break

    return self.Results(test_metrics=test_metrics)
def get_metrics(self, actions: Actions, rewards: Rewards) -> Union[float, Metrics]:
    """Calculate the "metric" from the model predictions (actions) and the true labels (rewards).

    Only the `y_pred` of the actions and the `y` of the rewards are used. The
    result is whatever `sequoia.common.metrics.get_metrics` returns for them.

    When the action space is Discrete and the actions look like class indices
    rather than logits, they are first converted to one-hot "fake logits".

    TODO: This is duplicated from Incremental. Need to fix this.
    """
    from sequoia.common.metrics import get_metrics

    # Extract the predictions and the labels from the batch objects.
    y_pred = torch.as_tensor(actions.y_pred) if isinstance(actions, Actions) else actions
    y_true = torch.as_tensor(rewards.y) if isinstance(rewards, Rewards) else rewards

    # Tensors are converted to numpy arrays.
    if isinstance(y_pred, Tensor):
        y_pred = y_pred.cpu().numpy()
    if isinstance(y_true, Tensor):
        y_true = y_true.cpu().numpy()

    # TODO: At the moment there's this problem, ClassificationMetrics wants
    # to create a confusion matrix, which requires 'logits' (so it knows how
    # many classes.
    if isinstance(self.action_space, spaces.Discrete):
        n_samples = y_true.shape[0]
        y_pred = torch.as_tensor(y_pred)
        if y_pred.dim() == 1 or (y_pred.shape[-1] == 1 and self.action_space.n != 2):
            one_hot = torch.zeros([n_samples, self.action_space.n], dtype=int)
            # FIXME: There must be a smarter way to do this indexing.
            for row, chosen_class in enumerate(y_pred):
                one_hot[row, chosen_class] = 1
            y_pred = one_hot

    return get_metrics(y_pred=y_pred, y=y_true)
@property
def image_space(self) -> Optional[gym.Space]:
    """The space of the images ("x") in the observations, if it can be found.

    - Box observation space: the observation space itself;
    - Tuple observation space: its "x" entry, asserted to be a Box;
    - Dict observation space: its "x" entry.

    Otherwise a warning is logged and `None` is returned.
    """
    space = self.observation_space
    if isinstance(space, spaces.Box):
        return space
    if isinstance(space, spaces.Tuple):
        assert isinstance(space["x"], spaces.Box)
        return space["x"]
    if isinstance(space, spaces.Dict):
        return space.spaces["x"]

    logger.warning(
        f"Don't know what the image space is. "
        f"(self.observation_space={self.observation_space})"
    )
    return None
@property
def observation_space(self) -> gym.Space:
    """The observation space of this setting."""
    return self._observation_space

@observation_space.setter
def observation_space(self, value: gym.Space) -> None:
    """Sets the observation space.

    NOTE: When `self._dims` is not set yet, this also sets the `dims` attribute
    (and therefore the result of the `size()` method from LightningDataModule).

    Raises:
        RuntimeError: If `value` isn't a `gym.Space`.
        NotImplementedError: If `dims` has to be set, but can't be inferred from
            this kind of space.
    """
    if not isinstance(value, gym.Space):
        raise RuntimeError(f"Value must be a `gym.Space` (got {value})")

    if not self._dims:
        if isinstance(value, spaces.Box):
            inferred_dims = value.shape
        elif isinstance(value, spaces.Tuple):
            inferred_dims = tuple(space.shape for space in value.spaces)
        elif isinstance(value, spaces.Dict) and "x" in value.spaces:
            inferred_dims = value.spaces["x"].shape
        else:
            raise NotImplementedError(
                f"Don't know how to set the 'dims' attribute using "
                f"observation space {value}"
            )
        self.dims = inferred_dims

    self._observation_space = value
@property
def action_space(self) -> gym.Space:
    """The action space that was last set on this object."""
    return self._action_space

@action_space.setter
def action_space(self, value: gym.Space) -> None:
    """Set the action space. The value is stored as-is, without validation."""
    self._action_space = value
@property
def reward_space(self) -> gym.Space:
    """The reward space that was last set on this object."""
    return self._reward_space

@reward_space.setter
def reward_space(self, value: gym.Space) -> None:
    """Set the reward space. The value is stored as-is, without validation."""
    self._reward_space = value
@classmethod
def get_available_datasets(cls) -> Iterable[str]:
    """Returns an iterable of strings which represent the names of datasets.

    This is the `available_datasets` class attribute, returned as-is.
    """
    return cls.available_datasets
def _setup_config(self, method: Method) -> Config:
    """Select the `Config` to use when applying `method` to this setting.

    The first of these that is available is used:

    1. the `config` attribute of the method, when it is a `Config`;
    2. the `config` attribute of this setting, when it is a `Config`;
    3. a `Config` parsed (with `strict=False`) from `self._argv`.
    """
    method_config = getattr(method, "config", None)
    if isinstance(method_config, Config):
        logger.debug(f"Using Config from the Method: {method_config}")
        return method_config

    setting_config = getattr(self, "config", None)
    if isinstance(setting_config, Config):
        logger.debug(f"Using Config from the Setting: {setting_config}")
        return setting_config

    # Neither object carries a Config: parse one from the arguments.
    argv = self._argv
    if not argv:
        logger.debug(f"Parsing the config from the current command-line arguments.")
    else:
        logger.debug(f"Parsing the Config from the command-line arguments ({argv})")
    return Config.from_args(argv, strict=False)
@classmethod
def main(cls, argv: Optional[Union[str, List[str]]] = None) -> Results:
    """Create a setting of this type from `argv` and launch an experiment on it.

    The setting and the `Experiment` are both parsed from the same arguments;
    the setting of the experiment is then replaced with the one of type `cls`.

    Returns the value returned by `Experiment.launch`.
    """
    from sequoia.main import Experiment

    # The setting is parsed first, so that it is of type `cls`.
    parsed_setting = cls.from_args(argv)
    # Creating the Experiment from the command-line makes it possible to choose
    # between all the methods.
    experiment: Experiment = Experiment.from_args(argv)
    # Force the experiment to use the setting parsed above.
    experiment.setting = parsed_setting
    launch_results: ResultsType = experiment.launch(argv)
    return launch_results
def apply_all(self, argv: Union[str, List[str]] = None) -> Dict[Type["Method"], Results]:
    """Apply every applicable method to this setting and gather the results.

    Each type of method returned by `get_applicable_methods` is created from
    `argv` and applied to this setting, with a `Config` that is also parsed from
    `argv`. A summary of the results is logged at the end.

    Returns a dictionary that maps each type of method to its results.
    """
    applicable_methods = self.get_applicable_methods()
    from sequoia.methods import Method

    config = Config.from_args(argv)
    all_results: Dict[Type[Method], Results] = {
        method_type: self.apply(method_type.from_args(argv), config)
        for method_type in applicable_methods
    }

    logger.info(f"All results for setting of type {type(self)}:")
    summary = {}
    for method_type, method_results in all_results.items():
        method_name = method_type.get_name()
        # Falsy results are reported as a crash.
        summary[method_name] = method_results.get_metric() if method_results else "crashed"
    logger.info(summary)
    return all_results
def _check_environments(self) -> None:
    """Do a quick check to make sure that interacting with the envs/dataloaders
    works correctly.

    For each of the train / val / test dataloader methods, this creates the
    environment, compares its spaces with the (possibly batched) spaces of the
    setting, interacts with it through the gym API (`reset` / `step`), then
    through iteration and `send`, and finally closes it.

    Raises
    ------
    AssertionError
        If any of the checks fails.
    """
    # Check that the env's spaces are batched versions of the settings'.
    from gym.vector.utils import batch_space

    from sequoia.settings.sl import PassiveEnvironment

    batch_size = self.batch_size
    for loader_method in [
        self.train_dataloader,
        self.val_dataloader,
        self.test_dataloader,
    ]:
        print(f"\n\nChecking loader method {loader_method.__name__}\n\n")
        env = loader_method(batch_size=batch_size)
        # From here on (including for the next loaders), the batch size reported
        # by the environment is the one that is used.
        batch_size = env.batch_size
        # We could compare the spaces directly, but that's a bit messy, and it
        # would depend on the type of each space. Instead, we could check
        # samples from such spaces on how the spaces are batched.
        if batch_size:
            expected_observation_space = batch_space(self.observation_space, n=batch_size)
            expected_action_space = batch_space(self.action_space, n=batch_size)
            expected_reward_space = batch_space(self.reward_space, n=batch_size)
        else:
            # No batching: the env is expected to have the spaces of the setting.
            expected_observation_space = self.observation_space
            expected_action_space = self.action_space
            expected_reward_space = self.reward_space
        # TODO: Batching the 'Sparse' makes it really ugly, so just
        # comparing the 'image' portion of the space for now.
        assert env.observation_space["x"].shape == expected_observation_space[0].shape, (
            env.observation_space["x"],
            expected_observation_space[0],
        )
        assert env.action_space == expected_action_space, (
            env.action_space,
            expected_action_space,
        )
        assert env.reward_space == expected_reward_space, (
            env.reward_space,
            expected_reward_space,
        )
        # Check that the 'gym API' interaction is working correctly.
        reset_obs: Observations = env.reset()
        self._check_observations(env, reset_obs)
        for i in range(5):
            actions = env.action_space.sample()
            self._check_actions(env, actions)
            step_observations, step_rewards, done, info = env.step(actions)
            self._check_observations(env, step_observations)
            self._check_rewards(env, step_rewards)
            # No episode is expected to end during these first few steps.
            if batch_size:
                assert not any(done)
            else:
                assert not done
            # assert not (done if isinstance(done, bool) else any(done))
        # Check that the 'dataloader API' interaction (iterating over the env and
        # sending back actions) is also working correctly.
        for batch in take(env, 5):
            observations: Observations
            rewards: Optional[Rewards]
            if isinstance(env, PassiveEnvironment):
                observations, rewards = batch
            else:
                # in RL atm, the 'dataset' gives back only the observations.
                observations, rewards = batch, None
            self._check_observations(env, observations)
            if rewards is not None:
                self._check_rewards(env, rewards)
            # NOTE: These actions are sampled from the (un-batched) action space
            # of the setting, once per item in the batch.
            if batch_size:
                actions = tuple(self.action_space.sample() for _ in range(batch_size))
            else:
                actions = self.action_space.sample()
            # actions = self.Actions(torch.as_tensor(actions))
            rewards = env.send(actions)
            self._check_rewards(env, rewards)
        env.close()
def _check_observations(self, env: Environment, observations: Any):
    """Assert that `observations` are valid observations for the given env.

    The observations must be of type `self.Observations`, and their `x` entry
    (converted to a numpy array when it is a tensor) must be contained in the
    image space of the env.

    TODO: This should probably not be in this file here. It's more used for
    testing than anything else.

    Raises
    ------
    RuntimeError
        If the observation space of the env is neither a `Box` nor a `Tuple`.
    """
    assert isinstance(observations, self.Observations), observations
    images = observations.x
    assert isinstance(images, (torch.Tensor, np.ndarray))
    if isinstance(images, Tensor):
        images = images.cpu().numpy()

    # Find the 'image' space:
    env_obs_space = env.observation_space
    if not isinstance(env_obs_space, (spaces.Box, spaces.Tuple)):
        raise RuntimeError(
            f"Don't know how to find the image space in the "
            f"env's obs space ({env.observation_space})."
        )
    image_space = env_obs_space if isinstance(env_obs_space, spaces.Box) else env_obs_space["x"]
    assert images in image_space
def _check_actions(self, env: Environment, actions: Any):
    """Assert that `actions` are valid actions for the given environment.

    `Actions` objects must be of type `self.Actions`, and are replaced by their
    `y_pred` entry. Tensors are then converted to numpy arrays (the same way as
    in `_check_rewards`), so that a `y_pred` that is already a numpy array is
    also supported. Any other value is checked as-is.

    Raises
    ------
    AssertionError
        If the actions are not contained in `env.action_space`.
    """
    if isinstance(actions, Actions):
        assert isinstance(actions, self.Actions)
        actions = actions.y_pred
    if isinstance(actions, Tensor):
        actions = actions.cpu().numpy()
    assert actions in env.action_space, (actions, env.action_space)
def _check_rewards(self, env: Environment, rewards: Any):
    """Assert that `rewards` are valid rewards for the given environment.

    `Rewards` objects must be of type `self.Rewards`, and are replaced by their
    `y` entry. Tensors are converted to numpy arrays and python numbers are
    wrapped in an array, before checking that the result is contained in
    `env.reward_space`.
    """
    if isinstance(rewards, Rewards):
        assert isinstance(rewards, self.Rewards)
        rewards = rewards.y

    if isinstance(rewards, Tensor):
        rewards = rewards.cpu().numpy()
    elif isinstance(rewards, (int, float)):
        rewards = np.asarray(rewards)
    # (Numpy arrays are used as they are.)
    assert rewards in env.reward_space, (rewards, env.reward_space)
# Just to make type hinters stop throwing errors when using the constructor
# to create a Setting.
def __new__(cls, *args, **kwargs):
    """Create the object by forwarding every argument to the parent's `__new__`."""
    return super().__new__(cls, *args, **kwargs)
@classmethod
def load_benchmark(cls: Type[SettingType], benchmark: Union[str, Path]) -> SettingType:
    """Load the given "benchmark" (pre-configured Setting) of this type.

    Parameters
    ----------
    cls : Type[SettingType]
        Type of Setting to create.
    benchmark : Union[str, Path]
        Either the name of a benchmark (e.g. "cartpole_state", "monsterkong", etc.)
        or a path to a json/yaml file. A name may be given as a `str` or as a
        `Path`.

    Returns
    -------
    SettingType
        Setting of type `cls`, appropriately populated according to the chosen
        benchmark.

    Raises
    ------
    RuntimeError
        If `benchmark` isn't an existing file or a known preset.
    RuntimeError
        If any command-line arguments are present in sys.argv which would be ignored
        when creating this setting.
    """
    # If the provided benchmark isn't a path to a file, try to get the value
    # from the `setting_presets` dict. If it isn't in the dict, raise an error.
    if not Path(benchmark).is_file():
        # The presets are keyed by strings, so a name given as a `Path` has to
        # be converted before the lookup.
        preset_name = str(benchmark)
        if preset_name not in setting_presets:
            raise RuntimeError(
                f"Could not find benchmark '{benchmark}': it "
                f"is neither a path to a file or a key of the "
                f"`setting_presets` dictionary. \n"
                f"(Available presets: {setting_presets}) "
            )
        benchmark = setting_presets[preset_name]

    # Creating an experiment for the given setting, loaded from the
    # config file.
    # TODO: IDEA: Do the same thing for loading the Method?
    logger.info(
        f"Will load the options for setting {cls} from the file " f"at path {benchmark}."
    )

    # Raise an error if any of the args in sys.argv would have been used
    # up by the Setting, just to prevent any ambiguities.
    _, unused_args = cls.from_known_args()
    unused = set(unused_args)
    # The arguments are listed once each, in the order of the command-line, so
    # that the error message is the same from one run to the next.
    consumed_args = list(dict.fromkeys(arg for arg in sys.argv[1:] if arg not in unused))
    if consumed_args:
        # TODO: This could also be trigerred if there were arguments
        # in the method with the same name as some from the Setting.
        raise RuntimeError(
            f"Cannot pass command-line arguments for the Setting when "
            f"loading a benchmark, since these arguments whould have been "
            f"ignored when creating the setting of type {cls} "
            f"anyway: {consumed_args}"
        )

    # Actually load the setting from the file (extra fields are not dropped).
    return cls.load(path=benchmark, drop_extra_fields=False)
================================================
FILE: sequoia/settings/base/setting_meta.py
================================================
"""
"""
import dataclasses
from dataclasses import Field
from typing import Dict, List, Type
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
class SettingMeta(Type["Setting"]):
    """Metaclass for the nodes in the Setting inheritance tree.

    Might remove this. Was experimenting with using this to create class
    properties for each Setting.

    What this currently does is to remove any keyword argument passed to the
    constructor if its value is marked as a 'constant'.

    TODO: A little while back I noticed some strange behaviour when trying
    to create a Setting class (either manually or through the command-line), and
    I attributed it to PL adding a `_DataModuleWrapper` metaclass to
    `LightningDataModule`, which seemed to be causing problems related to
    calling __init__ when using dataclasses. I don't quite recall exactly what
    was happening and was causing an issue, so it would be a good idea to try
    removing this metaclass and writing a test to make sure there was a problem
    to begin with, and also to make sure that adding back this class fixes it.
    """

    def __call__(cls, *args, **kwargs):
        """Create an instance of `cls`, ignoring overrides of 'constant' fields.

        A keyword argument is dropped (and a warning is logged) when it names a
        dataclass field whose metadata has a "constant" entry and its value
        differs from that constant. Every other argument is passed along to the
        constructor unchanged.
        """
        fields: Dict[str, Field] = {field.name: field for field in dataclasses.fields(cls)}
        # Sentinel that tells "no 'constant' entry in the metadata" apart from a
        # field whose constant value is None.
        missing = object()

        # Iterate over a copy, since entries may be removed from `kwargs`.
        for key, value in list(kwargs.items()):
            field = fields.get(key)
            if field is None:
                # We let this through, so that if there is a problem, it is
                # raised when calling the constructor below.
                continue
            # NOTE: Other ideas that were considered here: warning about (or
            # rejecting) fields with init=False that are passed to the
            # constructor, and raising the error from two stack frames up, so
            # that it looks like `cls(blabla=123)` is what's causing it.
            constant_value = field.metadata.get("constant", missing)
            if constant_value is not missing and value != constant_value:
                logger.warning(
                    UserWarning(
                        f"Ignoring argument {key}={value} when creating class "
                        f"{cls}, since it has that field marked as constant with a "
                        f"value of {constant_value}."
                    )
                )
                kwargs.pop(key)
        return super().__call__(*args, **kwargs)

    def __instancecheck__(self, instance):
        """Also treat proxies to a setting of this type as instances of this type."""
        from sequoia.client import SettingProxy

        if isinstance(instance, SettingProxy) or hasattr(instance, "_setting_type"):
            # If the setting is a proxy, then we check if its a proxy to a setting of
            # this type.
            return issubclass(instance._setting_type, self)
        return super().__instancecheck__(instance)
================================================
FILE: sequoia/settings/base/setting_test.py
================================================
import functools
import inspect
from dataclasses import dataclass
from typing import Union
import pytest
from sequoia.methods import Method
from sequoia.utils.utils import constant
from .setting import Setting
@dataclass
class Setting1(Setting):
    """Setting with two ordinary integer fields, used by the tests of this file."""

    foo: int = 1
    bar: int = 2

    def __post_init__(self) -> None:
        # The message is printed before the parent's `__post_init__` is called.
        print(f"Setting1 __init__ ({self})")
        super().__post_init__()
@dataclass
class Setting2(Setting1):
    """Variant of `Setting1` where the `bar` field is declared with `constant(1)`."""

    bar: int = constant(1)

    def __post_init__(self) -> None:
        # The message is printed before the parent's `__post_init__` is called.
        print(f"Setting2 __init__ ({self})")
        super().__post_init__()
@pytest.mark.xfail(reason="Changed this.")
def test_settings_override_with_constant_take_init():
    """Values passed for constant fields are ignored by the constructor.

    Reading such a field on the object gives back the constant value, while the
    values passed for regular fields are set on the object as usual.
    """
    regular = Setting1(foo=3, bar=7)
    assert regular.foo == 3
    assert regular.bar == 7

    # `bar` is a constant in Setting2, so the value of 4 should not be used.
    with_constant = Setting2(foo=4, bar=4)
    assert with_constant.bar == 1.0
    assert with_constant.foo == 4
def test_loading_benchmark_doesnt_overwrite_constant():
    """Loading a setting from json keeps the value of its constant fields."""
    serialized = '{"foo":1, "bar":2}'

    loaded1 = Setting1.loads_json(serialized)
    assert loaded1.foo == 1
    assert loaded1.bar == 2

    # `bar` is a constant in Setting2: the value from the json is not used.
    loaded2 = Setting2.loads_json(serialized)
    assert loaded2.foo == 1
    assert loaded2.bar == 1
def test_init_still_works():
    """Keyword arguments for regular fields still reach the constructor."""
    fraction = 0.01
    created = Setting(val_fraction=fraction)
    assert created.val_fraction == 0.01
def test_passing_unexpected_arg_raises_typeerror():
    """An argument that isn't a field of the setting is rejected with a TypeError."""
    arguments = dict(foo=4, bar=4, baz=123123)
    with pytest.raises(TypeError):
        Setting2(**arguments)
# Small tree of empty settings: SettingA (with two children, SettingA1 and
# SettingA2) and SettingB, which all derive from Setting.
@dataclass
class SettingA(Setting):
    pass


@dataclass
class SettingA1(SettingA):
    pass


@dataclass
class SettingA2(SettingA):
    pass


@dataclass
class SettingB(Setting):
    pass
# Empty methods, which respectively target SettingA, SettingB and the base Setting.
class MethodA(Method, target_setting=SettingA):
    pass


class MethodB(Method, target_setting=SettingB):
    pass


class CoolGeneralMethod(Method, target_setting=Setting):
    pass
def test_that_transforms_can_be_set_through_command_line():
    """The train transforms can be passed to the constructor or on the command-line."""
    from sequoia.common.transforms import Compose, Transforms

    without_transforms = Setting(train_transforms=[])
    assert without_transforms.train_transforms == []

    # NOTE(review): the same command-line check is performed twice in a row;
    # presumably one of them was meant to use a different form for the arguments.
    for _ in range(2):
        parsed = Setting.from_args("--train_transforms channels_first")
        assert parsed.train_transforms == [Transforms.channels_first]
        assert isinstance(parsed.train_transforms, Compose)
from typing import Any, ClassVar, Dict, Type
from sequoia.common.config import Config
from sequoia.methods.random_baseline import RandomBaselineMethod
from .setting import Setting
class SettingTests:
    """Class that groups all the tests for a given setting.

    You should create a test class for your new setting, ideally in a file placed next to the class
    under test, named with the "_test.py" suffix.

    The test class can be created in one of two ways:

    - Either using a 'Setting' class attribute:

    ```python
    from sequoia.settings.base.setting_test import SettingTests

    class TestMySetting(SettingTests):
        Setting = MySetting

        def test_something(self):
            setting = self.Setting(...)
            ...
    ```

    - OR, by passing the `setting` keyword argument to the class statement:

    ```python
    class TestMySetting(SettingTests, setting=MySetting):
        def test_something(self):
            setting = self.Setting(...)
            ...
    ```

    If your setting is based on something more concrete than just the `Setting` class, then you
    should use the associated test class as a base for your new test class:

    ```python
    # (Taking ContinualRLSetting here as an example)
    # *Important*: Remember to rename the test class if needed so that pytest doesn't also run them
    # when testing your module:
    from sequoia.settings.rl.continual.setting_test import TestContinualRLSetting as ContinualRLSettingTests
    from .my_custom_setting import MyCustomSetting

    class TestMyCustomSetting(ContinualRLSettingTests, setting=MyCustomSetting):
        def my_custom_test(self):
            ...

    # OR

    class TestMyCustomSetting(ContinualRLSettingTests):
        Setting = MyCustomSetting
    ```

    This also generates a `dataset` fixture.
    """

    Setting: ClassVar[Type[Setting]]

    # Autogenerated fixture that will yield each entry from the available dataset of the setting
    # class under test.
    dataset: pytest.fixture

    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = {}

    def __init_subclass__(cls, setting: Type[Setting] = None):
        """Autogenerates fixtures on the class under test.

        Raises a RuntimeError when the type of setting under test is given
        neither by the `setting` argument nor by a `Setting` class attribute.
        """
        super().__init_subclass__()
        if not setting and not hasattr(cls, "Setting"):
            raise RuntimeError(
                "Need to either pass `setting` when subclassing or set "
                "a 'Setting' class attribute."
            )
        if setting is not None:
            # Make the setting accessible to tests as either self.Setting or cls.Setting for
            # classmethods.
            cls.Setting = setting
        cls.dataset: pytest.fixture = make_dataset_fixture(cls.Setting)

    def assert_chance_level(self, setting: Setting, results: Setting.Results):
        """Called during testing. Use this to assert that the results you get
        from applying your method on the given setting match your expectations.

        Args:
            setting: The setting on which the method was applied.
            results (Results): A given Results object.
        """
        assert results is not None
        assert results.objective > 0
        print(f"Objective when applied to a setting of type {type(setting)}: {results.objective}")

    @pytest.mark.timeout(60)
    def test_random_baseline(self, config: Config):
        """
        Test that applies a random baseline to the Setting, and checks that the results
        are around chance level.
        """
        # Create the Setting
        setting_type = self.Setting
        # if issubclass(setting_type, ContinualRLSetting):
        #     kwargs.update(max_steps=100, test_steps_per_task=100)
        # if issubclass(setting_type, IncrementalRLSetting):
        #     kwargs.update(nb_tasks=2)
        # if issubclass(setting_type, ClassIncrementalSetting):
        #     kwargs = dict(nb_tasks=5)
        # if issubclass(setting_type, (TraditionalSLSetting, RLSetting)):
        #     kwargs.pop("nb_tasks", None)
        # if isinstance(setting, SLSetting):
        #     method.batch_size = 64
        # elif isinstance(setting, RLSetting):
        #     method.batch_size = None
        #     setting.train_max_steps = 100
        setting: Setting = setting_type(**self.fast_dev_run_kwargs)
        method = RandomBaselineMethod()
        results = setting.apply(method, config=config)
        self.assert_chance_level(setting, results=results)
def make_dataset_fixture(setting_type: Union[Type[Setting], functools.partial]):
    """Create a parametrized fixture that will go through all the available datasets
    for a given setting.

    The "MT10", "MT50", "CW10" and "CW20" datasets are left out, and the
    remaining names are sorted.

    `setting_type.available_datasets` may be any iterable of names: a dictionary
    (in which case its keys are used), a list, etc.
    """

    def dataset(_, request):
        # The fixture just gives back the name of the current dataset.
        return request.param

    if isinstance(setting_type, functools.partial):
        # NOTE(review): this reads the first positional argument of the partial
        # (not `partial.func`) — confirm that this is what the callers expect.
        setting_type = setting_type.args[0]
    assert inspect.isclass(setting_type) and issubclass(setting_type, Setting)

    # Iterating (rather than calling `.keys()`) also works when the available
    # datasets are given as a list instead of a dictionary.
    available = set(setting_type.available_datasets)
    datasets_to_remove = {"MT10", "MT50", "CW10", "CW20"}
    # NOTE: Need deterministic ordering for the datasets for tests to be parallelizable
    # with pytest-xdist.
    datasets = sorted(available - datasets_to_remove)
    return pytest.fixture(
        params=datasets,
        scope="module",
    )(dataset)
================================================
FILE: sequoia/settings/offline_rl/setting.py
================================================
from dataclasses import dataclass
from typing import Any, ClassVar, Dict, List
import gym
from gym.wrappers import RecordEpisodeStatistics
from matplotlib import pyplot as plt
from simple_parsing.helpers import choice
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from sequoia import Results
from sequoia.settings.base import Setting
try:
import d3rlpy
except ImportError as err:
raise RuntimeError(f"You need to have `d3rlpy` installed to use these methods.") from err
@dataclass
class OfflineRLResults(Results):
    """Results of the online test phase of an offline RL setting.

    The objective is the average of the entries of `test_rewards`.
    """

    # Metrics from online testing
    test_rewards: list
    test_episode_length: list
    test_episode_count: list

    objective_name: ClassVar[str] = "Average Reward"

    @property
    def objective(self):
        """Sum of the test rewards, divided by their number."""
        rewards = self.test_rewards
        return sum(rewards) / len(rewards)

    # TODO: Write these methods
    def summary(self) -> str:
        """One-line description of the objective."""
        return f"Offline RL results: {self.objective_name} = {self.objective}"

    def make_plots(self) -> Dict[str, plt.Figure]:
        """No plots are created for now: this gives back an empty dictionary."""
        return {}

    def to_log_dict(self, verbose: bool = False) -> Dict[str, Any]:
        """Dictionary with the objective as its only entry (`verbose` is not used)."""
        return {self.objective_name: self.objective}
# Offline datasets from d3rlpy (not including atari)
# NOTE: These names are passed as-is to `d3rlpy.datasets.get_dataset` by the
# `OfflineRLSetting`.
offline_datasets_from_d3rlpy = {
    "cartpole-replay",
    "cartpole-random",
    "pendulum-replay",
    "pendulum-random",
    "hopper",
    "halfcheetah",
    "walker",
    "ant",
}
# Offline atari datasets from d3rlpy (the entries of `d3rlpy.datasets.ATARI_GAMES`).
offline_atari_datasets_from_d3rlpy = set(d3rlpy.datasets.ATARI_GAMES)
@dataclass
class OfflineRLSetting(Setting):
    """Setting where a method is trained on an offline dataset from d3rlpy, and
    then tested online, on the environment that comes with that dataset.
    """

    # A list of available offline rl datasets
    available_datasets: ClassVar[List[str]] = list(offline_datasets_from_d3rlpy) + list(
        offline_atari_datasets_from_d3rlpy
    )
    # choice of dataset for the current setting
    dataset: str = choice(available_datasets, default="cartpole-replay")
    # size of validation set
    val_size: float = 0.2
    # mask for control bootstrapping
    create_mask: bool = False
    mask_size: int = 1

    def __post_init__(self):
        """Load the chosen dataset and split it into a training and a validation set.

        Raises
        ------
        NotImplementedError
            If the chosen dataset is not one of the d3rlpy datasets.
        """
        if (
            self.dataset not in offline_datasets_from_d3rlpy
            and self.dataset not in offline_atari_datasets_from_d3rlpy
        ):
            # Load other dataset types here
            raise NotImplementedError(
                f"Dataset {self.dataset!r} is not supported: only the offline "
                f"datasets from d3rlpy can be loaded for now."
            )
        # Load d3rlpy offline dataset
        mdp_dataset, self.env = d3rlpy.datasets.get_dataset(
            self.dataset, self.create_mask, self.mask_size
        )
        self.train_dataset, self.valid_dataset = train_test_split(
            mdp_dataset, test_size=self.val_size
        )

    def train_dataloader(self, batch_size: int = None) -> DataLoader:
        """DataLoader over the training portion of the dataset."""
        return DataLoader(self.train_dataset, batch_size=batch_size)

    def val_dataloader(self, batch_size: int = None) -> DataLoader:
        """DataLoader over the validation portion of the dataset."""
        return DataLoader(self.valid_dataset, batch_size=batch_size)

    def test(self, method, test_env: gym.Env):
        """Test `method` on the given `test_env`.

        The env is wrapped with `RecordEpisodeStatistics`, and the actions are
        obtained with `method.get_actions`. The interaction stops at the first
        step where `done` is true, or after `method.test_steps` steps,
        whichever comes first. The env is closed in all cases, even if an error
        is raised during the interaction.

        Returns
        -------
        The `episode_returns`, `episode_lengths` and `episode_count` attributes
        of the wrapper, in that order.
        """
        test_env = RecordEpisodeStatistics(test_env)
        try:
            obs = test_env.reset()
            for _ in range(method.test_steps):
                action = method.get_actions(obs, action_space=test_env.action_space)
                obs, _reward, done, _info = test_env.step(action)
                if done:
                    break
        finally:
            # Don't leave the environment open when something goes wrong.
            test_env.close()
        return test_env.episode_returns, test_env.episode_lengths, test_env.episode_count

    def apply(self, method) -> OfflineRLResults:
        """Configure and train the method on the offline dataset, then test it online.

        The method is fitted with the training and validation datasets, and is
        then tested on the environment that was loaded along with the dataset.
        """
        method.configure(self)
        method.fit(train_env=self.train_dataset, valid_env=self.valid_dataset)
        # Test
        test_rewards, test_episode_length, test_episode_count = self.test(method, self.env)
        return OfflineRLResults(
            test_rewards=test_rewards,
            test_episode_length=test_episode_length,
            test_episode_count=test_episode_count,
        )
================================================
FILE: sequoia/settings/presets/__init__.py
================================================
import os
from pathlib import Path
from typing import Dict
# Folder that contains this package, in which the preset files are searched.
presets_dir = Path(os.path.dirname(__file__))
# Maps the name of each preset (the name of the file, without its extension) to
# the path of the yaml file, looking through all the sub-folders as well.
setting_presets: Dict[str, Path] = dict(
    (preset_file.stem, preset_file) for preset_file in presets_dir.rglob("*.yaml")
)
================================================
FILE: sequoia/settings/presets/cartpole_pixels.yaml
================================================
dataset: PixelCartPole-v0
max_episodes: null
nb_tasks: 3
train_max_steps: 3000
steps_per_task: 1000
test_max_steps: 3000
test_steps_per_task: 1000
train_task_schedule:
0:
gravity: 10
length: 0.2
1000:
gravity: 100
length: 1.2
2000:
gravity: 10
length: 0.2
val_task_schedule:
0:
gravity: 10
length: 0.2
1000:
gravity: 100
length: 1.2
2000:
gravity: 10
length: 0.2
test_task_schedule:
0:
gravity: 10
length: 0.2
1000:
gravity: 100
length: 1.2
2000:
gravity: 10
length: 0.2
================================================
FILE: sequoia/settings/presets/cartpole_state.yaml
================================================
dataset: CartPole-v0
max_episodes: null
nb_tasks: 2
train_max_steps: 4000
test_max_steps: 1000
test_steps_per_task: 500
# TODO: Need to fix these task schedules: they probably won't work the same way in
# the 'Continual' settings as in the IncrementalRL settings. Also need to decide
# what happens with the last key in MultiTask RL.
train_task_schedule:
0:
gravity: 10
length: 0.3
2000:
gravity: 10
length: 0.8
val_task_schedule:
0:
gravity: 10
length: 0.3
2000:
gravity: 10
length: 0.8
================================================
FILE: sequoia/settings/presets/cifar10.yaml
================================================
dataset: cifar10
================================================
FILE: sequoia/settings/presets/cifar100.yaml
================================================
dataset: cifar100
================================================
FILE: sequoia/settings/presets/classic_control/cartpole.yaml
================================================
dataset: cartpole
monitor_training_performance: true
nb_tasks: 8
steps_per_task: 20_000
test_steps_per_task: 10_000
train_task_schedule:
0:
force_mag: 10.0
gravity: 9.8
length: 0.5
masscart: 1.0
masspole: 0.1
tau: 0.02
1:
force_mag: 8.666898797953921
gravity: 7.760853554007704
length: 0.5217446765844818
masscart: 0.8908045485782948
masspole: 0.15674543117467288
tau: 0.0220635245382657
2:
force_mag: 7.458618324495651
gravity: 9.400984342498948
length: 0.6462064142932058
masscart: 1.3539692996769968
masspole: 0.133507111769919
tau: 0.021147855257131764
3:
force_mag: 8.5574863595876
gravity: 6.7285307726150085
length: 0.38294798778813294
masscart: 0.8574588708166866
masspole: 0.0615236260048324
tau: 0.02307661947728138
4:
force_mag: 8.02716944821746
gravity: 11.150504602382693
length: 0.4854716271338247
masscart: 1.0456215435706913
masspole: 0.10899768542795317
tau: 0.019865776370441367
5:
force_mag: 11.700513704843809
gravity: 6.312815408929171
length: 0.45130592348981863
masscart: 1.0380878429865934
masspole: 0.07187238299019481
tau: 0.014052652786485233
6:
force_mag: 13.934001347849406
gravity: 10.133200774940446
length: 0.4905968584092335
masscart: 0.9859796874461285
masspole: 0.08510387732488867
tau: 0.01695718912603805
7:
force_mag: 10.523014205764852
gravity: 9.174287955179715
length: 0.560680060936186
masscart: 0.9513630929456718
masspole: 0.07683588323840541
tau: 0.016089633251709107
================================================
FILE: sequoia/settings/presets/classic_control/mountaincar_continuous.yaml
================================================
dataset: MountainCarContinuous-v0
monitor_training_performance: true
nb_tasks: 8
train_max_steps: 160_000
train_steps_per_task: 20_000
test_max_steps: 80_000
test_steps_per_task: 10_000
train_task_schedule:
0:
goal_position: 0.45
goal_velocity: 0
1:
goal_position: 0.4565062937130897
goal_velocity: 0
2:
goal_position: 0.526503904898121
goal_velocity: 0
3:
goal_position: 0.37901356007820275
goal_velocity: 0
4:
goal_position: 0.5132810016616194
goal_velocity: 0
5:
goal_position: 0.5023364056388072
goal_velocity: 0
6:
goal_position: 0.47315246637784114
goal_velocity: 0
7:
goal_position: 0.45239346485932264
goal_velocity: 0
================================================
FILE: sequoia/settings/presets/fashion_mnist.yaml
================================================
dataset: fashion_mnist
# Two classes per task:
increment: 2
test_increment: 2
================================================
FILE: sequoia/settings/presets/mnist.yaml
================================================
dataset: mnist
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_3each.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 2
3:
level: 10
4:
level: 11
5:
level: 12
6:
level: 20
7:
level: 21
8:
level: 22
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_4each.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 2
3:
level: 3
4:
level: 10
5:
level: 11
6:
level: 12
7:
level: 13
8:
level: 20
9:
level: 21
10:
level: 22
11:
level: 23
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_5each.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 2
3:
level: 3
4:
level: 4
5:
level: 10
6:
level: 11
7:
level: 12
8:
level: 13
9:
level: 14
10:
level: 20
11:
level: 21
12:
level: 22
13:
level: 23
14:
level: 24
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_all.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 2
3:
level: 3
4:
level: 4
5:
level: 5
6:
level: 6
7:
level: 7
8:
level: 8
9:
level: 9
10:
level: 10
11:
level: 11
12:
level: 12
13:
level: 13
14:
level: 14
15:
level: 15
16:
level: 16
17:
level: 17
18:
level: 18
19:
level: 19
20:
level: 20
21:
level: 21
22:
level: 22
23:
level: 23
24:
level: 24
25:
level: 25
26:
level: 26
27:
level: 27
28:
level: 28
29:
level: 29
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_jumps.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 2
3:
level: 3
4:
level: 4
5:
level: 5
6:
level: 6
7:
level: 7
8:
level: 8
9:
level: 9
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_jumps_and_ladders.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 20
1:
level: 21
2:
level: 22
3:
level: 23
4:
level: 24
5:
level: 25
6:
level: 26
7:
level: 27
8:
level: 28
9:
level: 29
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_ladders.yaml
================================================
dataset: monsterkong
steps_per_task: 10_000_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 10
1:
level: 11
2:
level: 12
3:
level: 13
4:
level: 14
5:
level: 15
6:
level: 16
7:
level: 17
8:
level: 18
9:
level: 19
================================================
FILE: sequoia/settings/presets/monsterkong/monsterkong_mix.yaml
================================================
dataset: monsterkong
monitor_training_performance: true
force_pixel_observations: true
nb_tasks: 8
train_max_steps: 1_600_000
train_steps_per_task: 200_000
test_steps_per_task: 10_000
test_max_steps: 80_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 10
3:
level: 11
4:
level: 20
5:
level: 21
6:
level: 30
7:
level: 31
================================================
FILE: sequoia/settings/presets/mujoco/half_cheetah.yaml
================================================
dataset: ContinualHalfCheetah-v2
monitor_training_performance: true
nb_tasks: 8
train_steps_per_task: 200_000
test_steps_per_task: 10_000
train_task_schedule:
0:
gravity: -9.81
1:
gravity: -7.3087968946619615
2:
gravity: -5.615716866871361
3:
gravity: -12.45890973547683
4:
gravity: -7.6875976238634465
5:
gravity: -5.807262467656652
6:
gravity: -8.448144726367474
7:
gravity: -7.750512896029625
================================================
FILE: sequoia/settings/presets/rl_track.yaml
================================================
dataset: monsterkong
known_task_boundaries_at_train_time: true
known_task_boundaries_at_test_time: false
task_labels_at_train_time: true
task_labels_at_test_time: false
monitor_training_performance: true
steps_per_task: 200_000
test_steps_per_task: 10_000
train_task_schedule:
0:
level: 0
1:
level: 1
2:
level: 10
3:
level: 11
4:
level: 20
5:
level: 21
6:
level: 30
7:
level: 31
================================================
FILE: sequoia/settings/presets/sl_track.yaml
================================================
dataset: synbols
nb_tasks: 12
known_task_boundaries_at_train_time: true
known_task_boundaries_at_test_time: false
task_labels_at_train_time: true
task_labels_at_test_time: false
monitor_training_performance: true
================================================
FILE: sequoia/settings/rl/__init__.py
================================================
from .environment import RLEnvironment
from .setting import RLSetting
ActiveEnvironment = RLEnvironment
from .continual import ContinualRLSetting, make_continuous_task
from .discrete import DiscreteTaskAgnosticRLSetting, make_discrete_task
from .incremental import IncrementalRLSetting, make_incremental_task
# TODO: Properly Add the multi-task RL setting.
from .multi_task import MultiTaskRLSetting
from .task_incremental import TaskIncrementalRLSetting
from .traditional import TraditionalRLSetting
================================================
FILE: sequoia/settings/rl/continual/__init__.py
================================================
from .environment import GymDataLoader
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .results import ContinualRLResults
from .setting import ContinualRLSetting
from .tasks import make_continuous_task
ContinualRLEnvironment = GymDataLoader
Results = ContinualRLResults
================================================
FILE: sequoia/settings/rl/continual/environment.py
================================================
""" Dataloader for a Gym Environment. Uses multiple parallel environments.
TODO: @lebrice: We need to decide which of these two behaviours we want to
support in the GymDataLoader, (if not both):
- Either iterate over the dataset and get the usual 4-item tuples like gym,
by using a policy to generate the actions,
OR
- Give back 3-item tuples (without the reward) and give the reward when
users send back an action for the current observation. Users would either
be required to send actions back after each observation or to provide a
policy to "fill-in-the-gaps" and select the action when the model doesn't
send one back.
The traditional supervised dataloader can be easily recovered in this second
case: since the reward doesn't depend on the action, we can just send back a
random or None action to the dataloader, and group the returned reward with
the batch of observations, before yielding the (observations, rewards)
batch.
In either case, we can easily keep the `step` API from gym available.
Need to talk more about this for sure.
"""
import warnings
from typing import Any, Iterable, Iterator, Optional, TypeVar, Union
import gym
import numpy as np
from gym import Wrapper, spaces
from gym.utils.colorize import colorize
from gym.vector import AsyncVectorEnv, VectorEnv
from gym.vector.utils import batch_space
from torch import Tensor
from torch.utils.data import IterableDataset
from sequoia.common.gym_wrappers import EnvDataset, IterableWrapper
from sequoia.common.gym_wrappers.policy_env import PolicyEnv
from sequoia.common.gym_wrappers.utils import StepResult
from sequoia.settings.base.objects import Actions
from sequoia.settings.rl.environment import ActiveEnvironment
from sequoia.utils.logging_utils import get_logger
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Generic, unconstrained type variable.
T = TypeVar("T")
# TODO: The typing information from sequoia.settings.base.environment isn't quite
# accurate here... The observations are bound by Tensors or numpy arrays, not
# 'Batch' objects.
# from sequoia.settings.base.environment import ObservationType, ActionType, RewardType
# Unbound type variables used (instead of the imports above) to parametrize the
# environment class defined in this module.
ObservationType = TypeVar("ObservationType")
ActionType = TypeVar("ActionType")
RewardType = TypeVar("RewardType")
class GymDataLoader(
    ActiveEnvironment[ObservationType, ActionType, RewardType], IterableWrapper, Iterable
):
    """Environment for RL settings.

    Exposes **both** the `gym.Env` as well as the "Active" DataLoader APIs.

    This is useful because it makes it easy to adapt a method originally made for SL so
    that it can also work in a reinforcement learning context, where the rewards (e.g.
    image labels, or correct/incorrect prediction, etc.) are only given *after* the
    action (e.g. y_pred) has been received by the environment.

    This means that you can use this in two different ways:

    1. Gym-style using `step`:
        1. Agent   --------- action ----------------> Env
        2. Agent   <---(state, reward, done, info)--- Env

    2. ActiveDataLoader style, using `iter` and `send`:
        1. Agent   <--- observation --- Env
        2. Agent   ------ action -----> Env
        3. Agent   <----- reward ------ Env

    This would look something like this in code:

    ```python
    # The wrapped env has to be an IterableDataset (e.g. an EnvDataset or PolicyEnv).
    env = GymDataLoader(EnvDataset(make_batched_env("CartPole-v0", batch_size=32)))

    for states in env:
        actions = actor(states)
        rewards = env.send(actions)
        loss = loss_function(...)

    # OR:

    state = env.reset()
    for i in range(max_steps):
        action = self.actor(state)
        states, reward, done, info = env.step(action)
        loss = loss_function(...)
    ```
    """

    def __init__(
        self,
        env: Union[EnvDataset, PolicyEnv] = None,
        dataset: Union[EnvDataset, PolicyEnv] = None,
        batch_size: int = None,
        num_workers: int = None,
        **kwargs,
    ):
        """Wrap an iterable gym environment as a dataloader.

        Parameters
        ----------
        env :
            The environment to wrap. Must be an `IterableDataset` (for instance an
            `EnvDataset` or a `PolicyEnv`).
        dataset :
            Alias for `env`. Exactly one of `env` and `dataset` must be given.
        batch_size :
            Ignored (with a warning when it differs) when the env is vectorized, in
            which case the number of environments is used instead.
        num_workers :
            Ignored: the stored value is the number of envs for an `AsyncVectorEnv`
            and 0 otherwise.
        **kwargs :
            Forwarded to the parent constructor.

        Raises
        ------
        RuntimeError
            If the given environment isn't an `IterableDataset`.
        """
        assert not (
            env is None and dataset is None
        ), "One of the `dataset` or `env` arguments must be passed."
        assert not (
            env is not None and dataset is not None
        ), "Only one of the `dataset` and `env` arguments can be used."

        # `dataset` is just another name for `env`: use whichever one was given.
        # (Previously `dataset` was validated above but then never used, so passing
        # only `dataset` always ended up in the RuntimeError below.)
        if env is None:
            env = dataset

        if not isinstance(env, IterableDataset):
            raise RuntimeError(
                f"The env {env} isn't an interable dataset! (You can use the "
                f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
                f"from a gym environment."
            )

        if isinstance(env.unwrapped, VectorEnv):
            if batch_size is not None and batch_size != env.num_envs:
                logger.warning(
                    UserWarning(
                        f"The provided batch size {batch_size} will be ignored, since "
                        f"the provided env is vectorized with a batch_size of "
                        f"{env.unwrapped.num_envs}."
                    )
                )
            batch_size = env.num_envs

        if isinstance(env.unwrapped, AsyncVectorEnv):
            num_workers = env.num_envs
        else:
            num_workers = 0

        self.env = env
        # NOTE: The batch_size and num_workers attributes reflect the values from the
        # iterator (the VectorEnv), not those of the dataloader.
        # This is done in order to avoid pytorch workers being ever created, and also so
        # that pytorch-lightning stops warning us that the num_workers is too low.
        self._batch_size = batch_size
        self._num_workers = num_workers
        super().__init__(
            dataset=self.env,
            # The batch size is None, because the VecEnv takes care of
            # doing the batching for us.
            batch_size=None,
            num_workers=0,
            collate_fn=None,
            **kwargs,
        )
        Wrapper.__init__(self, env=self.env)
        assert not isinstance(self.env, GymDataLoader), "Something very wrong is happening."

        self.observation_space: gym.Space = self.env.observation_space
        self.action_space: gym.Space = self.env.action_space
        self.reward_space: gym.Space

        if isinstance(env.unwrapped, VectorEnv):
            batch_size = env.num_envs
            # TODO: Overwriting the action space to be the 'batched' version of
            # the single action space, rather than a Tuple(Discrete, ...) as is
            # done in the gym.vector.VectorEnv.
            self.action_space = batch_space(env.single_action_space, batch_size)

        if not hasattr(self.env, "reward_space"):
            # Build a reward space from the reward range when the env has none.
            self.reward_space = spaces.Box(
                low=self.env.reward_range[0],
                high=self.env.reward_range[1],
                shape=(),
                dtype=np.float64,
            )
            if isinstance(self.env.unwrapped, VectorEnv):
                # Same here, we use a 'batched' space rather than Tuple.
                self.reward_space = batch_space(self.reward_space, batch_size)

        # BUG: Fix this bug: the observation / action spaces don't accept Tensors as
        # valid samples, even though they should.
        # TODO: Apply something like `add_tensor_support` to the three spaces.

    @property
    def num_workers(self) -> Optional[int]:
        """Number of workers, as determined by the wrapped (vectorized) env."""
        return self._num_workers

    @num_workers.setter
    def num_workers(self, value: Any) -> None:
        # The value is fixed at construction: only warn when something tries to
        # change it to a different, non-zero value.
        if value and value != self._num_workers:
            warnings.warn(
                RuntimeWarning(
                    f"Can't set num_workers to {value}, it's hard-set to {self._num_workers}"
                )
            )

    @property
    def batch_size(self) -> Optional[int]:
        """Batch size, as determined by the wrapped (vectorized) env."""
        return self._batch_size

    @batch_size.setter
    def batch_size(self, value: Any) -> None:
        # The value is fixed at construction. `None` is ignored silently, since that
        # is what the DataLoader constructor assigns (we pass it `batch_size=None`),
        # which would otherwise produce a spurious warning on every instantiation.
        if value is not None and value != self._batch_size:
            warnings.warn(
                RuntimeWarning(
                    f"Can't set batch size to {value}, it's hard-set to {self._batch_size}"
                )
            )

    def __next__(self) -> ObservationType:
        """Return the next item, creating the iterator first if there is none."""
        if self._iterator is None:
            self._iterator = self.__iter__()
        return next(self._iterator)

    def _obs_have_done_signal(self) -> bool:
        """Try to determine if the observations contain the 'done' signal or not."""
        return (
            isinstance(self.observation_space, spaces.Dict)
            and "done" in self.observation_space.spaces
        )

    def __iter__(self) -> Iterator:
        """Iterate over the wrapped env directly, bypassing the dataloader iterator.

        Emits a warning when the env is vectorized but its observations don't appear
        to contain the 'done' signal.
        """
        # TODO: Pretty sure this could be greatly simplified by just always using the
        # loop from EnvDataset.
        # TODO: BUG: Wrappers applied on top of the GymDataLoader won't have an
        # effect on the values yielded by this iterator. Currently trying to fix
        # this inside the IterableWrapper base class, but it's not that simple.
        if self.is_vectorized:
            if not self._obs_have_done_signal():
                warnings.warn(
                    RuntimeWarning(
                        colorize(
                            f"You are iterating over a vectorized env, but the observations "
                            f"don't seem to contain the 'done' signal! You should definitely "
                            f"consider applying something like an `AddDoneToObservation` "
                            f"wrapper to each individual env before vectorization. ",
                            "red",
                        )
                    )
                )
        return self.env.__iter__()

    def step(self, action: Union[ActionType, Any]) -> StepResult:
        """Gym-style step: delegates to the parent implementation."""
        return super().step(action)

    def send(self, action: Union[ActionType, Any]) -> RewardType:
        """Send an action for the current observation and return the reward.

        `Actions` objects, tensors and numpy arrays are first converted into
        something that the action space of the wrapped env can contain.
        """
        # TODO: Remove this unwrapping code, and instead only unwrap stuff if necessary
        # for the environment.
        if isinstance(action, Actions):
            action = action.y_pred
        if isinstance(action, Tensor):
            action = action.detach().cpu().numpy()
        if isinstance(action, np.ndarray) and not action.shape:
            # Zero-dimensional array: extract the python scalar.
            action = action.item()
        if isinstance(self.env.action_space, spaces.Tuple) and isinstance(action, np.ndarray):
            action = action.tolist()
        assert action in self.env.action_space, (action, self.env.action_space)
        return super().send(action)
================================================
FILE: sequoia/settings/rl/continual/environment_test.py
================================================
from typing import ClassVar, Optional, Type
import gym
import numpy as np
import pytest
import torch
from gym import spaces
from gym.vector.utils import batch_space
from torch import Tensor
from sequoia.common.gym_wrappers import EnvDataset, PixelObservationWrapper
from sequoia.conftest import param_requires_atari_py
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import take
from .environment import GymDataLoader
from .make_env import make_batched_env
logger = get_logger(__name__)
class TestGymDataLoader:
    """Tests for the `GymDataLoader` class.

    The tests are grouped into a class so that another test module can inherit from
    it (for instance the tests for the EnvironmentProxy class) and run the same tests
    against a different class, by overwriting the `GymDataLoader` class attribute.
    Every test therefore has to create its loader through `self.GymDataLoader`.
    """

    # The class under test. Subclasses can overwrite this.
    GymDataLoader: ClassVar[Type[GymDataLoader]] = GymDataLoader

    @pytest.mark.parametrize("batch_size", [1, 2, 5])
    @pytest.mark.parametrize(
        "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")]
    )
    def test_spaces(self, env_name: str, batch_size: int):
        """Observations, actions and rewards should fit the (batched) spaces."""
        dataset = EnvDataset(make_batched_env(env_name, batch_size=batch_size))

        batched_obs_space = dataset.observation_space
        # NOTE: the VectorEnv class creates the 'batched' action space by creating a
        # Tuple of the single action space, of length 'N', which seems a bit weird.
        batched_action_space = batch_space(dataset.single_action_space, batch_size)

        dataloader_env = self.GymDataLoader(dataset, batch_size=batch_size)
        assert dataloader_env.observation_space == batched_obs_space
        assert dataloader_env.action_space == batched_action_space

        dataloader_env.reset()
        for observation_batch in take(dataloader_env, 3):
            if isinstance(observation_batch, Tensor):
                observation_batch = observation_batch.cpu().numpy()
            assert observation_batch in batched_obs_space

            actions = dataloader_env.action_space.sample()
            assert len(actions) == batch_size
            assert actions in batched_action_space

            rewards = dataloader_env.send(actions)
            # BUG: rewards has dtype np.float64, while the space has np.float32.
            assert len(rewards) == batch_size
            assert rewards in dataloader_env.reward_space

    @pytest.mark.parametrize("batch_size", [None, 1, 2, 5])
    @pytest.mark.parametrize(
        "env_name", ["CartPole-v0", param_requires_atari_py("ALE/Breakout-v5")]
    )
    def test_max_steps_is_respected(self, env_name: str, batch_size: int):
        """Iteration should stop once the `ActionLimit` has been reached."""
        max_steps = 5
        # NOTE: The parametrized `env_name` is overwritten here, so this test always
        # runs on CartPole.
        env_name = "CartPole-v0"
        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env)
        from sequoia.common.gym_wrappers.action_limit import ActionLimit

        dataset = ActionLimit(dataset, max_steps=max_steps * (batch_size or 1))
        env: GymDataLoader = self.GymDataLoader(dataset)
        env.reset()
        i = 0
        for i, obs in enumerate(env):
            assert obs in env.observation_space
            assert i < max_steps, f"Max steps should have been respected: {i}"
            env.send(env.action_space.sample())
        assert i == max_steps - 1
        env.close()

    @pytest.mark.parametrize("batch_size", [None, 1, 2, 5])
    @pytest.mark.parametrize("seed", [None, 123, 456])
    def test_multiple_epochs_works(self, batch_size: Optional[int], seed: Optional[int]):
        """It should be possible to iterate over the env for more than one 'epoch'."""
        epochs = 3
        max_steps_per_episode = 10

        from gym.wrappers import TimeLimit

        from sequoia.common.gym_wrappers import AddDoneToObservation
        from sequoia.conftest import DummyEnvironment

        def env_fn():
            # FIXME: Using the DummyEnvironment for now since it's easier to debug with.
            env = DummyEnvironment()
            env = AddDoneToObservation(env)
            env = TimeLimit(env, max_episode_steps=max_steps_per_episode)
            return env

        env = make_batched_env(env_fn, batch_size=batch_size)

        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

        env = ConvertToFromTensors(env)
        env = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)
        env: GymDataLoader = self.GymDataLoader(env)
        # BUG: Seems to be a little bug in the shape of the items yielded by the env due
        # to the concat_fn of the DataLoader.
        env.seed(seed)

        all_rewards = []
        with env:
            for epoch in range(epochs):
                for step, obs in enumerate(env):
                    print(f"'epoch' {epoch}, step {step}:, obs: {obs}")
                    assert obs in env.observation_space, obs.shape
                    assert (  # BUG: This isn't working: (sometimes!)
                        step < max_steps_per_episode
                    ), "Max steps per episode should have been respected."
                    rewards = env.send(env.action_space.sample())

                    if batch_size is None:
                        all_rewards.append(rewards)
                    else:
                        all_rewards.extend(rewards)

                # Since in the VectorEnv, 'episodes' are infinite, we must have
                # reached the limit of the number of steps, while in a single
                # environment, the episode might have been shorter.
                assert step <= max_steps_per_episode - 1

            assert epoch == epochs - 1

        if batch_size in [None, 1]:
            # Some episodes might last shorter than the max number of steps per episode,
            # therefore the total should be at most this much:
            assert len(all_rewards) <= epochs * max_steps_per_episode
        else:
            # The maximum number of steps per episode is set, but the env is vectorized,
            # so the number of 'total' rewards we get from all envs should be *exactly*
            # this much:
            assert len(all_rewards) == epochs * max_steps_per_episode * batch_size

    @pytest.mark.parametrize("batch_size", [1, 2, 5])
    @pytest.mark.parametrize("env_name", [param_requires_atari_py("ALE/Breakout-v5")])
    def test_reward_isnt_always_one(self, env_name: str, batch_size: int):
        """The rewards collected over a few epochs shouldn't all be equal to one."""
        epochs = 3
        max_steps_per_episode = 100

        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        env: GymDataLoader = self.GymDataLoader(env=dataset)
        all_rewards = []
        with env:
            env.reset()
            for _epoch in range(epochs):
                for _batch in env:
                    rewards = env.send(env.action_space.sample())
                    all_rewards.extend(rewards)

        assert all_rewards != np.ones(len(all_rewards)).tolist()

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_batched_state(self, env_name: str, batch_size: int):
        """States, actions and rewards should all have a leading batch dimension."""
        max_steps_per_episode = 10

        env = make_batched_env(env_name, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        # NOTE: Using `self.GymDataLoader` (rather than the module-level class) so that
        # subclasses of this test class actually test their own class here as well.
        env: GymDataLoader = self.GymDataLoader(
            dataset,
            batch_size=batch_size,
        )

        with gym.make(env_name) as temp_env:
            state_shape = temp_env.observation_space.shape
            action_shape = temp_env.action_space.shape

        state_shape = (batch_size, *state_shape)
        action_shape = (batch_size, *action_shape)
        reward_shape = (batch_size,)

        state = env.reset()
        assert state.shape == state_shape
        env.seed(123)
        i = 0
        for obs_batch in take(env, 5):
            assert obs_batch.shape == state_shape

            random_actions = env.action_space.sample()
            assert torch.as_tensor(random_actions).shape == action_shape
            # Only the spaces of the (closed) temporary env are used here.
            assert temp_env.action_space.contains(random_actions[0])

            reward = env.send(random_actions)
            assert reward.shape == reward_shape
            i += 1
        assert i == 5

    @pytest.mark.parametrize("env_name", ["CartPole-v0"])
    @pytest.mark.parametrize("batch_size", [1, 2, 5, 10])
    def test_batched_pixels(self, env_name: str, batch_size: int):
        """Same as `test_batched_state`, but with pixel observations."""
        max_steps_per_episode = 10
        # Skip this test when pyglet isn't installed.
        pytest.importorskip("pyglet")

        wrappers = [PixelObservationWrapper]
        env = make_batched_env(env_name, wrappers=wrappers, batch_size=batch_size)
        dataset = EnvDataset(env, max_steps_per_episode=max_steps_per_episode)

        with gym.make(env_name) as temp_env:
            for wrapper in wrappers:
                temp_env = wrapper(temp_env)

            state_shape = temp_env.observation_space.shape
            action_shape = temp_env.action_space.shape

        state_shape = (batch_size, *state_shape)
        action_shape = (batch_size, *action_shape)
        reward_shape = (batch_size,)

        env = self.GymDataLoader(
            dataset,
            batch_size=batch_size,
        )
        assert isinstance(env.observation_space, spaces.Box)
        assert len(env.observation_space.shape) == 4
        assert env.observation_space.shape[0] == batch_size

        env.seed(1234)
        for batch in env:
            assert len(batch) == batch_size

            if isinstance(batch, Tensor):
                batch = batch.cpu().numpy()
            assert batch in env.observation_space

            random_actions = env.action_space.sample()
            assert torch.as_tensor(random_actions).shape == action_shape
            assert temp_env.action_space.contains(random_actions[0])

            reward = env.send(random_actions)
            assert reward.shape == reward_shape
================================================
FILE: sequoia/settings/rl/continual/make_env.py
================================================
"""Creates an IterableDataset from a gym env by applying different wrappers.
"""
import multiprocessing as mp
import warnings
from functools import partial
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Type, TypeVar, Union
import gym
from gym import Wrapper
from gym.vector import AsyncVectorEnv, SyncVectorEnv, VectorEnv
from sequoia.utils.logging_utils import get_logger
# Module-level logger, named after this module.
logger = get_logger(__name__)
# Type variable bound to gym environments and wrappers.
W = TypeVar("W", bound=Union[gym.Env, gym.Wrapper])
# A wrapper class paired with the keyword arguments to create it with.
WrapperAndKwargs = Tuple[Type[gym.Wrapper], Dict]
def make_batched_env(
    base_env: Union[str, Callable],
    batch_size: int = 10,
    wrappers: Iterable[Union[Type[Wrapper], WrapperAndKwargs]] = None,
    shared_memory: bool = True,
    num_workers: Optional[int] = None,
    **kwargs,
) -> VectorEnv:
    """Create a vectorized environment from multiple copies of an environment.

    NOTE: This function does pretty much the same as `gym.vector.make`, but with
    a bit more flexibility:
    - Allows passing an env factory to start with, rather than only taking ids.
    - Allows passing wrappers to be added to the env on each worker.
    - Allows passing tuples of (Type[Wrapper], kwargs).

    Parameters
    ----------
    base_env : str or Callable
        The environment ID (or an environment factory). When a string, it is passed
        to `gym.make`.

    batch_size : int
        Number of copies of the environment (as well as batch size). When `None`, a
        single (non-vectorized) environment is created and returned instead.

    wrappers : Iterable of wrapper types or (wrapper type, kwargs) tuples
        If not `None`, then apply the wrappers to each internal environment
        during creation.

    shared_memory : bool
        Passed to the `AsyncVectorEnv` constructor.

    num_workers : Optional[int]
        Number of workers to use. When 0, the returned environment is a
        `SyncVectorEnv`. When `num_workers` == `batch_size`, returns an
        `AsyncVectorEnv`. Other values are not supported for now. When `None`
        (default), uses 0 when `batch_size` is 1, and `batch_size` otherwise.

    **kwargs : Dict
        Keyword arguments passed to the environment factory (`gym.make` when
        `base_env` is an id).

    Returns
    -------
    env : `gym.vector.VectorEnv` instance
        The vectorized environment (or a single environment when `batch_size` is
        `None`).

    Raises
    ------
    NotImplementedError
        If `base_env` is neither a string nor a callable.
    RuntimeError
        If `num_workers` is neither 0 nor equal to `batch_size`.

    Example
    -------
    >>> env = make_batched_env("CartPole-v0", batch_size=3)
    >>> env.reset().shape
    (3, 4)
    """
    # No wrappers by default.
    wrappers = wrappers or []

    base_env_factory: Callable[[], gym.Env]
    if isinstance(base_env, str):
        base_env_factory = partial(gym.make, base_env)
    elif callable(base_env):
        base_env_factory = base_env
    else:
        raise NotImplementedError(
            f"Unsupported base env: {base_env}. Must be " f"either a string or a callable for now."
        )

    def pre_batch_env_factory():
        """Create a single environment and apply the wrappers to it, in order."""
        env = base_env_factory(**kwargs)
        for wrapper in wrappers:
            if isinstance(wrapper, tuple):
                assert len(wrapper) == 2 and isinstance(wrapper[1], dict)
                wrapper = partial(wrapper[0], **wrapper[1])
            env = wrapper(env)
        return env

    if batch_size is None:
        # No batching: just return a single wrapped environment.
        return pre_batch_env_factory()

    env_fns = [pre_batch_env_factory for _ in range(batch_size)]

    if num_workers is None:
        # Only 0 workers and one worker per environment are supported below, so the
        # default has to be one of those two values. (Capping the default at the
        # number of CPUs made this function raise the RuntimeError below whenever
        # `batch_size` was larger than the number of CPUs on the machine.)
        num_workers = 0 if batch_size == 1 else batch_size

    if num_workers == 0:
        if batch_size > 1:
            warnings.warn(
                UserWarning(
                    f"Running {batch_size} environments in series, which might be "
                    f"slow. Consider setting the `num_workers` argument, perhaps to "
                    f"the number of CPUs on your machine."
                )
            )
        return SyncVectorEnv(env_fns)

    if num_workers == batch_size:
        return AsyncVectorEnv(env_fns, shared_memory=shared_memory)

    raise RuntimeError("Need num_workers to match batch_size for now.")
def wrap(env: gym.Env, wrappers: Iterable[Union[Type[Wrapper], WrapperAndKwargs]]) -> Wrapper:
    """Apply the given wrappers to `env`, in order, and return the wrapped env.

    Each entry of `wrappers` is either a wrapper type (or some other callable), or a
    tuple of a wrapper type and the arguments to create it with.
    """
    # Turn every entry into a callable that takes an env and returns the wrapped env,
    # then chain those callables.
    wrapped = env
    for apply_wrapper in _make_wrapper_fns(list(wrappers)):
        wrapped = apply_wrapper(wrapped)
    return wrapped
def _make_wrapper_fns(
    wrappers_and_args: Iterable[Union[Type[Wrapper], Tuple[Type[Wrapper], Dict]]]
) -> List[Callable[[Wrapper], Wrapper]]:
    """Convert wrapper specifications into a list of single-argument callables.

    Each entry is either a wrapper class (or any callable), which is used as-is, or a
    tuple/list of the form (wrapper, *args, kwargs), which is turned into a partial
    that only needs to be given the env to wrap.
    """

    def _as_wrapper_fn(spec):
        if not isinstance(spec, (tuple, list)):
            # Already a wrapper type or some other kind of callable.
            return spec
        # (wrapper, (args?), kwargs): the last item holds the keyword arguments.
        wrapper, *args, kwargs = spec
        logger.debug(f"Wrapper: {wrapper}, args: {args}, kwargs: {kwargs}")
        return partial(wrapper, *args, **kwargs)

    specs = list(wrappers_and_args or [])
    return [_as_wrapper_fn(spec) for spec in specs]
================================================
FILE: sequoia/settings/rl/continual/make_env_test.py
================================================
"""
Tests that check that combining wrappers works fine in combination.
"""
from typing import Union
import gym
import pytest
import torch
from gym.vector import AsyncVectorEnv, SyncVectorEnv
from sequoia.conftest import requires_pyglet, slow_param
from .make_env import make_batched_env
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)])
def test_make_batched_env(env_name: str, batch_size: int):
    """Observations, actions and rewards of the batched env have a batch dimension."""
    expected_obs_shape = (batch_size, 4)
    expected_batch_shape = (batch_size,)

    batched_env = make_batched_env(base_env=env_name, batch_size=batch_size)
    initial_obs = batched_env.reset()
    assert initial_obs.shape == expected_obs_shape

    for _ in range(10):
        actions = batched_env.action_space.sample()
        assert torch.as_tensor(actions).shape == expected_batch_shape
        step_result = batched_env.step(actions)
        obs, reward = step_result[0], step_result[1]
        assert len(step_result) == 4
        assert obs.shape == expected_obs_shape
        assert reward.shape == expected_batch_shape
@pytest.mark.xfail(
    reason="Not sure that the 'id' function gives an 'absolute' memory adress, or if "
    "the address is process-relative, in which case it might be an explanation as to "
    "why these tests don't work."
)
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [4])
@pytest.mark.parametrize("num_workers", [0, 4])
def test_make_batched_env_envs_have_distinct_ids(env_name: str, batch_size: int, num_workers: int):
    """Every copy of the env (and of the unwrapped env) should be a distinct object."""
    from gym.wrappers import TimeLimit

    def base_env_fn():
        return TimeLimit(gym.make(env_name), max_episode_steps=10)

    # With 0 workers we get a SyncVectorEnv; with as many workers as environments we
    # get an AsyncVectorEnv.
    vector_env: Union[SyncVectorEnv, AsyncVectorEnv] = make_batched_env(
        base_env=base_env_fn, batch_size=batch_size, num_workers=num_workers
    )

    if not isinstance(vector_env, SyncVectorEnv):
        assert isinstance(vector_env, AsyncVectorEnv)
        # The envs live in the workers, so the ids have to be fetched remotely.
        wrapper_ids = vector_env.apply(id)
        assert len(set(wrapper_ids)) == batch_size
        inner_ids = vector_env.apply(get_unwrapped_id)
        assert len(set(inner_ids)) == batch_size
        return

    local_envs = vector_env.envs
    # The wrappers must be distinct objects...
    assert len({id(local_env) for local_env in local_envs}) == batch_size
    # ...and so must the unwrapped envs.
    assert len({id(local_env.unwrapped) for local_env in local_envs}) == batch_size
def get_unwrapped_id(env):
    """Return the `id` of the object found at `env.unwrapped`."""
    innermost_env = env.unwrapped
    return id(innermost_env)
@requires_pyglet
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)])
def test_make_env_with_wrapper(env_name: str, batch_size: int):
    """The wrappers passed to `make_batched_env` are applied to each env."""
    expected_state_shape = (batch_size, 400, 600, 3)
    expected_batch_shape = (batch_size,)

    batched_env = make_batched_env(
        base_env=env_name,
        batch_size=batch_size,
        wrappers=[PixelObservationWrapper],
    )
    initial_state = batched_env.reset()
    assert initial_state.shape == expected_state_shape

    for _ in range(10):
        actions = batched_env.action_space.sample()
        assert torch.as_tensor(actions).shape == expected_batch_shape
        step_result = batched_env.step(actions)
        obs, reward = step_result[0], step_result[1]
        assert len(step_result) == 4
        assert obs.shape == expected_state_shape
        assert reward.shape == expected_batch_shape
from gym.vector import AsyncVectorEnv
from sequoia.common.gym_wrappers import MultiTaskEnvironment, PixelObservationWrapper
@pytest.mark.xfail(reason="TODO: Check if gym supports remote getattr now.")
@pytest.mark.parametrize("env_name", ["CartPole-v0"])
@pytest.mark.parametrize("batch_size", [1, 5, slow_param(10)])
def test_make_env_with_wrapper_and_kwargs(env_name: str, batch_size: int):
    """Attributes of the wrapped envs should be readable through the batched env."""
    # NOTE: Since BatchVectorEnv and our subclasses of the vectorenvs in gym got removed,
    # we lost the ability to use the remote getattr feature.
    task_schedule = {0: dict(length=0.5), 50: dict(length=1.5)}
    expected_state_shape = (batch_size, 400, 600, 3)
    expected_batch_shape = (batch_size,)

    batched_env = make_batched_env(
        base_env=env_name,
        batch_size=batch_size,
        wrappers=[
            PixelObservationWrapper,
            lambda env: MultiTaskEnvironment(env, task_schedule=task_schedule),
        ],
        # One worker per env, so that we get an AsyncVectorEnv.
        num_workers=batch_size,
    )
    initial_state = batched_env.reset()
    assert initial_state.shape == expected_state_shape

    for _ in range(100):
        actions = batched_env.action_space.sample()
        assert torch.as_tensor(actions).shape == expected_batch_shape
        assert batched_env.length == [2.0] * batch_size
        step_result = batched_env.step(actions)
        obs, reward = step_result[0], step_result[1]
        assert len(step_result) == 4
        assert obs.shape == expected_state_shape
        assert reward.shape == expected_batch_shape
================================================
FILE: sequoia/settings/rl/continual/objects.py
================================================
from dataclasses import dataclass
from typing import Optional, Sequence, TypeVar, Union
from torch import Tensor
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.rl import RLSetting
@dataclass(frozen=True)
class Observations(RLSetting.Observations, ContinualAssumption.Observations):
    """Observations from a Continual Reinforcement Learning environment.

    Frozen dataclass that combines the `Observations` classes of `RLSetting` and of
    `ContinualAssumption`.
    """

    # The observation itself.
    x: Tensor
    # The task labels, when there are any. Defaults to None.
    task_labels: Optional[Tensor] = None
    # The 'done' that is normally returned by the 'step' method.
    # We add this here in case a method were to iterate on the environments in the
    # dataloader-style so they also have access to those (i.e. for the BaseMethod).
    done: Optional[Union[bool, Sequence[bool]]] = None
@dataclass(frozen=True)
class Actions(RLSetting.Actions, ContinualAssumption.Actions):
    """Actions to be sent to a Continual Reinforcement Learning environment.

    Frozen dataclass that combines the `Actions` classes of `RLSetting` and of
    `ContinualAssumption`.
    """

    # The action itself, stored under the `y_pred` name.
    y_pred: Tensor
@dataclass(frozen=True)
class Rewards(RLSetting.Rewards, ContinualAssumption.Rewards):
    """Rewards obtained from a Continual Reinforcement Learning environment.

    Frozen dataclass that combines the `Rewards` classes of `RLSetting` and of
    `ContinualAssumption`.
    """

    # The reward itself, stored under the `y` name.
    y: Tensor
# Type variables bound to the Observations / Actions / Rewards classes of this module.
ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)
================================================
FILE: sequoia/settings/rl/continual/results.py
================================================
from typing import ClassVar, Generic, TypeVar
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.continual import ContinualResults
from sequoia.utils.plotting import autolabel, plt
# Type variable for the kind of metrics stored in the results; it is bound to
# `EpisodeMetrics`.
MetricType = TypeVar("MetricType", bound=EpisodeMetrics)
class ContinualRLResults(ContinualResults, Generic[MetricType]):
    """Results for a ContinualRLSetting."""

    # Higher mean reward / episode => better
    lower_is_better: ClassVar[bool] = False

    objective_name: ClassVar[str] = "Mean reward per episode"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained for going faster than this.)
    min_runtime_hours: ClassVar[float] = 1.5

    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 12.0

    def mean_reward_plot(self):
        """Create a plot of the mean reward obtained on each task.

        Raises:
            NotImplementedError: always, since this plot isn't implemented yet.
        """
        # TODO: Implement this as a bar chart with one bar per task. The sketch
        # that used to follow this `raise` was unreachable, and had been copied
        # from an accuracy plot (it read `metrics.accuracy` and used "Accuracy"
        # labels), so it would not have produced a mean-reward plot as-is.
        raise NotImplementedError("TODO")
================================================
FILE: sequoia/settings/rl/continual/setting.py
================================================
""" Current most general Setting in the Reinforcement Learning side of the tree.
"""
import difflib
import json
import textwrap
import warnings
from dataclasses import dataclass, fields
from functools import partial
from pathlib import Path
from typing import Any, Callable, ClassVar, Dict, List, Optional, Type, Union
import gym
import numpy as np
from gym import spaces
from gym.envs.registration import EnvSpec, registry
from gym.utils import colorize
from gym.wrappers import TimeLimit
from simple_parsing import choice, field, list_field
from simple_parsing.helpers import dict_field
try:
from stable_baselines3.common.atari_wrappers import AtariWrapper as SB3AtariWrapper
except ImportError:
class SB3AtariWrapper:
pass
from gym.wrappers.atari_preprocessing import AtariPreprocessing as GymAtariWrapper
import wandb
from sequoia.common import Config
from sequoia.common.gym_wrappers import (
AddDoneToObservation,
MultiTaskEnvironment,
RenderEnvWrapper,
SmoothTransitions,
TransformObservation,
TransformReward,
)
from sequoia.common.gym_wrappers.action_limit import ActionLimit
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit
from sequoia.common.gym_wrappers.pixel_observation import ImageObservations
from sequoia.common.gym_wrappers.utils import is_atari_env
from sequoia.common.spaces import Sparse, TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.base import Method
from sequoia.settings.rl import ActiveEnvironment, RLSetting
from sequoia.settings.rl.wrappers import (
HideTaskLabelsWrapper,
MeasureRLPerformanceWrapper,
TypedObjectsWrapper,
)
from sequoia.utils import get_logger
from sequoia.utils.generic_functions import move
from sequoia.utils.utils import flag, pairwise
from .environment import GymDataLoader
from .make_env import make_batched_env
from .objects import Actions, Observations, Rewards # type: ignore
from .results import ContinualRLResults
from .tasks import ContinuousTask, TaskSchedule, is_supported, make_continuous_task, names_match
from .test_environment import ContinualRLTestEnvironment
logger = get_logger(__name__)

# Type alias for the Environment returned by `train/val/test_dataloader`.
# The type arguments are forward references (strings), since `ContinualRLSetting`
# is only defined further down in this module.
# NOTE(review): this assumes that `ActiveEnvironment` is parameterized as
# (observations, actions, rewards), which matches the order of the TypeVars in
# `objects.py` - confirm against its definition. The second argument used to be
# a duplicate of `ContinualRLSetting.Observations`.
Environment = ActiveEnvironment[
    "ContinualRLSetting.Observations",
    "ContinualRLSetting.Actions",
    "ContinualRLSetting.Rewards",
]

# NOTE: Takes about 0.2 seconds to check for all compatible envs (with loading), and
# only happens once.
supported_envs: Dict[str, EnvSpec] = {
    spec.id: spec for env_id, spec in registry.env_specs.items() if is_supported(env_id)
}
# Maps each name that can be passed as `dataset` to the id of the corresponding
# env. For now, the names are the env ids themselves.
available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs}
# available_datasets.update(
#     {camel_case(env_id.split("-v")[0]): env_id for env_id in supported_envs}
# )
@dataclass
class ContinualRLSetting(RLSetting, ContinualAssumption):
"""Reinforcement Learning Setting where the environment changes over time.
This is an Active setting which uses gym environments as sources of data.
These environments' attributes could change over time following a task
schedule. An example of this could be that the gravity increases over time
in cartpole, making the task progressively harder as the agent interacts with
the environment.
"""
# (NOTE: commenting out SLSetting.Observations as it is the same class
# as Setting.Observations, and we want a consistent method resolution order.
Observations: ClassVar[Type[Observations]] = Observations
Actions: ClassVar[Type[Actions]] = Actions
Rewards: ClassVar[Type[Rewards]] = Rewards
# The type of results returned by an RL experiment.
Results: ClassVar[Type[Results]] = ContinualRLResults
# The type wrapper used to wrap the test environment, and which produces the
# results.
TestEnvironment: ClassVar[Type[TestEnvironment]] = ContinualRLTestEnvironment
# Dict of all available options for the 'dataset' field below.
available_datasets: ClassVar[Dict[str, Union[str, Any]]] = available_datasets
# The function used to create the tasks for the chosen env.
_task_sampling_function: ClassVar[Callable[..., ContinuousTask]] = make_continuous_task
# Which environment (a.k.a. "dataset") to learn on.
# The dataset could be either a string (env id or a key from the
# available_datasets dict), a gym.Env, or a callable that returns a
# single environment.
dataset: str = choice(available_datasets, default="CartPole-v0")
# The number of "tasks" that will be created for the training, valid and test
# environments.
# NOTE: In the case of settings with smooth task boundaries, this is the number of
# "base" tasks which are created, and the task space consists of interpolations
# between these base tasks.
# When left unset, will use a default value that makes sense
# (something like 5).
nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])
# Environment/dataset to use for training. Defaults to the same as `dataset`.
train_dataset: Optional[str] = None
# Environment/dataset to use for validation. Defaults to the same as `dataset`.
val_dataset: Optional[str] = None
# Environment/dataset to use for testing. Defaults to the same as `dataset`.
test_dataset: Optional[str] = None
# Whether the task boundaries are smooth or sudden.
smooth_task_boundaries: bool = True
# Whether the tasks are sampled uniformly. (This is set to True in MultiTaskRLSetting
# and below)
stationary_context: bool = False
# Max number of training steps in total. (Also acts as the "length" of the training
# and validation "Datasets")
train_max_steps: int = 100_000
# Maximum number of episodes in total.
# TODO: Add tests for this 'max episodes' and 'episodes_per_task'.
train_max_episodes: Optional[int] = None
# Total number of steps in the test loop. (Also acts as the "length" of the testing
# environment.)
test_max_steps: int = 10_000
test_max_episodes: Optional[int] = None
# Standard deviation of the multiplicative Gaussian noise that is used to
# create the values of the env attributes for each task.
task_noise_std: float = 0.2
# NOTE: THIS ARG IS DEPRECATED! Only keeping it here so previous config yaml files
# don't cause a crash.
observe_state_directly: Optional[bool] = None
# NOTE: Removing those, in favor of just using the registered Pixel<...>-v? variant.
# force_pixel_observations: bool = False
# """ Wether to use the "pixel" version of `self.dataset`.
# When `False`, does nothing.
# When `True`, will do one of the following, depending on the choice of environment:
# - For classic control envs, it adds a `PixelObservationsWrapper` to the env.
# - For atari envs:
# - If `self.dataset` is a regular atari env (e.g. "ALE/Breakout-v5"), does nothing.
# - if `self.dataset` is the 'RAM' version of an atari env, raises an error.
# - For mujoco envs, this raises a NotImplementedError, as we don't yet know how to
# make a pixel-version the Mujoco Envs.
# - For other envs:
# - If the environment's observation space appears to be image-based, an error
# will be raised.
# - If the environment's observation space doesn't seem to be image-based, does
# nothing.
# """
# force_state_observations: bool = False
# """ Wether to use the "state" version of `self.dataset`.
# When `False`, does nothing.
# When `True`, will do one of the following, depending on the choice of environment:
# - For classic control envs, it does nothing, as they are already state-based.
# - TODO: For atari envs, the 'RAM' version of the chosen env will be used.
# - For mujoco envs, it doesn nothing, as they are already state-based.
# - For other envs, if this is set to True, then
# - If the environment's observation space appears to be image-based, an error
# will be raised.
# - If the environment's observation space doesn't seem to be image-based, does
# nothing.
# """
# NOTE: Removing this from the continual setting.
# By default 1 for this setting, meaning that the context is a linear interpolation
# between the start context (usually the default task for the environment) and a
# randomly sampled task.
# nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])
# Whether to convert the observations / actions / rewards of the envs (and their
# spaces) such that they return Tensors rather than numpy arrays.
# TODO: Maybe switch this to True by default?
prefer_tensors: bool = False
# Path to a json file from which to read the train task schedule.
train_task_schedule_path: Optional[Path] = None
# Path to a json file from which to read the validation task schedule.
val_task_schedule_path: Optional[Path] = None
# Path to a json file from which to read the test task schedule.
test_task_schedule_path: Optional[Path] = None
# Whether observations from the environments should include
# the end-of-episode signal. Only really useful if your method will iterate
# over the environments in the dataloader style
# (as does the baseline method).
add_done_to_observations: bool = False
# The maximum number of steps per episode. When None, there is no limit.
max_episode_steps: Optional[int] = None
# Transforms to be applied by default to the observations of the train/valid/test
# environments.
transforms: List[Transforms] = list_field()
# Transforms to be applied to the training environment, in addition to those already
# in `transforms`.
train_transforms: List[Transforms] = list_field()
# Transforms to be applied to the validation environment, in addition to those
# already in `transforms`.
val_transforms: List[Transforms] = list_field()
# Transforms to be applied to the testing environment, in addition to those already
# in `transforms`.
test_transforms: List[Transforms] = list_field()
# When True, a Monitor-like wrapper will be applied to the training environment
# and monitor the 'online' performance during training. Note that in SL, this will
# also cause the Rewards (y) to be withheld until actions are passed to the `send`
# method of the Environment.
monitor_training_performance: bool = flag(True)
#
# -------- Fields below don't have corresponding command-line arguments. -----------
#
train_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
val_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
test_task_schedule: Dict[int, Dict[str, float]] = dict_field(cmd=False)
# TODO: Naming is a bit inconsistent, using `valid` here, whereas we use `val`
# elsewhere.
train_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
val_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
test_wrappers: List[Callable[[gym.Env], gym.Env]] = list_field(cmd=False)
# keyword arguments to be passed to the base environment through gym.make(base_env, **kwargs).
base_env_kwargs: Dict = dict_field(cmd=False)
batch_size: Optional[int] = field(default=None, cmd=False)
num_workers: Optional[int] = field(default=None, cmd=False)
# Maximum number of training steps per task.
# NOTE: In this particular setting there aren't clear 'tasks' to speak of.
train_steps_per_task: Optional[int] = None
# Number of test steps per task.
# NOTE: In this particular setting there aren't clear 'tasks' to speak of.
test_steps_per_task: Optional[int] = None
# # Deprecated: use `train_max_steps` instead.
# max_steps: Optional[int] = deprecated_property(redirects_to="train_max_steps")
# # Deprecated: use `test_max_steps` instead.
# test_steps: Optional[int] = deprecated_property(redirects_to="test_max_steps")
# # Deprecated, use `train_steps_per_task` instead.
# steps_per_task: Optional[int] = deprecated_property(redirects_to="train_steps_per_task")
def __post_init__(self):
    """Validate the fields and reconcile the ones that depend on each other.

    After calling the parent `__post_init__`, this:

    - resolves `dataset` against `available_datasets`, and fills in
      `train_dataset` / `val_dataset` / `test_dataset`;
    - loads the task schedules from their json files when paths are given, and
      creates the schedules that are still missing;
    - re-keys the schedules whose keys look like task ids (0..N), so that their
      keys are steps instead;
    - makes `nb_tasks`, `train_max_steps`, `test_max_steps`,
      `train_steps_per_task` and `test_steps_per_task` agree with the schedules.

    Raises:
        gym.error.UnregisteredEnv: If the dataset can't be matched to one of the
            available datasets, and no `train_envs` attribute is set.
        RuntimeError: If the task schedules, the number of tasks and the step
            budgets are inconsistent with each other.
    """
    # Default value of every field, used below to tell whether a field was left
    # at its default or explicitly set.
    defaults = {f.name: f.default for f in fields(self)}
    super().__post_init__()

    # TODO: Fix annoying little issues with this trio of fields that are interlinked:
    if self.test_steps_per_task is not None:
        # We need set the value of self.test_max_steps and self.test_steps_per_task
        if self.test_task_schedule and max(self.test_task_schedule) != len(
            self.test_task_schedule
        ):
            self.test_max_steps = max(self.test_task_schedule)
        elif self.test_max_steps == defaults["test_max_steps"]:
            self.test_max_steps = self.nb_tasks * self.test_steps_per_task
        else:
            self.nb_tasks = self.test_max_steps // self.test_steps_per_task

    if self.dataset and self.dataset not in self.available_datasets.values():
        try:
            self.dataset = find_matching_dataset(self.available_datasets, self.dataset)
        except NotImplementedError:
            logger.info(f"Will try to use custom dataset {self.dataset}.")
        except Exception as e:
            if getattr(self, "train_envs", []):
                logger.info("Using custom environments / datasets.")
            else:
                # Chain the original error, so that its traceback isn't lost.
                raise gym.error.UnregisteredEnv(
                    f"({e}) The chosen dataset/environment ({self.dataset}) isn't in the dict of "
                    f"available datasets/environments, and a task schedule was not passed, "
                    f"so this Setting ({type(self).__name__}) doesn't know how to create "
                    f"tasks for that env!\n"
                    f"Supported envs:\n"
                    + ("\n".join(f"- {k}: {v}" for k, v in self.available_datasets.items()))
                ) from e

    # The ids of the train/valid/test environments.
    self.train_dataset: Union[str, Callable[[], gym.Env]] = self.train_dataset or self.dataset
    self.val_dataset: Union[str, Callable[[], gym.Env]] = self.val_dataset or self.dataset
    self.test_dataset: Union[str, Callable[[], gym.Env]] = self.test_dataset or self.dataset

    logger.info(f"Chosen dataset: {textwrap.shorten(str(self.train_dataset), 50)}")

    assert self.train_max_steps, "assuming this should always be set, for now."

    # Load the task schedules from the corresponding files, if present.
    if self.train_task_schedule_path:
        self.train_task_schedule = _load_task_schedule(self.train_task_schedule_path)
        self.nb_tasks = len(self.train_task_schedule) - 1
    if self.val_task_schedule_path:
        self.val_task_schedule = _load_task_schedule(self.val_task_schedule_path)
    if self.test_task_schedule_path:
        self.test_task_schedule = _load_task_schedule(self.test_task_schedule_path)

    self.train_env: gym.Env
    self.valid_env: gym.Env
    self.test_env: gym.Env

    # Temporary environments which are created and used only for creating the task
    # schedules and the 'base' observation spaces.
    self._temp_train_env: Optional[gym.Env] = self._make_env(self.train_dataset)
    self._temp_val_env: Optional[gym.Env] = None
    self._temp_test_env: Optional[gym.Env] = None

    # Create the task schedules, using the 'task sampling' function from `tasks.py`.
    # TODO: The validation logic for these fields needs to be simplified.
    if not self.train_task_schedule:
        self.train_task_schedule = self.create_train_task_schedule()
    elif max(self.train_task_schedule) == len(self.train_task_schedule) - 1:
        # If the keys correspond to the task ids rather than the steps:
        if self.nb_tasks in [defaults["nb_tasks"], None]:
            self.nb_tasks = len(self.train_task_schedule) - 1
            if self.nb_tasks < 1:
                raise RuntimeError(f"Need at least 2 entries in the task schedule!")
            logger.info(
                f"Assuming that the last entry in the provided task schedule is "
                f"the final state, and that there are {self.nb_tasks} tasks. "
            )
        self.train_steps_per_task = (
            self.train_steps_per_task or self.train_max_steps // self.nb_tasks
        )
        # Spread the tasks evenly over the training steps.
        new_keys = np.linspace(
            0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
        ).tolist()
        assert len(new_keys) == len(self.train_task_schedule)
        self.train_task_schedule = type(self.train_task_schedule)(
            {
                new_key: self.train_task_schedule[old_key]
                for new_key, old_key in zip(new_keys, sorted(self.train_task_schedule.keys()))
            }
        )
    elif self.smooth_task_boundaries:
        # We have a task schedule for Continual RL.
        if self.train_max_steps == defaults["train_max_steps"]:
            self.train_max_steps = max(self.train_task_schedule)
        # NOTE: Need to have an entry at the final step
        last_task_step = max(self.train_task_schedule.keys())
        last_task = self.train_task_schedule[last_task_step]
        if self.train_max_steps not in self.train_task_schedule:
            # FIXME Duplicating the last task for now?
            self.train_task_schedule[self.train_max_steps] = last_task

    if 0 not in self.train_task_schedule:
        raise RuntimeError(
            "`train_task_schedule` needs an entry at key 0, as the initial state"
        )

    if self.train_max_steps != max(self.train_task_schedule):
        if self.train_max_steps in [defaults["train_max_steps"], None]:
            # TODO: This might be wrong no?
            self.train_max_steps = max(self.train_task_schedule)
            logger.info(f"Setting `train_max_steps` to {self.train_max_steps}")
        elif self.smooth_task_boundaries:
            raise RuntimeError(
                f"For now, the train task schedule needs to have a value at key "
                f"`train_max_steps` ({self.train_max_steps})."
            )
        else:
            last_task_step = max(self.train_task_schedule)
            last_task = self.train_task_schedule[last_task_step]
            logger.debug("Using the last task as the final state.")
            self.train_task_schedule[self.train_max_steps] = last_task

    if not self.val_task_schedule:
        # Avoid creating an additional env, just reuse the train_temp_env.
        self._temp_val_env = (
            self._temp_train_env
            if self.val_dataset == self.train_dataset
            else self._make_env(self.val_dataset)
        )
        self.val_task_schedule = self.create_val_task_schedule()
    elif max(self.val_task_schedule) == len(self.val_task_schedule) - 1:
        # If the keys correspond to the task ids rather than the transition steps
        old_keys = sorted(self.val_task_schedule.keys())
        new_keys = np.linspace(
            0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
        ).tolist()
        # NOTE(review): this compares against the length of the *train* task
        # schedule, not the validation one - confirm that this is intended.
        assert len(new_keys) == len(self.train_task_schedule)
        self.val_task_schedule = type(self.val_task_schedule)(
            {
                new_key: self.val_task_schedule[old_key]
                for new_key, old_key in zip(new_keys, old_keys)
            }
        )

    if not self.test_task_schedule:
        # Same as above: reuse the temporary train env when the datasets match,
        # otherwise create a temporary env for the *test* dataset. (This used to
        # create an env for the validation dataset instead.)
        self._temp_test_env = (
            self._temp_train_env
            if self.test_dataset == self.train_dataset
            else self._make_env(self.test_dataset)
        )
        self.test_task_schedule = self.create_test_task_schedule()
    elif max(self.test_task_schedule) == len(self.test_task_schedule) - 1:
        # If the keys correspond to the task ids rather than the transition steps
        old_keys = sorted(self.test_task_schedule.keys())
        new_keys = np.linspace(
            0, self.test_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
        ).tolist()
        self.test_task_schedule = type(self.test_task_schedule)(
            {
                new_key: self.test_task_schedule[old_key]
                for new_key, old_key in zip(new_keys, old_keys)
            }
        )

    if 0 not in self.test_task_schedule:
        raise RuntimeError("`test_task_schedule` needs an entry at key 0, as the initial state")

    if self.test_max_steps != max(self.test_task_schedule):
        if self.test_max_steps == defaults["test_max_steps"]:
            self.test_max_steps = max(self.test_task_schedule)
            logger.info(f"Setting `test_max_steps` to {self.test_max_steps}")
        elif self.smooth_task_boundaries:
            raise RuntimeError(
                f"For now, the test task schedule needs to have a value at key "
                f"`test_max_steps` ({self.test_max_steps}). "
            )

    # Close the temporary environments.
    # NOTE: Avoid closing the envs for now in case 'live' envs were passed to the Setting.
    if self._temp_train_env:
        # self._temp_train_env.close()
        pass
    if self._temp_val_env and self._temp_val_env is not self._temp_train_env:
        # self._temp_val_env.close()
        pass
    if self._temp_test_env and self._temp_test_env is not self._temp_train_env:
        # self._temp_test_env.close()
        pass

    # Number of steps between each pair of consecutive keys in the schedules.
    train_task_lengths: List[int] = [
        task_b_step - task_a_step
        for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys()))
    ]
    # TODO: This will crash if nb_tasks is 1, right?
    test_task_lengths: List[int] = [
        task_b_step - task_a_step
        for task_a_step, task_b_step in pairwise(sorted(self.test_task_schedule.keys()))
    ]

    if not (
        len(self.train_task_schedule)
        == len(self.test_task_schedule)
        == len(self.val_task_schedule)
    ):
        raise RuntimeError(
            "Training, validation and testing task schedules should have the same "
            "number of items for now."
        )

    train_last_boundary = max(set(self.train_task_schedule.keys()) - {self.train_max_steps})
    test_last_boundary = max(set(self.test_task_schedule.keys()) - {self.test_max_steps})

    # TODO: Really annoying validation logic for these fields needs to be simplified
    # somehow.
    # FIXME: This is quite confusing:
    # The last entry of the schedule is the final state, not an additional task.
    expected_nb_tasks = len(self.train_task_schedule) - 1
    if self.nb_tasks != expected_nb_tasks:
        if self.nb_tasks in [None, defaults["nb_tasks"]]:
            assert len(self.train_task_schedule) == len(self.test_task_schedule)
            self.nb_tasks = len(self.train_task_schedule) - 1
            logger.info(f"`nb_tasks` set to {self.nb_tasks} based on the task schedule")
        else:
            raise RuntimeError(
                f"The passed number of tasks ({self.nb_tasks}) is inconsistent "
                f"with train_max_steps ({self.train_max_steps}) and the "
                f"passed task schedule (with keys "
                f"{self.train_task_schedule.keys()}): "
                f"Expected nb_tasks to be None or {expected_nb_tasks}."
            )

    if not train_task_lengths:
        assert not test_task_lengths
        assert expected_nb_tasks == 1
        assert self.train_max_steps > 0
        assert self.test_max_steps > 0
        train_max_steps = self.train_max_steps
        test_max_steps = self.test_max_steps
    else:
        train_max_steps = sum(train_task_lengths)
        test_max_steps = sum(test_task_lengths)

    if self.train_max_steps != train_max_steps:
        if self.train_max_steps == defaults["train_max_steps"]:
            self.train_max_steps = train_max_steps
        else:
            raise RuntimeError(
                f"Value of train_max_steps ({self.train_max_steps}) is "
                f"inconsistent with the given train task schedule, which has "
                f"the last task boundary at step {train_last_boundary}, with "
                f"task lengths of {train_task_lengths}, as it suggests the maximum "
                f"total number of steps to be {train_last_boundary} + "
                f"{train_task_lengths[-1]} => {train_max_steps}!"
            )

    if self.test_max_steps != test_max_steps:
        if self.test_max_steps == defaults["test_max_steps"]:
            self.test_max_steps = test_max_steps
        else:
            raise RuntimeError(
                f"Value of test_max_steps ({self.test_max_steps}) is "
                f"inconsistent with the given test task schedule (which has keys "
                f"{self.test_task_schedule.keys()}). Expected the last key to be "
                f"{test_max_steps}"
            )

    if self.train_steps_per_task is None:
        self.train_steps_per_task = self.train_max_steps // self.nb_tasks
    # TODO: Fix these annoying interactions once and for all.
    assert self.train_max_steps // self.nb_tasks == self.train_steps_per_task, (
        self.train_max_steps,
        self.nb_tasks,
        self.train_steps_per_task,
        self.train_task_schedule.keys(),
    )

    if self.test_steps_per_task is None:
        self.test_steps_per_task = self.test_max_steps // self.nb_tasks
    assert self.test_max_steps // self.nb_tasks == self.test_steps_per_task, (
        self.test_max_steps,
        self.nb_tasks,
        self.test_steps_per_task,
        self.test_task_schedule.keys(),
    )
def create_train_task_schedule(self) -> TaskSchedule:
    """Create the task schedule used during training.

    The keys of the schedule are `nb_tasks + 1` evenly spaced steps, going from 0
    to the total number of training steps (inclusive). The tasks themselves are
    created by `self.create_task_schedule`, using the temporary train env.

    Ex: nb_tasks == 5, train_max_steps == 10_000:
        steps = [0, 2_000, 4_000, 6_000, 8_000, 10_000]
    """
    steps_per_task = self.train_steps_per_task
    # When the number of steps per task is set, it determines the total length.
    total_steps = (
        self.train_max_steps if steps_per_task is None else steps_per_task * self.nb_tasks
    )
    assert self.nb_tasks is not None

    boundaries = np.linspace(
        0, total_steps, num=self.nb_tasks + 1, endpoint=True, dtype=int
    ).tolist()
    # TODO: Add properties for the train/valid/test seeds?
    seed = self.config.seed if self.config else 123
    return self.create_task_schedule(
        temp_env=self._temp_train_env,
        change_steps=boundaries,
        seed=seed,
    )
def create_val_task_schedule(self) -> TaskSchedule:
    """Create the validation task schedule, as a copy of the train task schedule."""
    # For now, validation always uses the same tasks as training.
    train_schedule = self.train_task_schedule
    return train_schedule.copy()
def create_test_task_schedule(self) -> TaskSchedule[ContinuousTask]:
    """Create the test task schedule, by re-scaling the train task schedule.

    The tasks are the same as in the train task schedule (in the same order), but
    the keys are evenly spaced steps going from 0 to the total number of test
    steps (inclusive).
    """
    # NOTE: Using the same tasks as in training and validation for now.
    train_schedule = self.train_task_schedule
    # The last entry of the train schedule is counted as the final state, rather
    # than as a task of its own.
    nb_tasks = len(train_schedule) - 1 if train_schedule else self.nb_tasks

    # TODO: Do we want to re-allow the `test_steps_per_task` argument?
    if self.test_steps_per_task is None:
        total_steps = self.test_max_steps
    else:
        total_steps = self.test_steps_per_task * nb_tasks

    boundaries = np.linspace(0, total_steps, num=nb_tasks + 1, endpoint=True, dtype=int).tolist()
    return dict(zip(boundaries, train_schedule.values()))
def create_task_schedule(
    self,
    temp_env: gym.Env,
    change_steps: List[int],
    seed: int = None,
) -> Dict[int, Dict]:
    """Build a task schedule with one task for each step in `change_steps`.

    The task for each step is created by calling the `_task_sampling_function` of
    this type of Setting with `temp_env`, the step, all the `change_steps` and the
    `seed`.

    Returns a dictionary mapping from each step to the task created for it, i.e.
    the changes that will occur in the env when that step is reached.

    TODO: For now in ContinualRL we use an interpolation of a dict of attributes
    to be set on the unwrapped env, but in IncrementalRL it is possible to pass
    callables to be applied on the environment at a given timestep.
    """
    # TODO: Make it possible to use something other than steps as keys in the task
    # schedule, e.g. a number of episodes, or even a time delta.
    # TODO: Pass whether the task is for training/validation/testing?
    # NOTE: The function is looked up on the class, rather than on the instance.
    return {
        step: type(self)._task_sampling_function(
            temp_env,
            step=step,
            change_steps=change_steps,
            seed=seed,
        )
        for step in change_steps
    }
@property
def observation_space(self) -> TypedDictSpace:
    """The un-batched observation space, based on the choice of dataset and
    the transforms at `self.transforms` (which apply to the train/valid/test
    environments).

    The returned spaces is a TypedDictSpace, with the following properties/items:
    - `x`: observation space (e.g. `Image` space)
    - `task_labels`: Union[Discrete, Sparse[Discrete]]
       The task labels for each sample when task labels are available,
       otherwise the task labels space is `Sparse`, and entries will be `None`.
    - `done`: boolean space for the end-of-episode signal. It is wrapped in a
       `Sparse` space when `add_done_to_observations` is False.
    """
    # TODO: Is it right that we set the observation space on the Setting to be the
    # observation space of the current train environment?
    # In what situation could there be any difference between those?
    # - Changing the 'transforms' attributes after training?
    # if self.train_env is not None:
    #     # assert self._observation_space == self.train_env.observation_space
    #     return self.train_env.observation_space
    if isinstance(self._temp_train_env.observation_space, TypedDictSpace):
        # The temporary env already has a structured observation space: reuse
        # its `x` and `task_labels` entries.
        x_space = self._temp_train_env.observation_space.x
        task_label_space = self._temp_train_env.observation_space.task_labels
    else:
        x_space = self._temp_train_env.observation_space
        # apply the transforms to the observation space.
        # NOTE(review): confirm that the transforms are meant to be skipped when
        # the env already exposes a TypedDictSpace.
        for transform in self.transforms:
            x_space = transform(x_space)
        task_label_space = self.task_label_space

    done_space = spaces.Box(0, 1, shape=(), dtype=bool)
    if not self.add_done_to_observations:
        done_space = Sparse(done_space, sparsity=1)

    observation_space = TypedDictSpace(
        x=x_space,
        task_labels=task_label_space,
        done=done_space,
        dtype=self.Observations,
    )

    if self.prefer_tensors:
        observation_space = add_tensor_support(observation_space)
    assert isinstance(observation_space, TypedDictSpace)
    return observation_space
@property
def task_label_space(self) -> gym.Space:
    """The space of the task labels: a scalar `Box` between 0 and 1.

    The space is wrapped in a `Sparse` space unless the task labels are available
    both at train time and at test time.
    """
    # TODO: Explore an alternative design for the task sampling, based more around
    # gym spaces rather than the generic function approach that's currently used?
    # FIXME: This isn't really elegant, there isn't a `nb_tasks` attribute on the
    # ContinualRLSetting anymore, so we have to do a bit of a hack.. Would be
    # cleaner to maybe put this in the assumption class, under
    # `self.task_label_space`?
    label_space = spaces.Box(0.0, 1.0, shape=())
    labels_at_train_time = self.task_labels_at_train_time
    labels_at_test_time = self.task_labels_at_test_time

    if labels_at_train_time and labels_at_test_time:
        # Task labels are always available.
        return label_space

    # When the task labels are available in only one of the two phases, we have
    # them "50%" of the time, ish. Otherwise, we never have them.
    sparsity = 0.5 if labels_at_train_time ^ labels_at_test_time else 1
    return Sparse(label_space, sparsity=sparsity)
@property
def action_space(self) -> gym.Space:
    """The action space of the temporary train environment."""
    # TODO: Convert the action/reward spaces so they also use TypedDictSpace (even
    # if they just have one item), so that it correctly reflects the objects that
    # the envs accept, e.g.:
    # action_space = TypedDictSpace(y_pred=y_pred_space, dtype=self.Actions)
    return self._temp_train_env.action_space
@property
def reward_space(self) -> gym.Space:
    """The reward space of the temporary train environment.

    Falls back to a scalar `Box` spanning the `reward_range` of the env when the
    env doesn't have a `reward_space` attribute.
    """
    bounds = self._temp_train_env.reward_range
    # NOTE: The fallback space is always created, even if it ends up unused.
    fallback_space = spaces.Box(bounds[0], bounds[1], shape=())
    return getattr(self._temp_train_env, "reward_space", fallback_space)
def apply(self, method: Method, config: Config = None) -> "ContinualRLSetting.Results":
    """Apply the given method on this setting to produce some results.

    Parameters
    ----------
    method : Method
        The method to evaluate. Its `configure` and `receive_results` methods are
        called, and it is passed to `self.main_loop`.
    config : Config, optional
        Configuration to use. When not given (or falsy), one is created through
        `self._setup_config(method)`.

    Returns
    -------
    ContinualRLSetting.Results
        The results produced by `self.main_loop(method)`.
    """
    # Use the supplied config, or parse one from the arguments that were
    # used to create `self`.
    self.config = config or self._setup_config(method)
    logger.debug(f"Config: {self.config}")

    # TODO: Test to make sure that this doesn't cause any other bugs with respect to
    # the display of stuff:
    # Call this method, which creates a virtual display if necessary.
    self.config.get_display()

    # TODO: Should we really overwrite the method's 'config' attribute here?
    if not getattr(method, "config", None):
        method.config = self.config

    # TODO: Remove `Setting.configure(method)` entirely, from everywhere,
    # and use the `prepare_data` or `setup` methods instead (since these
    # `configure` methods aren't using the `method` anyway.)
    method.configure(setting=self)

    def _log_schedule(log, title, schedule):
        """Log `schedule` as JSON, or entry by entry if it isn't serializable."""
        try:
            log(title + json.dumps(schedule, indent="\t"))
        except TypeError:
            # The task schedule can contain values (e.g. callables) that aren't
            # json-serializable: fall back to logging each entry with `str`.
            log(title if title.endswith(" ") else title + " ")
            entries = schedule.items() if isinstance(schedule, dict) else enumerate(schedule)
            for key, value in entries:
                log(f"{key}: {value}")

    if self.stationary_context:
        _log_schedule(logger.info, "Train tasks: ", list(self.train_task_schedule.values()))
    else:
        _log_schedule(logger.info, "Train task schedule:", self.train_task_schedule)
    if self.config.debug:
        _log_schedule(logger.debug, "Test task schedule:", self.test_task_schedule)

    # Run the main loop (training, then testing).
    results = self.main_loop(method)

    logger.info("Results summary:")
    logger.info(results.to_log_dict())
    logger.info(results.summary())
    method.receive_results(self, results=results)
    return results
def setup(self, stage: str = None) -> None:
    """(Re-)create the env wrappers for the given stage.

    Called before the start of each task during training, validation and testing.

    Parameters
    ----------
    stage : str, optional
        One of "fit", "validate" or "test". When `None`, the wrappers of all three
        stages are created.
    """
    super().setup(stage=stage)
    if stage in {"fit", None}:
        self.train_wrappers = self.create_train_wrappers()
    if stage in {"validate", None}:
        self.valid_wrappers = self.create_valid_wrappers()
    # NOTE: This is an `if` rather than an `elif`: with `stage=None` the "validate"
    # branch above matches, and an `elif` would skip creating the test wrappers.
    if stage in {"test", None}:
        self.test_wrappers = self.create_test_wrappers()
def prepare_data(self, *args, **kwargs) -> None:
    """Make sure a config exists, then defer to the parent's `prepare_data`.

    Nothing is really downloaded at the moment.
    """
    missing_config = self.config is None
    if missing_config:
        # Fall back to a default configuration.
        self.config = Config()
    super().prepare_data(*args, **kwargs)
def train_dataloader(
    self, batch_size: int = None, num_workers: int = None
) -> ActiveEnvironment:
    """Create the training environment / dataloader for the current task.

    Parameters
    ----------
    batch_size : int, optional
        Number of environments to run in parallel. Falls back to
        `self.batch_size` when falsy. When the resulting value is `None`, the env
        isn't vectorized.
    num_workers : int, optional
        Number of workers (processes) used by the vectorized env. Falls back to
        `self.num_workers` when `None`. Only applies when the env is vectorized.

    Returns
    -------
    GymDataLoader
        A (possibly vectorized) environment/dataloader for the current task,
        which is also stored on `self.train_env`.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    # NOTE: We actually want to call setup every time, so we re-create the
    # wrappers for each task.
    self.setup("fit")

    batch_size = batch_size or self.batch_size
    if num_workers is None:
        num_workers = self.num_workers
    seed = self.config.seed if self.config else None

    # Factory creating a single (non-vectorized) training env.
    make_single_env = partial(
        self._make_env,
        base_env=self.train_dataset,
        wrappers=self.train_wrappers,
        **self.base_env_kwargs,
    )
    train_env = self._make_env_dataloader(
        make_single_env,
        batch_size=batch_size,
        num_workers=num_workers,
        max_steps=self.steps_per_phase,
        max_episodes=self.train_max_episodes,
        seed=seed,
    )

    if self.monitor_training_performance:
        # NOTE: It doesn't always make sense to log stuff with the current task ID!
        prefix = "Train"
        if self.known_task_boundaries_at_train_time:
            prefix += f"/Task {self.current_task_id}"
        train_env = MeasureRLPerformanceWrapper(train_env, wandb_prefix=prefix)

    if self.config.render and batch_size is None:
        train_env = RenderEnvWrapper(train_env)

    # BUG: There is a mismatch between the train env's observation space and the
    # shape of its observations, so `self.observation_space` isn't updated here.
    self.train_env = train_env
    return self.train_env
def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> Environment:
    """Create the validation environment / dataloader for the current task.

    Parameters
    ----------
    batch_size : int, optional
        Number of environments to run in parallel. Falls back to
        `self.batch_size` when falsy. When the resulting value is `None`, the env
        isn't vectorized.
    num_workers : int, optional
        Number of workers (processes) used by the vectorized env. Falls back to
        `self.num_workers` when `None`. Only applies when the env is vectorized.

    Returns
    -------
    GymDataLoader
        A (possibly vectorized) environment/dataloader for the current task,
        which is also stored on `self.val_env`.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    # Need to force this to happen every time, because the wrappers might change
    # between tasks.
    self._has_setup_validate = False
    self.setup("validate")

    # Factory creating a single (non-vectorized) validation env.
    make_single_env = partial(
        self._make_env,
        base_env=self.val_dataset,
        wrappers=self.valid_wrappers,
        **self.base_env_kwargs,
    )
    seed = self.config.seed if self.config else None
    if num_workers is None:
        num_workers = self.num_workers
    valid_env = self._make_env_dataloader(
        make_single_env,
        batch_size=batch_size or self.batch_size,
        num_workers=num_workers,
        max_steps=self.steps_per_phase,
        # TODO: Create a new property to limit validation episodes?
        max_episodes=self.train_max_episodes,
        seed=seed,
    )

    if self.monitor_training_performance:
        # NOTE: We also add it here, just so it logs metrics to wandb.
        # NOTE: It doesn't always make sense to log stuff with the current task ID!
        prefix = "Valid"
        if self.known_task_boundaries_at_train_time:
            prefix += f"/Task {self.current_task_id}"
        valid_env = MeasureRLPerformanceWrapper(valid_env, wandb_prefix=prefix)

    self.val_env = valid_env
    return self.val_env
def test_dataloader(self, batch_size: int = None, num_workers: int = None) -> TestEnvironment:
    """Create the test 'dataloader/gym.Env' for all tasks.

    NOTE: This test environment isn't just for the current task, it actually
    contains the sequence of all tasks. This is different than the train or
    validation environments, since if the task labels are available at train
    time, then calling `train/valid_dataloader` returns the envs for the
    current task only, and the `.fit` method is called once per task.

    This environment is also different in that it is wrapped with
    `self.TestEnvironment` (a Monitor), which we might eventually use to save the
    results/gifs/logs of the testing runs.

    Parameters
    ----------
    batch_size : int, optional
        The batch size, i.e. the number of environments to run in parallel.
        Falls back to `self.batch_size` when falsy. NOTE: any non-None value is
        currently overridden to `None` (with a warning), so the test env is
        never vectorized.
    num_workers : int, optional
        The number of workers (processes) to use in the vectorized env. Falls
        back to `self.num_workers` when `None`.

    Returns
    -------
    TestEnvironment
        A testing environment which keeps track of the performance of the
        actor and accumulates logs/statistics that are used to eventually
        create the 'Result' object. Also stored on `self.test_env`.

    Raises
    ------
    NotImplementedError
        If `self.test_max_episodes` is not None.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    # NOTE: New for PL: The call doesn't go through if self._has_setup_test is True
    # Need to force this to happen every time, because the wrappers might change
    # between tasks.
    self._has_setup_test = False
    self.setup("test")
    # BUG: gym.wrappers.Monitor doesn't want to play nice when applied to
    # Vectorized env, it seems..
    # FIXME: Remove this when the Monitor class works correctly with
    # batched environments.
    batch_size = batch_size or self.batch_size
    if batch_size is not None:
        logger.warning(
            UserWarning(
                colorize(
                    f"WIP: Only support batch size of `None` (i.e., a single env) "
                    f"for the test environments of RL Settings at the moment, "
                    f"because the Monitor class from gym doesn't work with "
                    f"VectorEnvs. (batch size was {batch_size})",
                    "yellow",
                )
            )
        )
        # Force a single, non-vectorized test environment.
        batch_size = None
    num_workers = num_workers if num_workers is not None else self.num_workers
    test_seed = self.config.seed if self.config else None
    # Factory creating a single (non-vectorized) test env.
    env_factory = partial(
        self._make_env,
        base_env=self.test_dataset,
        wrappers=self.test_wrappers,
        **self.base_env_kwargs,
    )
    # TODO: Pass the max_steps argument to this `_make_env_dataloader` method,
    # rather than to a `step_limit` on the TestEnvironment.
    env_dataloader = self._make_env_dataloader(
        env_factory,
        batch_size=batch_size,
        num_workers=num_workers,
    )
    if self.test_max_episodes is not None:
        raise NotImplementedError(f"TODO: Use `self.test_max_episodes`")
    # `batch_size` is always None at this point, so this is just `test_max_steps`.
    test_loop_max_steps = self.test_max_steps // (batch_size or 1)
    # TODO: Find where to configure this 'test directory' for the outputs of
    # the Monitor.
    if wandb.run:
        test_dir = wandb.run.dir
    else:
        test_dir = self.config.log_dir
    # TODO: Split this up into an ActionLimit wrapper, a RecordVideo wrapper,
    # and a RecordEpisodeStatistics wrapper.
    self.test_env = self.TestEnvironment(
        env_dataloader,
        task_schedule=self.test_task_schedule,
        directory=test_dir,
        step_limit=test_loop_max_steps,
        config=self.config,
        force=True,
        video_callable=None if wandb.run or self.config.render else False,
    )
    # Seed the env as well as its action and observation spaces.
    self.test_env.seed(seed=test_seed)
    self.test_env.action_space.seed(seed=test_seed)
    self.test_env.observation_space.seed(seed=test_seed)
    return self.test_env
@property
def phases(self) -> int:
    """Number of training 'phases', i.e. how many times `method.fit` is called.

    In ContinualRL and DiscreteTaskAgnosticRL, fit is only called once, with an
    environment that shifts between all the tasks. In IncrementalRL, fit is called
    once per task, while in TraditionalRL and MultiTaskRL, fit is called once.
    """
    nb_phases = 1
    return nb_phases
@property
def steps_per_phase(self) -> Optional[int]:
    """Maximum number of steps allowed per training "phase".

    This is the step budget of the training environment that gets passed to
    `Method.fit`. In most settings, this is the same as `steps_per_task`.

    Returns
    -------
    Optional[int]
        `None` if `train_max_steps` is None, else `train_max_steps // phases`.
    """
    step_budget = self.train_max_steps
    if step_budget is None:
        return None
    return step_budget // self.phases
@staticmethod
def _make_env(
    base_env: Union[str, gym.Env, Callable[[], gym.Env]],
    wrappers: List[Callable[[gym.Env], gym.Env]] = None,
    **base_env_kwargs: Dict,
) -> gym.Env:
    """Create a single (non-vectorized) environment and wrap it.

    `base_env` can be an environment id (passed to `gym.make` along with
    `base_env_kwargs`), an existing `gym.Env` (used as-is), or a callable (called
    with `base_env_kwargs`). The wrappers are then applied in order.

    Raises
    ------
    RuntimeError
        If `base_env` is neither a string, a `gym.Env`, nor a callable.
    """
    if isinstance(base_env, str):
        created_env = gym.make(base_env, **base_env_kwargs)
    elif isinstance(base_env, gym.Env):
        created_env = base_env
    elif callable(base_env):
        created_env = base_env(**base_env_kwargs)
    else:
        raise RuntimeError(
            f"base_env should either be a string, a callable, or a gym "
            f"env. (got {base_env})."
        )
    # The first wrapper in the list ends up being the innermost one.
    for wrapper_fn in wrappers or []:
        created_env = wrapper_fn(created_env)
    return created_env
def _make_env_dataloader(
    self,
    env_factory: Callable[[], gym.Env],
    batch_size: Optional[int],
    num_workers: Optional[int] = None,
    seed: Optional[int] = None,
    max_steps: Optional[int] = None,
    max_episodes: Optional[int] = None,
) -> GymDataLoader:
    """Helper function for creating a (possibly vectorized) environment.

    Parameters
    ----------
    env_factory : Callable[[], gym.Env]
        Function creating a single environment.
    batch_size : Optional[int]
        Number of environments to batch together with `make_batched_env`. When
        `None`, a single env is created by calling `env_factory` directly.
    num_workers : Optional[int]
        Passed to `make_batched_env` when `batch_size` is not None.
    seed : Optional[int]
        Seed for the env and for its action and observation spaces.
    max_steps : Optional[int]
        When truthy, an `ActionLimit` wrapper with that limit is added.
    max_episodes : Optional[int]
        When truthy, an `EpisodeLimit` wrapper with that limit is added.

    Returns
    -------
    GymDataLoader
        Dataloader over an `EnvDataset` that wraps the resulting environment.
    """
    logger.debug(f"batch_size: {batch_size}, num_workers: {num_workers}, seed: {seed}")
    env: Union[gym.Env, gym.vector.VectorEnv]
    if batch_size is None:
        env = env_factory()
    else:
        env = make_batched_env(
            env_factory,
            batch_size=batch_size,
            num_workers=num_workers,
            # TODO: Still debugging shared memory + custom spaces (e.g. Sparse).
            shared_memory=False,
        )
    # NOTE: Truthiness checks: a limit of 0 is treated the same as no limit.
    if max_steps:
        env = ActionLimit(env, max_steps=max_steps)
    if max_episodes:
        env = EpisodeLimit(env, max_episodes=max_episodes)
    # Apply the "post-batch" wrappers:
    # TODO: Only the BaseMethod requires this, we should enable it only
    # from the BaseMethod, and leave it 'off' by default.
    if self.add_done_to_observations:
        env = AddDoneToObservation(env)
    if self.prefer_tensors and self.config.device:
        # Move the observations and rewards to the configured device.
        # TODO: Put this before or after the image transforms?
        env = TransformObservation(env, f=partial(move, device=self.config.device))
        env = TransformReward(env, f=partial(move, device=self.config.device))
    # Add a wrapper that converts numpy arrays / etc to Observations/Rewards
    # and from Actions objects to numpy arrays.
    env = TypedObjectsWrapper(
        env,
        observations_type=self.Observations,
        rewards_type=self.Rewards,
        actions_type=self.Actions,
    )
    # Create an IterableDataset from the env using the EnvDataset wrapper.
    dataset = EnvDataset(env)
    # Create a GymDataLoader for the EnvDataset.
    env_dataloader = GymDataLoader(dataset)
    if batch_size and seed:
        # Seed each environment with its own seed (based on the base seed).
        env.seed([seed + i for i in range(env_dataloader.num_envs)])
    else:
        # NOTE: Also reached when `seed` is falsy (None or 0), even if batched.
        env.seed(seed)
    env.action_space.seed(seed)
    env.observation_space.seed(seed)
    return env_dataloader
def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
    """Get the list of wrappers to add to each training environment.

    The result of this method must be pickleable when using multiprocessing.

    Returns
    -------
    List[Callable[[gym.Env], gym.Env]]
        The output of `self._make_wrappers` for the training dataset, task
        schedule and transforms.
    """
    # We add a restriction to prevent users from getting data from
    # previous or future tasks.
    # NOTE: This assumes that tasks all have the same length.
    # TODO: `sharp_task_boundaries=self.known_task_boundaries_at_train_time` was
    # removed; check that this doesn't change when/how the task boundaries are
    # given to the Method.
    wrapper_kwargs = dict(
        base_env=self.train_dataset,
        task_schedule=self.train_task_schedule,
        task_labels_available=self.task_labels_at_train_time,
        transforms=self.transforms + self.train_transforms,
        starting_step=0,
        max_steps=self.train_max_steps,
        new_random_task_on_reset=self.stationary_context,
    )
    return self._make_wrappers(**wrapper_kwargs)
def create_valid_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
    """Get the list of wrappers to add to each validation environment.

    The result of this method must be pickleable when using multiprocessing.

    TODO: Decide how this 'validation' environment should behave in
    comparison with the train and test environments.

    Returns
    -------
    List[Callable[[gym.Env], gym.Env]]
        The output of `self._make_wrappers` for the validation dataset, task
        schedule and transforms.
    """
    wrapper_kwargs = dict(
        base_env=self.val_dataset,
        task_schedule=self.val_task_schedule,
        # NOTE: The train-time setting is used for the validation task labels.
        task_labels_available=self.task_labels_at_train_time,
        transforms=self.transforms + self.val_transforms,
        starting_step=0,
        # TODO: Should there be a limit on the validation steps/episodes?
        max_steps=self.train_max_steps,
        new_random_task_on_reset=self.stationary_context,
    )
    return self._make_wrappers(**wrapper_kwargs)
def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
    """Get the list of wrappers to add to a single test environment.

    The result of this method must be pickleable when using multiprocessing.

    Returns
    -------
    List[Callable[[gym.Env], gym.Env]]
        The output of `self._make_wrappers` for the test dataset, task schedule
        and transforms.
    """
    wrapper_kwargs = dict(
        base_env=self.test_dataset,
        task_schedule=self.test_task_schedule,
        task_labels_available=self.task_labels_at_test_time,
        transforms=self.transforms + self.test_transforms,
        starting_step=0,
        max_steps=self.test_max_steps,
        new_random_task_on_reset=self.stationary_context,
    )
    return self._make_wrappers(**wrapper_kwargs)
def _make_wrappers(
    self,
    base_env: Union[str, gym.Env, Callable[[], gym.Env]],
    task_schedule: Dict[int, Dict],
    # sharp_task_boundaries: bool,
    task_labels_available: bool,
    transforms: List[Transforms] = None,
    starting_step: int = None,
    max_steps: int = None,
    new_random_task_on_reset: bool = False,
) -> List[Callable[[gym.Env], gym.Env]]:
    """Helper function for creating the train/valid/test wrappers.

    These wrappers get applied *before* the batching, if applicable.

    Parameters
    ----------
    base_env : Union[str, gym.Env, Callable[[], gym.Env]]
        The base environment. Only used to check whether it is an Atari env.
    task_schedule : Dict[int, Dict]
        Task schedule passed to the wrapper that adds non-stationarity. No such
        wrapper is added when this is None.
    task_labels_available : bool
        When False, a `HideTaskLabelsWrapper` is added.
    transforms : List[Transforms], optional
        Transforms to apply to the observations.
    starting_step : int, optional
        Passed to the non-stationarity wrapper. Must not be None when
        `task_schedule` is given.
    max_steps : int, optional
        Passed to the non-stationarity wrapper. Must not be None when
        `task_schedule` is given.
    new_random_task_on_reset : bool, optional
        Passed to the non-stationarity wrapper.

    Returns
    -------
    List[Callable[[gym.Env], gym.Env]]
        The wrappers, in the order in which they should be applied.
    """
    wrappers: List[Callable[[gym.Env], gym.Env]] = []
    # TODO: Add some kind of Wrapper around the dataset to make it
    # semi-supervised?
    if self.max_episode_steps:
        wrappers.append(partial(TimeLimit, max_episode_steps=self.max_episode_steps))
    # NOTE: Removing this 'ActionLimit' from the 'pre-batch' wrappers.
    # wrappers.append(partial(ActionLimit, max_steps=max_steps))
    # if is_classic_control_env(base_env):
    # If we are in a classic control env, and we dont want the state to
    # be fully-observable (i.e. we want pixel observations rather than
    # getting the pole angle, velocity, etc.), then add the
    # PixelObservation wrapper to the list of wrappers.
    # if self.force_pixel_observations:
    # wrappers.append(PixelObservationWrapper)
    # TODO: Temporary fix for the `is_atari_env` function, which is used to check if the env
    # needs a `AtariPreprocessing` wrapper added.
    if isinstance(base_env, (str, gym.Env)) and is_atari_env(base_env):
        # TODO: Figure out the differences (if there are any) between the
        # AtariWrapper from SB3 and the AtariPreprocessing wrapper from gym.
        wrappers.append(GymAtariWrapper)
    if transforms:
        # Apply image transforms if the env will have image-like obs space
        # Wrapper to 'wrap' the observation space into an Image space (subclass of
        # Box with useful fields like `c`, `h`, `w`, etc.)
        wrappers.append(ImageObservations)
        # Wrapper to apply the image transforms to the env.
        wrappers.append(partial(TransformObservation, f=transforms))
    if task_schedule is not None:
        # Add a wrapper which will add non-stationarity to the environment.
        # The "task" transitions will either be sharp or smooth.
        # In either case, the task ids for each sample are added to the
        # observations, and the dicts containing the task information (e.g. the
        # current values of the env attributes from the task schedule) get added
        # to the 'info' dicts.
        nb_tasks = None
        if self.smooth_task_boundaries:
            # Add a wrapper that creates smooth tasks.
            cl_wrapper = SmoothTransitions
        else:
            assert self.nb_tasks >= 1
            # Add a wrapper that creates sharp tasks.
            # NOTE: The naming here is less than ideal! This isn't "multi-task" as-in stationary
            # by default. It just means an env which can do multiple tasks. However, when the
            # `new_random_task_on_reset` argument is set, then it does sample tasks IID.
            cl_wrapper = MultiTaskEnvironment
            nb_tasks = self.nb_tasks
        assert starting_step is not None
        assert max_steps is not None
        wrappers.append(
            partial(
                cl_wrapper,
                noise_std=self.task_noise_std,
                task_schedule=task_schedule,
                add_task_id_to_obs=True,
                add_task_dict_to_info=False,
                starting_step=starting_step,
                nb_tasks=nb_tasks,
                new_random_task_on_reset=new_random_task_on_reset,
                max_steps=max_steps,
            )
        )
        # If the task labels aren't available, we then add another wrapper that
        # hides that information (setting both of them to None) and also marks
        # those spaces as `Sparse`.
        if not task_labels_available:
            # NOTE: This sets the task labels to None, rather than removing
            # them entirely.
            # wrappers.append(RemoveTaskLabelsWrapper)
            wrappers.append(HideTaskLabelsWrapper)
    return wrappers
def _get_objective_scaling_factor(self) -> float:
"""Return the factor to be multiplied with the mean reward per episode
in order to produce a 'performance score' between 0 and 1.
Returns
-------
float
The scaling factor to use.
"""
# TODO: remove this, currently used just so we can get a 'scaling factor' to use
# to scale the 'mean reward per episode' to a score between 0 and 1.
# TODO: Add other environments, for instance 1/200 for cartpole.
# TODO: Rework this so its based on the reward threshold!
max_reward_per_episode = 1
if isinstance(self.dataset, str) and self.dataset.startswith("MetaMonsterKong"):
max_reward_per_episode = 100
elif isinstance(self.dataset, str) and self.dataset == "CartPole-v0":
max_reward_per_episode = 200
else:
warnings.warn(
RuntimeWarning(
f"Unable to determine the right scaling factor to use for dataset "
f"{self.dataset} when calculating the performance score! "
f"The CL Score of this run will most probably not be accurate."
)
)
return 1 / max_reward_per_episode
def _get_simple_name(self, env_name_or_id: str) -> Optional[str]:
"""Returns the 'simple name' for the given environment ID.
For example, when passed "CartPole-v0", returns "cartpole".
When not found, returns None.
"""
if env_name_or_id in self.available_datasets.keys():
return env_name_or_id
if env_name_or_id in self.available_datasets.values():
simple_name: str = [
k for k, v in self.available_datasets.items() if v == env_name_or_id
][0]
return simple_name
return None
def _load_task_schedule(file_path: Path) -> Dict[int, Dict]:
"""Load a task schedule from the given path."""
with open(file_path) as f:
task_schedule = json.load(f)
return {int(k): task_schedule[k] for k in sorted(task_schedule.keys())}
# Script entry point: runs `ContinualRLSetting.main()` when this file is executed
# directly.
# NOTE(review): `find_matching_dataset` is defined *below* this guard, so it does
# not exist yet when `main()` runs in script mode. Confirm whether `main()` needs
# it, and consider moving this guard to the end of the module.
if __name__ == "__main__":
    ContinualRLSetting.main()
def find_matching_dataset(
    available_datasets: Dict[str, Union[str, Any]], dataset: str
) -> Optional[Union[str, Any]]:
    """Find the entry of `available_datasets` that best matches `dataset`.

    Compares `dataset` with the keys of the `available_datasets` dict and returns
    the *value* of the matching key. The lookup goes through these steps:

    1. Exact match with one of the keys.
    2. If `dataset` has a version suffix ("-v<N>"): first key with the same version
       and a matching name (as determined by `names_match`).
    3. If `dataset` has no version suffix: among the keys with a matching name, the
       one with the highest version (keys without a numeric version rank lowest).
    4. Closest key according to `difflib.get_close_matches`. It is only accepted
       if its version is the same as the requested one, when one was requested;
       a `RuntimeWarning` is emitted when no version was requested.

    Raises
    ------
    NotImplementedError
        If `dataset` isn't a key of `available_datasets` and isn't a string.
    gym.error.UnregisteredEnv
        If no matching entry can be found.
    """
    if dataset in available_datasets:
        return available_datasets[dataset]
    if not isinstance(dataset, str):
        raise NotImplementedError(dataset)

    def _version_number(key: str) -> int:
        """Integer version of a "name-v<N>" key, or -1 if there is none."""
        version = key.partition("-v")[-1]
        return int(version) if version.isdigit() else -1

    chosen_env_name, _, chosen_version = dataset.partition("-v")
    for key, env_id in available_datasets.items():
        env_name, _, env_version = key.partition("-v")
        if chosen_version:
            # Examples of equivalent spellings:
            # HalfCheetah-v2, halfcheetah-v2, half_cheetah_v2
            if chosen_version != env_version:
                continue
            if names_match(chosen_env_name, env_name):
                return env_id
        elif names_match(chosen_env_name, env_name):
            # Look for matching entries with that name, and select the highest
            # available version.
            keys_with_that_name = [
                other_key
                for other_key in available_datasets
                if names_match(chosen_env_name, other_key.partition("-v")[0])
            ]
            if len(keys_with_that_name) == 1:
                return env_id
            best_key = max(keys_with_that_name, key=_version_number)
            # Return the value of the best key (not the key itself), like every
            # other branch of this function.
            return available_datasets[best_key]

    closest_matches = difflib.get_close_matches(dataset, available_datasets)
    if closest_matches:
        closest_match_key: str = closest_matches[0]
        closest_match: Union[str, Any] = available_datasets[closest_match_key]
        if chosen_version:
            # Find the 'version' number of the closest match, and check that it fits.
            closest_match_version = closest_match_key.partition("-v")[-1]
            if not closest_match_version:
                assert isinstance(closest_match, str)
                closest_match_version = closest_match.partition("-v")[-1]
            if chosen_version == closest_match_version:
                return closest_match
            raise gym.error.UnregisteredEnv(
                f"Can't find any matching entries for chosen dataset {dataset} "
                f"with that same version (closest entries: {closest_matches}) "
            )
        warnings.warn(
            RuntimeWarning(
                f"Can't find matching entry for chosen dataset {dataset}, using "
                f"closest match: {closest_match}"
            )
        )
        return closest_match

    raise gym.error.UnregisteredEnv(
        f"Can't find any matching entries for chosen dataset {dataset}."
    )
================================================
FILE: sequoia/settings/rl/continual/setting_test.py
================================================
import dataclasses
from dataclasses import asdict, is_dataclass, replace
from functools import partial, singledispatch
from pathlib import Path
from typing import Any, Callable, Union, ClassVar, Dict, List, Optional, Sequence, Type
import typing
import gym
import matplotlib.pyplot as plt
import numpy as np
import pytest
from gym import spaces
from gym.vector.utils import batch_space
from sequoia.common.config import Config
from sequoia.common.spaces import TypedDictSpace
from sequoia.common.spaces.sparse import Sparse
from sequoia.conftest import (
MUJOCO_INSTALLED,
mujoco_required,
param_requires_monsterkong,
param_requires_mujoco,
)
from sequoia.settings.assumptions.incremental_test import DummyMethod as _DummyMethod
from sequoia.settings.base.setting_test import SettingTests
from sequoia.settings.rl.incremental.setting import IncrementalRLSetting
from sequoia.settings.rl.setting_test import DummyMethod
from sequoia.utils.utils import pairwise, take
from sequoia.settings.base import Setting
from .setting import ContinualRLSetting
@pytest.mark.parametrize(
    "dataset",
    [
        "CartPole-v8",
        "Breakout-v9",
        param_requires_mujoco("Ant-v0"),
        param_requires_monsterkong("MetaMonsterKong-v0"),
    ],
)
def test_passing_unsupported_dataset_raises_error(dataset: Any):
    """Creating the setting with an unsupported dataset should raise an error."""
    expected_errors = (gym.error.Error, NotImplementedError)
    with pytest.raises(expected_errors):
        ContinualRLSetting(dataset=dataset)
def test_acrobot_attributes_change_over_time():
    """Check that the Acrobot attributes in the task schedule change at every step.

    An Acrobot env is wrapped by hand with `SmoothTransitions` and other wrappers,
    ending with a `CheckAttributesWrapper`. It is then run for a number of episodes
    with random actions, and the values stored in `env.values` for each attribute
    are checked: the first two are equal, and all the others are distinct.
    """
    from sequoia.settings.rl.setting_test import CheckAttributesWrapper
    from sequoia.settings.rl.wrappers import MeasureRLPerformanceWrapper
    from sequoia.settings.rl.continual.environment import GymDataLoader
    from sequoia.common.gym_wrappers.env_dataset import EnvDataset
    from sequoia.settings.rl.wrappers import TypedObjectsWrapper
    from sequoia.common.gym_wrappers.action_limit import ActionLimit
    from sequoia.settings.rl.wrappers import HideTaskLabelsWrapper
    from sequoia.common.gym_wrappers.smooth_environment import SmoothTransitions
    # Task schedule: maps a step to the values of the env attributes at that step.
    task_schedule = {
        0: {
            "LINK_LENGTH_1": 1.0,
            "LINK_LENGTH_2": 1.0,
            "LINK_MASS_1": 1.0,
            "LINK_MASS_2": 1.0,
            "LINK_COM_POS_1": 0.5,
            "LINK_COM_POS_2": 0.5,
            "LINK_MOI": 1.0,
        },
        100: {
            "LINK_LENGTH_1": 1.077662352662672,
            "LINK_LENGTH_2": 1.0029158956681965,
            "LINK_MASS_1": 1.284506509206828,
            "LINK_MASS_2": 1.3452415995540132,
            "LINK_COM_POS_1": 0.3838164987591757,
            "LINK_COM_POS_2": 0.6022014573018389,
            "LINK_MOI": 0.866228909018773,
        },
        200: {
            "LINK_LENGTH_1": 0.9787461324812216,
            "LINK_LENGTH_2": 1.1761685623559348,
            "LINK_MASS_1": 1.0598898754474704,
            "LINK_MASS_2": 1.1760598598046939,
            "LINK_COM_POS_1": 0.4523967193123413,
            "LINK_COM_POS_2": 0.4100516516032442,
            "LINK_MOI": 1.010250702300972,
        },
    }
    from .objects import Observations
    # Names of the attributes that are tracked by the CheckAttributesWrapper.
    attributes = list(task_schedule[0].keys())
    assert Observations is ContinualRLSetting.Observations
    max_steps = 200
    max_episode_steps = 10
    # NOTE(review): `wrapper_fns` is never used below.
    wrapper_fns = []
    from gym.envs.classic_control.acrobot import AcrobotEnv
    from gym.wrappers import TimeLimit
    # NOTE(review): the env created by `gym.make` is immediately replaced by a bare
    # `AcrobotEnv` on the next line.
    base_env: AcrobotEnv = gym.make("Acrobot-v1") # type: ignore
    base_env = AcrobotEnv()
    base_env = TimeLimit(base_env, max_episode_steps=max_episode_steps)
    # The wrappers are listed from the innermost to the outermost.
    env = wrap(
        base_env,
        lambda env: SmoothTransitions(
            env,
            task_schedule=task_schedule,
            add_task_id_to_obs=True,
            only_update_on_episode_end=False,
        ),
        HideTaskLabelsWrapper,
        lambda env: ActionLimit(env, max_steps=10_000),
        lambda env: TypedObjectsWrapper(
            env,
            observations_type=ContinualRLSetting.Observations,
            # The observation space is spelled out explicitly here: `x` is the
            # 6-dimensional Acrobot state, while the task labels and `done` are
            # Sparse spaces with sparsity=1.
            observation_space=TypedDictSpace(
                x=spaces.Box(
                    np.asfarray([-1.0, -1.0, -1.0, -1.0, -12.566371, -28.274334]),
                    np.asfarray([1.0, 1.0, 1.0, 1.0, 12.566371, 28.274334]),
                    (6,),
                    np.float32,
                ),
                task_labels=Sparse(spaces.Box(0.0, 1.0, (), np.float32), sparsity=1),
                done=Sparse(spaces.Box(False, True, (), bool), sparsity=1),
                dtype=Observations,
            ),
            action_space=spaces.Discrete(3),
            actions_type=ContinualRLSetting.Actions,
            rewards_type=ContinualRLSetting.Rewards,
            reward_space=spaces.Box(-np.inf, np.inf, (), np.float32),
        ),
        EnvDataset,
        GymDataLoader,
        MeasureRLPerformanceWrapper,
        lambda env: CheckAttributesWrapper(env, attributes=attributes),
    )
    import itertools
    env.seed(123)
    episodes = max_steps // max_episode_steps
    done = False
    total_steps = 0
    for episode in range(episodes):
        obs = env.reset()
        done = False
        step: int = 0
        # Take random actions until the end of the episode.
        for step in itertools.count():
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            total_steps += 1
            link_length_1 = env.LINK_LENGTH_1
            if done:
                break
        # Values stored under the largest key of `env.values`.
        current_values = env.values[max(env.values)]
        # assert current_values == env.current_task # NOTE: A bit too fine-grained. This is slightly different.
        print(
            f"End of episode {episode} at step {total_steps} (lasted {step} steps): \n\t{current_values}"
        )
    values_at_each_step = env.values
    for attribute in attributes:
        train_values: List[float] = [
            values_dict[attribute] for step, values_dict in values_at_each_step.items()
        ]
        # We store the values before and after each step, so it's fine if the first
        # two entries are the same.
        assert train_values[0] == train_values[1]
        # All the values are distinct, apart from that one duplicate.
        assert len(train_values) == len(set(train_values)) + 1
from typing import TypeVar

# Type variable for the environment being wrapped (bound to `gym.Env`).
E = TypeVar("E", bound=gym.Env)
# Type variable for the wrappers applied on top of it (bound to `gym.Wrapper`).
W = TypeVar("W", bound=gym.Wrapper)
def wrap(
    env: E, *wrapper_fns: Union[Type[W], Callable[[Union[E, W]], W]]
) -> Union[E, W, Union[W, E]]:
    """Wrap `env` with the provided wrapper types or wrapper functions.

    The wrapper functions are applied in order to `env`: the first item of
    `wrapper_fns` is the innermost wrapper, and the last item is the outermost.

    Parameters
    ----------
    env : E
        The environment to wrap.
    *wrapper_fns
        Wrapper classes, or functions that take an env and return a wrapped env.

    Returns
    -------
    Union[W, E]
        The wrapped environment, or `env` itself when no wrappers are given.
    """
    outermost: Union[W, E] = env
    for add_layer in wrapper_fns:
        outermost = add_layer(outermost)
    if typing.TYPE_CHECKING:
        assert isinstance(outermost, (E, W))
    return outermost
def wrap_reversed(
    env: E, *wrapper_fns: Union[Type[W], Callable[[Union[E, W]], W]]
) -> Union[E, W, Union[W, E]]:
    """Same as `wrap`, but the first item of `wrapper_fns` is the outermost wrapper."""
    innermost_first = wrapper_fns[::-1]
    return wrap(env, *innermost_first)
@singledispatch
def _equal(a: Any, b: Any) -> bool:
"""Utility function used to check if two thing are equal.
NOTE: This is only really useful/necessary because `functools.partial` objects can be present
as attributes on the setting, usually either in the task schedule (or in the
[train/val/test]_envs for the IncrementalRLSetting subclasses).
The `functools.partial` class doesn't support equality: two partial objects with the same funcs,
args and kwargs are still not considered equal for some reason.
This function has a special handler for `partial` objects, so that they are considered equal if
and only if their funcs, args and keywords are the same.
This makes it possible to easily check for equality between settings, which is used for example
in the tests below.
"""
if is_dataclass(a):
return is_dataclass(b) and _equal(asdict(a), asdict(b))
return a == b
@_equal.register
def _partials_equal(a: partial, b: partial) -> bool:
    """Partials are equal iff their funcs, args and keywords are all equal."""
    if not isinstance(b, partial):
        return False
    # NOTE: Using the recursive call so we can compare nested partials.
    same = _equal(a.func, b.func)
    if same:
        same = _equal(a.args, b.args)
    if same:
        same = _equal(a.keywords, b.keywords)
    return same
# NOTE: Need to also register handlers for list and dict, since they might have partials as
# items.
@_equal.register(list)
def _lists_equal(a: List, b: List) -> bool:
    """Lists are equal iff they have the same length and pairwise-equal items."""
    if len(a) != len(b):
        return False
    for item_a, item_b in zip(a, b):
        if not _equal(item_a, item_b):
            return False
    return True
@_equal.register(dict)
def _dicts_equal(a: Dict, b: Dict) -> bool:
    """Dicts are equal iff they have the same keys and equal values for each key.

    The first key at which the values differ is printed.
    """
    if not (a.keys() == b.keys()):
        return False
    for key, value_a in a.items():
        value_b = b[key]
        if _equal(value_a, value_b):
            continue
        print(f"Values differ at key {key}: {value_a}, {value_b}")
        return False
    return True
def all_different_from_next(sequence: Sequence) -> bool:
    """Returns True if each value in the sequence is different from the next."""
    for current_value, following_value in pairwise(sequence):
        if _equal(current_value, following_value):
            return False
    return True
class TestContinualRLSetting(SettingTests):
Setting: ClassVar[Type[Setting]] = ContinualRLSetting
dataset: pytest.fixture
@pytest.fixture()
def setting_kwargs(self, dataset: str, config: Config):
    """Fixture providing the keyword arguments used to create a Setting."""
    kwargs = dict(dataset=dataset, config=config)
    return kwargs
def test_passing_supported_dataset(self, setting_kwargs: Dict):
    """Creating a setting from a supported dataset should populate all three task schedules."""
    setting = self.Setting(**setting_kwargs)
    schedule_names = ("train_task_schedule", "val_task_schedule", "test_task_schedule")
    for schedule_name in schedule_names:
        assert getattr(setting, schedule_name)
    # Passing the dataset created a task schedule.
    for schedule_name in schedule_names:
        assert all(getattr(setting, schedule_name).values()), "Should have non-empty tasks."
@pytest.mark.parametrize("seed", [123, 456])
def test_task_schedule_is_reproducible(self, dataset: str, seed: Optional[int]):
    """Two settings created with the same dataset and seed should get the same task schedules."""
    setting_a, setting_b = (
        self.Setting(dataset=dataset, config=Config(seed=seed)) for _ in range(2)
    )
    for phase in ("train", "val", "test"):
        schedule_name = f"{phase}_task_schedule"
        assert getattr(setting_a, schedule_name) == getattr(setting_b, schedule_name)
@pytest.mark.xfail(
    reason="Reworking/removing this mechanism, makes things a bit too complicated."
)
def test_using_deprecated_fields(self):
    """Check that the old field names (`max_steps`, `test_steps`) still set the new fields.

    Marked as an expected failure (see the `xfail` reason above).
    """
    # BUG: It's tough to get this to raise a warning, because it's happening
    # inside the constructor in the dataclasses.py file, so we have to mess with
    # descriptors etc, which isn't great.
    # with pytest.raises(DeprecationWarning):
    #     setting = self.Setting(nb_tasks=5, max_steps=123)
    setting = self.Setting(nb_tasks=5, max_steps=123)
    assert setting.train_max_steps == 123

    # Setting the deprecated attribute is expected to warn, and to update the new attribute.
    with pytest.warns(DeprecationWarning):
        setting.max_steps = 456
    assert setting.train_max_steps == 456

    with pytest.warns(DeprecationWarning):
        setting = self.Setting(nb_tasks=5, test_max_steps=123)
    assert setting.test_max_steps == 123

    with pytest.warns(DeprecationWarning):
        setting.test_steps = 456
    assert setting.test_max_steps == 456
def test_tasks_are_different(self, setting_kwargs: Dict[str, Any], config: Config):
    """Check that, in every task schedule, each task is different from the next one."""
    config = setting_kwargs.pop("config", config)
    assert config.seed is not None
    setting = self.Setting(**setting_kwargs, config=config)

    schedules = (
        setting.train_task_schedule,
        setting.val_task_schedule,
        setting.test_task_schedule,
    )
    # Check that each task is different from the next.
    for schedule in schedules:
        assert all_different_from_next(schedule.values())
def test_settings_attributes_are_the_same_for_given_seed(
    self, setting_kwargs: Dict[str, Any], config: Config
):
    """Two settings created with the same arguments and seed should have identical attributes."""
    # Use the config from the kwargs if there is one, otherwise the `config` fixture. Either
    # way, a random seed has to be set.
    config = setting_kwargs.pop("config", config)
    assert config.seed is not None

    # Both settings use the same config, and therefore the same seed.
    setting_1, setting_2 = (self.Setting(**setting_kwargs, config=config) for _ in range(2))

    # Check that the settings have the same attributes.
    assert _equal(dataclasses.asdict(setting_1), dataclasses.asdict(setting_2))

    # These next checks are redundant, but just to be clear:
    for schedule_name in ("train_task_schedule", "val_task_schedule", "test_task_schedule"):
        assert getattr(setting_1, schedule_name) == getattr(setting_2, schedule_name)
def test_tasks_are_different_when_seed_is_different(
    self, setting_kwargs: Dict[str, Any], config: Config
):
    """Settings created with different seeds should (at least) get different tasks."""
    config = setting_kwargs.pop("config", config)
    assert config.seed is not None

    setting_1 = self.Setting(**setting_kwargs, config=config)
    assert setting_1.train_task_schedule

    # Create another setting, identical except for its seed.
    different_seed = config.seed + 123
    other_config = replace(config, seed=different_seed)
    setting_3 = self.Setting(**setting_kwargs, config=other_config)

    setting_1_dict = dataclasses.asdict(setting_1)
    setting_3_dict = dataclasses.asdict(setting_3)

    # Take the seeds out of the dicts (they are obviously different), so that the comparison
    # below is only about the rest of the attributes.
    assert setting_1_dict["config"].pop("seed") == config.seed
    assert setting_3_dict["config"].pop("seed") == different_seed

    if "LPG-FTW" in setting_1.dataset:
        # NOTE: The rest of the setting's attributes might be identical (they currently are, but
        # this could change), so skipping these datasets seems like the right thing to do.
        pytest.skip("LPG-FTW datasets always create the same tasks, no matter the seed.")

    assert not _equal(setting_1_dict, setting_3_dict)

    # The check above could have passed because of some unrelated attribute, so also explicitly
    # compare either the envs of each task, or the task schedules.
    if isinstance(setting_1, IncrementalRLSetting) and setting_1.train_envs:
        assert isinstance(setting_3, IncrementalRLSetting)
        # Using custom envs for each task.
        compared_attributes = ("train_envs", "val_envs", "test_envs")
    else:
        # Using a single env with a task schedule.
        compared_attributes = ("train_task_schedule", "val_task_schedule", "test_task_schedule")

    for attribute_name in compared_attributes:
        assert not _equal(getattr(setting_1, attribute_name), getattr(setting_3, attribute_name))
def test_env_attributes_change(self, setting_kwargs: Dict[str, Any], config: Config):
    """Check that the values of the given attributes do change at each step during
    training.

    The setting is applied to a `DummyMethod`, and the attribute values it recorded during
    training are then compared with the train task schedule, one attribute at a time, using
    `validate_env_value_changes`.
    """
    setting_kwargs.setdefault("nb_tasks", 2)
    setting_kwargs.setdefault("train_max_steps", 1000)
    setting_kwargs.setdefault("max_episode_steps", 50)
    setting_kwargs.setdefault("test_max_steps", 1000)
    setting = self.Setting(**setting_kwargs)
    assert setting.train_task_schedule

    # NOTE: Have to check for `setting.train_envs` because in that case the task schedule won't
    # be used.
    from sequoia.settings.rl.incremental.setting import IncrementalRLSetting

    if isinstance(setting, IncrementalRLSetting) and setting._using_custom_envs_foreach_task:
        # It would be pretty hard to check for the "task values" in this case, because the
        # custom envs for each task might not be just the same env type but with different
        # attributes!
        pytest.skip("Using custom envs for each task instead of a task schedule.")

    assert all(setting.train_task_schedule.values())
    assert setting.nb_tasks == setting_kwargs["nb_tasks"]
    assert setting.train_steps_per_task == setting_kwargs["train_max_steps"] // setting.nb_tasks
    assert setting.train_max_steps == setting_kwargs["train_max_steps"]

    # Names of all the attributes that appear in at least one task of the train schedule.
    attributes = set().union(*[task.keys() for task in setting.train_task_schedule.values()])

    method = DummyMethod()
    results = setting.apply(method, config=config)
    assert results
    self.validate_results(setting, method, results)
    # TODO: Need to limit the episodes per step in MonsterKong.
    # In MonsterKong, we might have 0 reward, since this might not even
    # constitute a full episode.
    # assert results.objective

    for attribute in attributes:
        # Values of this attribute, as recorded by the method during training.
        train_values: List[float] = [
            values[attribute]
            for values_dict in method.all_train_values
            for values in values_dict.values()
        ]
        assert train_values

        # Value of this attribute at each step (key) of the train task schedule.
        task_schedule_values: Dict[int, float] = {
            step: task[attribute] for step, task in setting.train_task_schedule.items()
        }
        self.validate_env_value_changes(
            setting=setting,
            attribute=attribute,
            task_schedule_for_attr=task_schedule_values,
            train_values=train_values,
        )
@staticmethod
def validate_env_value_changes(
    setting: ContinualRLSetting,
    attribute: str,
    task_schedule_for_attr: Dict[str, float],
    train_values: List[float],
):
    """Checks that the values of `attribute` seen during training are consistent with the task
    schedule, given the kind of non-stationarity of the setting.

    `task_schedule_for_attr` holds the value of the attribute at each key of the train task
    schedule, and `train_values` holds the values of that attribute encountered during training.
    """
    distinct_schedule_values = set(task_schedule_for_attr.values())
    assert len(distinct_schedule_values) == setting.nb_tasks + 1, (
        f"Task schedule should have had {setting.nb_tasks + 1} distinct values for "
        f"attribute {attribute}: {task_schedule_for_attr}"
    )

    if setting.smooth_task_boundaries:
        # Smooth task boundaries: there should be one (unique) value of the attribute for each
        # step of training. This is the truth condition for the ContinualRLSetting.
        # NOTE: There's an offset by 1 here because of when the env is closed.
        # NOTE: This test won't really work with integer values, but that doesn't matter
        # right now because we don't/won't support changing the values of integer
        # parameters in this "continuous" task setting.
        assert len(set(train_values)) == setting.train_max_steps, (
            f"Should have encountered {setting.train_max_steps} distinct values "
            f"for attribute {attribute}: during training!"
        )
        return

    # Otherwise, the tasks are discrete.
    from ..discrete.setting import DiscreteTaskAgnosticRLSetting

    setting: DiscreteTaskAgnosticRLSetting
    train_tasks = setting.nb_tasks
    unique_attribute_values = set(train_values)

    # The values passed in should match those of the train task schedule, key for key.
    assert setting.train_task_schedule.keys() == task_schedule_for_attr.keys()
    for schedule_key, expected_value in task_schedule_for_attr.items():
        task_dict = setting.train_task_schedule[schedule_key]
        assert attribute in task_dict
        assert task_dict[attribute] == expected_value

    # There should be as many distinct values seen during training as there are tasks.
    assert len(unique_attribute_values) == train_tasks, (
        type(setting),
        attribute,
        unique_attribute_values,
        task_schedule_for_attr,
        setting.nb_tasks,
    )
def validate_results(
    self,
    setting: ContinualRLSetting,
    method: DummyMethod,
    results: ContinualRLSetting.Results,
) -> None:
    """Checks the results, as well as what the dummy method recorded while being applied.

    Expects a single call to `fit`, no task switches, and nothing recorded in
    `received_task_ids` or `received_while_training`.
    """
    assert results
    assert results.objective

    expected_counts = (("n_task_switches", 0), ("n_fit_calls", 1))
    for counter_name, expected_count in expected_counts:
        assert getattr(method, counter_name) == expected_count

    for record_name in ("received_task_ids", "received_while_training"):
        assert not getattr(method, record_name)
@pytest.mark.parametrize(
    "batch_size",
    [None, 1, 3],
)
@pytest.mark.timeout(60)
def test_check_iterate_and_step(
    self,
    setting_kwargs: Dict[str, Any],
    batch_size: Optional[int],
):
    """Test that the observations are of the right type and shape, regardless
    of whether we iterate on the env by calling 'step' or by using it as a
    DataLoader.

    The train, validation and test environments of the setting are checked in turn. The test
    environment is always created with `batch_size=None`.
    """
    setting_kwargs.setdefault("num_workers", 0)

    dataset: str = setting_kwargs["dataset"]

    from gym.envs.registration import registry

    # Figure out what the (unbatched) observation and action spaces are expected to be.
    if dataset in registry.env_specs:
        with gym.make(dataset) as temp_env:
            expected_x_space = temp_env.observation_space
            expected_action_space = temp_env.action_space
    else:
        # NOTE: Not ideal: Have to create a setting just to get the observation space
        temp_setting = self.Setting(**setting_kwargs)
        # NOTE: Using the test dataloader so the task labels space is a Sparse(Discrete(n)) in
        # the worst case, and so all observations (None or integers) are valid samples.
        with temp_setting.test_dataloader() as temp_env:
            # e = temp_env
            # while e.unwrapped is not e:
            #     print(f"Wrapper of type {type(e)} has obs space of {e.observation_space}")
            #     e = e.env
            # print(f"Unwrapped obs space is {e.observation_space}")
            # assert False, temp_env
            expected_x_space = temp_env.observation_space.x
            expected_action_space = temp_env.action_space
        del temp_setting

    setting = self.Setting(**setting_kwargs)

    if batch_size is not None:
        expected_batched_x_space = batch_space(expected_x_space, batch_size)
        expected_batched_action_space = batch_space(setting.action_space, batch_size)
    else:
        expected_batched_x_space = expected_x_space
        expected_batched_action_space = expected_action_space

    assert setting.observation_space.x == expected_x_space
    assert setting.action_space == expected_action_space

    # TODO: This is changing:
    assert setting.train_transforms == []
    # assert setting.train_transforms == [Transforms.to_tensor, Transforms.three_channels]

    # NOTE: The two helpers below are closures: they read the `expected_*` variables at call
    # time, so they see the values that are re-assigned before creating the test environment.
    def check_env_spaces(env: gym.Env) -> None:
        """Checks the observation and action spaces of the (possibly batched) env."""
        if env.batch_size is not None:
            # TODO: This might not be totally accurate, for example because the
            # TransformObservation wrapper applied to a VectorEnv doesn't change the
            # single_observation_space, AFAIR.
            assert env.single_observation_space.x == expected_x_space
            assert env.single_action_space == expected_action_space

            assert isinstance(env.observation_space, TypedDictSpace), (
                env,
                env.observation_space,
            )
            assert env.observation_space.x == expected_batched_x_space
            assert env.action_space == expected_batched_action_space
        else:
            assert env.observation_space.x == expected_x_space
            assert env.action_space == expected_action_space

    # FIXME: Move this to an instance method on the test class so that subclasses
    # can change stuff in it.
    def check_obs(obs: ContinualRLSetting.Observations) -> None:
        """Checks the type of the observation, and that `obs.x` fits the expected space."""
        if isinstance(self.Setting, partial):
            # NOTE: This Happens when we sneakily switch out the self.Setting
            # attribute in other tests (for the SettingProxy for example).
            assert isinstance(obs, self.Setting.args[0].Observations)
        else:
            assert isinstance(obs, self.Setting.Observations)
        assert obs.x in expected_batched_x_space
        # In this particular case here, the task labels should be None.
        # FIXME: For IncrementalRL, this isn't correct! TestIncrementalRL should
        # therefore have its own version of this function.
        if self.Setting is ContinualRLSetting:
            assert obs.task_labels is None or all(
                task_label == None for task_label in obs.task_labels
            )

    # --- Training environment ---
    with setting.train_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size == batch_size
        check_env_spaces(env)

        # BUG: The dataset's observation space has task_labels as a Discrete, but the task
        # labels are None.
        setting: ContinualRLSetting
        if setting.task_labels_at_train_time:
            if batch_size is not None:
                assert isinstance(env.observation_space.task_labels, spaces.MultiDiscrete)
            else:
                assert isinstance(env.observation_space.task_labels, spaces.Discrete)
        elif setting.known_task_boundaries_at_train_time:
            assert isinstance(env.observation_space.task_labels, Sparse)

        obs = env.reset()
        # BUG: TODO: The observation space that we use should actually check with
        # isinstance and over the fields that fit in the space. Here there is a bug
        # because the env observations also have a `done` field, while the space
        # doesnt.
        # assert obs in env.observation_space
        assert obs.x in env.observation_space.x  # this works though.

        # BUG: This doesn't currently work: (would need a tuple value rather than an
        # array.
        # assert obs.task_labels in env.observation_space.task_labels
        assert obs.task_labels in env.observation_space.task_labels

        if batch_size:
            # With a batched env, compare the first item of the batch with the (unbatched)
            # spaces of the setting.
            assert obs.x[0] in setting.observation_space.x
            assert (
                obs.task_labels is None
                or obs.task_labels[0] in setting.observation_space.task_labels
            )
        else:
            assert obs in setting.observation_space

        reset_obs = env.reset()
        check_obs(reset_obs)

        # BUG: Environment is closed? (batch_size = 3, dataset = 'CartPole-v0')
        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        # Also iterate on the env, sending an action back after each observation.
        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())

    # --- Validation environment ---
    with setting.val_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size == batch_size
        check_env_spaces(env)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())

    # --- Test environment ---
    # NOTE: Limiting the batch size at test time to None (i.e. a single env)
    # because of how the Monitor class works atm.
    batch_size = None
    expected_batched_x_space = expected_x_space
    expected_batched_action_space = expected_action_space

    # NOTE: Need to make sure that the 'directory' passed to the Monitor
    # wrapper is a temp dir. Should be the case, but just checking.
    assert setting.config.log_dir != Path("results")

    with setting.test_dataloader(batch_size=batch_size, num_workers=0) as env:
        assert env.batch_size is None
        check_env_spaces(env)

        reset_obs = env.reset()
        check_obs(reset_obs)

        step_obs, *_ = env.step(env.action_space.sample())
        check_obs(step_obs)

        # NOTE: Can't do this here, unless the episode is over, because the Monitor
        # doesn't want us to end an episode early!
        # for iter_obs in take(env, 3):
        #     check_obs(iter_obs)
        #     _ = env.send(env.action_space.sample())

    # A freshly created test environment can be iterated on directly.
    with setting.test_dataloader(batch_size=batch_size) as env:
        assert not env.is_closed()
        # NOTE: Can't do this here, unless the episode is over, because the Monitor
        # doesn't want us to end an episode early!
        for iter_obs in take(env, 3):
            check_obs(iter_obs)
            _ = env.send(env.action_space.sample())
@pytest.mark.no_xvfb
@pytest.mark.timeout(20)
@pytest.mark.skipif(
    (not Path("temp").exists()),
    reason="Need temp dir for saving the figure this test creates.",
)
@mujoco_required
def test_show_distributions(self, config: Config):
    """Plots the gravity at each step of the train / valid / test envs of a half_cheetah
    setting, and saves the figure as a png file in the "temp" directory.

    Also asserts that, past the first 100 steps, the gravity of the env is not -9.81.
    """
    setting = self.Setting(
        dataset="half_cheetah",
        max_steps=1_000,
        max_episode_steps=100,
        config=config,
    )

    fig, axes = plt.subplots(2, 3)
    name_to_env_fn = {
        "train": setting.train_dataloader,
        "valid": setting.val_dataloader,
        "test": setting.test_dataloader,
    }
    # One column of the figure for each of the three environments.
    for i, (name, env_fn) in enumerate(name_to_env_fn.items()):
        env = env_fn(batch_size=None, num_workers=None)

        gravities: List[float] = []
        task_labels: List[Optional[int]] = []
        total_steps = 0

        # Run episodes with random actions until the env is closed.
        while not env.is_closed():
            obs = env.reset()
            done = False
            steps_in_episode = 0

            while not done:
                t = obs.task_labels
                obs, reward, done, info = env.step(env.action_space.sample())
                total_steps += 1
                steps_in_episode += 1
                y = reward.y
                gravities.append(env.gravity)
                print(total_steps, env.gravity)
                if total_steps > 100:
                    assert env.gravity != -9.81

                task_labels.append(t)

        x = np.arange(len(gravities))
        axes[0, i].plot(x, gravities, label="gravities")
        axes[0, i].legend()
        axes[0, i].set_title(f"{name} gravities")
        axes[0, i].set_xlabel("Step index")
        axes[0, i].set_ylabel("Value")

        # for task_id in task_ids:
        #     y = [t_counter.get(task_id) for t_counter in t_counters]
        #     axes[1, i].plot(x, y, label=f"task_id={task_id}")
        # axes[1, i].legend()
        # axes[1, i].set_title(f"{name} task_id")
        # axes[1, i].set_xlabel("Batch index")
        # axes[1, i].set_ylabel("Count in batch")

    plt.legend()

    Path("temp").mkdir(exist_ok=True)
    fig.set_size_inches((6, 4), forward=False)
    plt.savefig(f"temp/{self.Setting.__name__}.png")
    # plt.waitforbuttonpress(10)
    # plt.show()
# @pytest.mark.xfail(reason="TODO: pl_bolts DQN only accepts string environment names..")
# def test_dqn_on_env(tmp_path: Path):
# """ TODO: Would be nice if we could have the models work directly on the
# gym envs..
# """
# from pl_bolts.models.rl import DQN
# from pytorch_lightning import Trainer
# setting = ContinualRLSetting()
# env = setting.train_dataloader(batch_size=None)
# model = DQN(env)
# trainer = Trainer(fast_dev_run=True, default_root_dir=tmp_path)
# success = trainer.fit(model)
# assert success == 1
def test_passing_task_schedule_sets_other_attributes_correctly():
    """Passing a train task schedule should determine the number of tasks, and the test task
    schedule should have the same tasks, spread over `test_max_steps`.
    """
    gravities = (5.0, 10.0, 20.0)

    def gravity_schedule(*steps: int) -> Dict[int, Dict[str, float]]:
        """Creates a new schedule which uses the gravities above at the given steps."""
        return {step: {"gravity": gravity} for step, gravity in zip(steps, gravities)}

    # TODO: Figure out a way to test that the tasks are switching over time.
    setting = ContinualRLSetting(
        dataset="CartPole-v0",
        train_task_schedule=gravity_schedule(0, 100, 200),
        test_max_steps=10_000,
    )
    assert setting.phases == 1
    assert setting.nb_tasks == 2
    # assert setting.steps_per_task == 100
    assert setting.test_task_schedule == gravity_schedule(0, 5_000, 10_000)
    assert setting.test_max_steps == 10_000
    # assert setting.test_steps_per_task == 5_000

    # Same thing, with a different number of test steps.
    setting = ContinualRLSetting(
        dataset="CartPole-v0",
        train_task_schedule=gravity_schedule(0, 100, 200),
        test_max_steps=2000,
        # test_steps_per_task=100,
    )
    assert setting.phases == 1
    # assert setting.nb_tasks == 2
    # assert setting.steps_per_task == 100
    assert setting.test_task_schedule == gravity_schedule(0, 1000, 2000)
    assert setting.test_max_steps == 2000
    # assert setting.test_steps_per_task == 100
def test_fit_and_on_task_switch_calls():
    """Applies a `_DummyMethod` to a short CartPole setting that doesn't use any transforms."""
    setting_options = dict(
        dataset="CartPole-v0",
        # nb_tasks=5,
        # train_steps_per_task=100,
        train_max_steps=500,
        test_max_steps=500,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
    )
    setting = ContinualRLSetting(**setting_options)
    method = _DummyMethod()
    _ = setting.apply(method)
    # == 30 task switches in total.
if MUJOCO_INSTALLED:
from sequoia.settings.rl.envs.mujoco import (
ContinualHalfCheetahEnv,
ContinualHalfCheetahV2Env,
ContinualHalfCheetahV3Env,
ContinualHopperEnv,
ContinualHopperV2Env,
ContinualHopperV3Env,
ContinualWalker2dV2Env,
ContinualWalker2dV3Env,
)
@mujoco_required
@pytest.mark.parametrize(
    "dataset, expected_env_type",
    [
        ("half_cheetah", ContinualHalfCheetahEnv),
        ("halfcheetah", ContinualHalfCheetahEnv),
        ("HalfCheetah-v2", ContinualHalfCheetahV2Env),
        ("HalfCheetah-v3", ContinualHalfCheetahV3Env),
        ("ContinualHalfCheetah-v2", ContinualHalfCheetahV2Env),
        ("ContinualHalfCheetah-v3", ContinualHalfCheetahV3Env),
        ("ContinualHopper-v2", ContinualHopperEnv),
        ("hopper", ContinualHopperEnv),
        ("Hopper-v2", ContinualHopperV2Env),
        ("Hopper-v3", ContinualHopperV3Env),
        ("walker2d", ContinualWalker2dV3Env),
        ("Walker2d-v2", ContinualWalker2dV2Env),
        ("Walker2d-v3", ContinualWalker2dV3Env),
        ("ContinualWalker2d-v2", ContinualWalker2dV2Env),
        ("ContinualWalker2d-v3", ContinualWalker2dV3Env),
    ],
)
def test_mujoco_env_name_maps_to_continual_variant(
    dataset: str, expected_env_type: Type[gym.Env]
):
    """The train env created for each dataset name should wrap the expected continual env type."""
    step_limits = dict(train_max_steps=10_000, test_max_steps=10_000)
    setting = ContinualRLSetting(dataset=dataset, **step_limits)
    base_env = setting.train_dataloader().unwrapped
    assert isinstance(base_env, expected_env_type)
================================================
FILE: sequoia/settings/rl/continual/tasks.py
================================================
""" Handlers for creating tasks in different environments.
TODO: Add more envs:
- [ ] PyBullet!
- [ ] Box2d!
- [ ] ProcGen!
- [ ] dm_control!
from gym.envs.box2d import BipedalWalker, BipedalWalkerHardcore
"""
import difflib
import inspect
import warnings
from functools import partial, singledispatch
from typing import Any, Callable, Dict, List, Type, TypeVar, Union
import gym
import numpy as np
from gym.envs.classic_control import (
AcrobotEnv,
CartPoleEnv,
Continuous_MountainCarEnv,
MountainCarEnv,
PendulumEnv,
)
from gym.envs.registration import EnvRegistry, EnvSpec, load, registry
from sequoia.common.gym_wrappers.multi_task_environment import make_env_attributes_task
from sequoia.settings.rl.envs import MUJOCO_INSTALLED, sequoia_registry
from sequoia.utils.utils import camel_case
# Idea: Create a true 'Task' class?
Task = Any
ContinuousTask = Dict[str, float]
TaskType = TypeVar("TaskType", bound=ContinuousTask)
# TODO: Create a fancier class for the TaskSchedule, as described in the test file.
# IDEA: Have the Task Schedule be a 'list' of Task objects, each of which has a
# 'duration' parameter, which are accumulated to create the 'keys' of the task schedule!
# TaskSchedule = Dict[int, TaskType]
class TaskSchedule(Dict[int, TaskType]):
    """Dictionary with integer keys and tasks as values."""
class EnvironmentNotSupportedError(gym.error.UnregisteredEnv):
    """Error raised when we don't know how to create a task for the given environment.

    NOTE: This is a subclass of `gym.error.UnregisteredEnv`, so code that catches that gym error
    also catches this one.
    """
def names_match(name_a: str, name_b: str) -> bool:
    """Returns whether one of the two names is a known spelling of the other.

    The spellings of a name are: the name itself, its lower-cased version, and the result of
    passing it through `camel_case`.
    """

    def _spellings(name: str):
        return (name, name.lower(), camel_case(name))

    spellings_of_a = _spellings(name_a)
    spellings_of_b = _spellings(name_b)
    # TODO: Not sure about this 'endswith' stuff, e.g. with MountainCarContinuous vs MountainCar?
    # (i.e. also matching when name_a.endswith(b_variants) or name_b.endswith(a_variants))
    if name_a in spellings_of_b:
        return True
    return name_b in spellings_of_a
def _is_supported(
    env_id: str,
    _make_task_function: Callable[..., ContinuousTask],
    env_registry: EnvRegistry = registry,
) -> bool:
    """Returns whether Sequoia is able to create (continuous) tasks for the given
    environment.

    WIP: It is better not to use this directly, and instead use the equivalent
    `is_supported` function which is created dynamically below.

    `env_id` can be the id of a registered environment, an `EnvSpec`, or a subclass of
    `gym.Env`. `_make_task_function` is the singledispatch "make task" function whose registered
    handlers are inspected, and `env_registry` is the registry in which string ids are looked
    up.

    The environment is considered supported when there is a handler registered for its class
    (or for one of its base classes), or when the name of its class starts with the name of one
    of the classes that have a registered handler.

    Raises NotImplementedError when `env_id` is of an unsupported type. Whatever error
    `env_registry.spec` raises for an unknown id is left to propagate.
    """

    def _has_handler(some_env_type: Type[gym.Env]) -> bool:
        """Returns whether the "make task" function has a registered handler for the
        given envs.
        """
        if some_env_type in _make_task_function.registry:
            return True
        # NOTE: `dispatch` can only be used with classes, so any other kind of entry point
        # (function, partial, etc.) has no handler, unless it was registered explicitly.
        return inspect.isclass(some_env_type) and (
            _make_task_function.dispatch(some_env_type)
            is not _make_task_function.dispatch(object)
        )

    if isinstance(env_id, str):
        env_spec = env_registry.spec(env_id)
    elif isinstance(env_id, EnvSpec):
        env_spec = env_id
    elif inspect.isclass(env_id) and issubclass(env_id, gym.Env):
        if _has_handler(env_id):
            return True
        env_spec = None
        class_name = env_id.__name__
    else:
        raise NotImplementedError(env_id, type(env_id))

    if env_spec:
        assert isinstance(env_spec, EnvSpec)
        entry_point = env_spec.entry_point
        if callable(entry_point):
            if _has_handler(entry_point):
                return True
            # Some callables (e.g. `functools.partial` objects) don't have a name.
            class_name = getattr(entry_point, "__name__", "")
        else:
            # Entry point of the form "some.module:ClassName".
            assert isinstance(entry_point, str)
            class_name = entry_point.partition(":")[2]

    registered_class_names = tuple(c.__name__ for c in _make_task_function.registry)
    # NOTE: `startswith` also covers the case where the name is exactly one of the registered
    # class names.
    return class_name.startswith(registered_class_names)
def task_sampling_function(
    env_registry: EnvRegistry = registry, based_on: Callable[[gym.Env], TaskType] = None
) -> Callable[[gym.Env], TaskType]:
    """Decorator for a "make_task" function (e.g. `make_continuous_task`,
    `make_discrete_task`, etc.) that does the following:

    1. Creates a singledispatch callable from the given function, if necessary;
    2. Registers three useful handlers on the new function: for strings (env ids), for
       environment types, and for wrappers;
    3. Adds all the registered handlers from the `based_on` function, if passed;
    4. Adds a 'is_supported' function on that function (see NOTE below).

    `env_registry` is the registry in which the string handler looks up the env ids.

    NOTE (@lebrice): not sure about this is_supported being created and set on the
    function itself. It would probably be cleaner to create a class like TaskCreator or
    something that has the same methods as the underlying singledispatch callable.

    NOTE: A task sampling function should give back the same task when given the same
    seed, step and change_steps.
    """

    def _wrapper(make_task_fn: Callable[[gym.Env], TaskType]) -> Callable[[gym.Env], TaskType]:
        if not hasattr(make_task_fn, "registry"):
            make_task_fn = singledispatch(make_task_fn)

        @make_task_fn.register(type)
        def make_discrete_task_from_type(env_type: Type[gym.Env], **kwargs) -> ContinuousTask:
            """Handler used when given a type of env, rather than an env instance."""
            try:
                # Try to create a task without actually instantiating the env, by passing the
                # type of env as the 'env' argument, rather than an env instance.
                env_handler_function = make_task_fn.dispatch(env_type)
                return env_handler_function(env_type, **kwargs)
            except Exception as exc:
                raise RuntimeError(
                    f"Unable to create a task based only on the env type {env_type}: {exc}\n"
                ) from exc

        @make_task_fn.register(str)
        def make_discrete_task_by_id(
            env: str,
            **kwargs,
        ) -> Union[Dict[str, Any], Any]:
            """Handler used when given the id of a registered environment."""
            # Load the entry-point class, and use it to determine what handler to use.
            # TODO: Actually instantiate the env here? or just dispatch based on the env class?
            if env not in env_registry.env_specs:
                raise RuntimeError(
                    f"Can't create a task for env id {env}, since it isn't a registered env id."
                )
            env_spec: EnvSpec = env_registry.env_specs[env]
            # NOTE: The entry point can either be a string (which needs to be loaded), or
            # directly a callable, in which case `load` can't be used.
            entry_point = env_spec.entry_point
            env_entry_point: Callable[..., gym.Env] = (
                entry_point if callable(entry_point) else load(entry_point)
            )
            try:
                task: ContinuousTask = make_discrete_task_from_type(env_entry_point, **kwargs)
                return task
            except RuntimeError as exc:
                warnings.warn(
                    RuntimeWarning(
                        f"A temporary environment will have to be created in order to make a task: {exc}"
                    )
                )
            with gym.make(env) as temp_env:
                # IDEA: Could avoid re-creating the env between calls to this function, for
                # instance by saving a single temp env in a global variable and overwriting
                # it if `env` is of a different type.
                return make_task_fn(temp_env, **kwargs)

        @make_task_fn.register
        def make_discrete_for_wrapped_env(
            env: gym.Wrapper,
            step: int,
            change_steps: List[int] = None,
            **kwargs,
        ) -> Union[Dict[str, Any], Any]:
            """Handler used when given a wrapped env: creates the task for the env it wraps."""
            # NOTE: Not sure if this is totally a good idea...
            # If someone registers a handler for some kind of Wrapper, than all envs wrapped
            # with that wrapper will use that handler, instead of their base environment type.
            return make_task_fn(env.env, step=step, change_steps=change_steps, **kwargs)

        if based_on is not None:
            for registered_type, registered_handler in based_on.registry.items():
                # NOTE: Skipping these types since we register new handlers above. Not
                # sure if it's necessary, since it might just overwrite an old handler
                # to register a new one for the same type?
                if registered_type not in [object, str, type, gym.Wrapper]:
                    make_task_fn.register(registered_type, registered_handler)

        # NOTE: The keyword has to be the name of the parameter of `_is_supported`
        # (`_make_task_function`), otherwise every call to `is_supported` raises a TypeError.
        make_task_fn.is_supported = partial(_is_supported, _make_task_function=make_task_fn)
        return make_task_fn

    return _wrapper
@singledispatch
def make_continuous_task(
    env: gym.Env,
    step: int,
    change_steps: List[int],
    seed: int = None,
    **kwargs,
) -> ContinuousTask:
    """Generic function used by Sequoia's RL settings to create a "task" that will be
    applied to an environment like `env`.

    This default implementation is what gets called when there is no handler registered for
    the type of `env`, and always raises a NotImplementedError.

    To add support for a new type of environment, simply register a handler function:

    ```
    @make_continuous_task.register(SomeGymEnvClass)
    def make_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs):
        return {"my_attribute": random.random()}
    ```

    NOTE: In order to create tasks for an environment through its string 'id', and to
    avoid having to actually instantiate an environment, `env` could perhaps be a type
    of environment rather than an actual environment instance. If your function can't
    handle this (raises an exception somehow), then a temporary environment will be
    created, and a warning will be raised.

    TODO: remove / rename this 'change_steps' to 'max_steps' instead.
    """
    raise NotImplementedError(f"Don't currently know how to create tasks for env {env}")
# Apply the `task_sampling_function` decorator by hand, which registers the handlers for env
# ids (str), env types and wrapped envs on `make_continuous_task`. Env ids are looked up in
# `sequoia_registry`.
make_continuous_task = task_sampling_function(env_registry=sequoia_registry)(make_continuous_task)
# Module-level function that tells whether `make_continuous_task` can create tasks for an env.
# NOTE: `env_registry` isn't passed here, so `_is_supported` uses its default registry.
is_supported = partial(_is_supported, _make_task_function=make_continuous_task)
# from functools import _SingleDispatchCallable
# Dictionary mapping from environment type to a dict of environment values which can be
# modified with multiplicative gaussian noise.
# The keys of the inner dicts are names of attributes of the env, and the values are used as the
# "default" task (the task at step 0) by the handler for the classic-control envs.
_ENV_TASK_ATTRIBUTES: Dict[Union[Type[gym.Env]], Dict[str, float]] = {
    CartPoleEnv: {
        "gravity": 9.8,
        "masscart": 1.0,
        "masspole": 0.1,
        "length": 0.5,
        "force_mag": 10.0,
        "tau": 0.02,
    },
    PendulumEnv: {
        "max_speed": 8.0,
        "max_torque": 2.0,
        # "dt" = .05
        "g": 10.0,
        "m": 1.0,
        "l": 1.0,
    },
    MountainCarEnv: {
        "gravity": 0.0025,
        "goal_position": 0.45,  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        # BUG: Since we use multiplicative noise, this won't change over time.
        # "goal_velocity": 0,
    },
    Continuous_MountainCarEnv: {
        "goal_position": 0.45,  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        # BUG: Since we use multiplicative noise, this won't change over time.
        # "goal_velocity": 0,
    },
    # TODO: Test AcrobotEnv
    AcrobotEnv: {
        "LINK_LENGTH_1": 1.0,  # [m]
        "LINK_LENGTH_2": 1.0,  # [m]
        "LINK_MASS_1": 1.0,  #: [kg] mass of link 1
        "LINK_MASS_2": 1.0,  #: [kg] mass of link 2
        "LINK_COM_POS_1": 0.5,  #: [m] position of the center of mass of link 1
        "LINK_COM_POS_2": 0.5,  #: [m] position of the center of mass of link 2
        "LINK_MOI": 1.0,  #: moments of inertia for both links
    },
    # TODO: Add more of the classic control envs here.
    # TODO: Need to get the attributes to modify in each environment type and
    # add them here.
    # AtariEnv: [
    #     # TODO: Maybe have something like the difficulty as the CL 'task' ?
    #     # difficulties = temp_env.ale.getAvailableDifficulties()
    #     # "game_difficulty",
    # ],
}
@make_continuous_task.register(CartPoleEnv)
@make_continuous_task.register(PendulumEnv)
@make_continuous_task.register(MountainCarEnv)
@make_continuous_task.register(Continuous_MountainCarEnv)
@make_continuous_task.register(AcrobotEnv)
def make_task_for_classic_control_env(
    env: gym.Env,
    step: int,
    change_steps: List[int] = None,
    task_params: Union[List[str], Dict[str, Any]] = None,
    seed: int = None,
    noise_std: float = 0.2,
):
    """Creates a task for one of the classic-control environments.

    When `task_params` isn't given (or is empty), the attributes in `_ENV_TASK_ATTRIBUTES` for
    the type of the unwrapped env are used.

    The task at step 0 is a copy of those parameters (the 'default' task). For any other step,
    the task is created by `make_env_attributes_task`, using a random number generator seeded
    with `seed + step` when a seed is given, so that the same seed and step give the same task.

    NOTE: `change_steps` isn't used here: all tasks are independent.
    """
    if not task_params:
        task_params = _ENV_TASK_ATTRIBUTES[type(env.unwrapped)]

    if step == 0:
        # Use the 'default' task as the first task.
        return task_params.copy()

    rng = None if seed is None else np.random.default_rng(seed + step)

    # Default back to the 'env attributes' task, which multiplies the default values
    # with normally distributed scaling coefficients.
    # TODO: Need to refactor the whole MultiTaskEnv/SmoothTransition wrappers / tasks
    # etc.
    return make_env_attributes_task(
        env,
        task_params=task_params,
        rng=rng,
        noise_std=noise_std,
    )
# IDEA: Could probably not have these big ugly IF statements since we have the stubs for
# the different mujoco env classes anyway.
if MUJOCO_INSTALLED:
    from sequoia.settings.rl.envs.mujoco import (
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
        ModifiedGravityEnv,
    )

    default_mujoco_gravity = -9.81

    @make_continuous_task.register(ContinualHopperV2Env)
    @make_continuous_task.register(ContinualHopperV3Env)
    @make_continuous_task.register(ContinualWalker2dV2Env)
    @make_continuous_task.register(ContinualWalker2dV3Env)
    @make_continuous_task.register(ContinualHalfCheetahV2Env)
    @make_continuous_task.register(ContinualHalfCheetahV3Env)
    def make_task_for_modified_gravity_env(
        env: ModifiedGravityEnv,
        step: int,
        change_steps: List[int],
        seed: int = None,
        **kwargs,
    ) -> Union[Dict[str, Any], Any]:
        """Creates a task for the MuJoCo envs with a modifiable gravity.

        The task is a dict with a single "gravity" entry. At step 0, the default gravity is
        used. For any other step, the default gravity is multiplied by a coefficient sampled
        uniformly between 0.5 and 1.5.

        When `seed` is given, the same seed and step always give the same task. `env`,
        `change_steps` and the other keyword arguments aren't used.
        """
        if step == 0:
            # TODO: Do we want to start with normal gravity?
            coefficient = 1
        else:
            # NOTE: Using `seed + step` (like the handler for the classic-control envs does),
            # rather than `seed * step`, which is always 0 when the seed is 0 and would then
            # give the same gravity to every task.
            step_seed = seed + step if seed is not None else None
            # NOTE: np.random.default_rng(None) will NOT give the same result every first
            # time it is called, so this won't cause any issues with the same gravity being
            # sampled for all tasks if `seed` is None.
            rng = np.random.default_rng(step_seed)
            coefficient = rng.uniform() + 0.5
        gravity = coefficient * default_mujoco_gravity
        return {"gravity": gravity}
================================================
FILE: sequoia/settings/rl/continual/tasks_test.py
================================================
from typing import Type
import pytest
from sequoia.conftest import mujoco_required
from sequoia.settings.rl.envs import (
ContinualHalfCheetahEnv,
ContinualHalfCheetahV2Env,
ContinualHalfCheetahV3Env,
ContinualHopperEnv,
ContinualWalker2dEnv,
MujocoEnv,
)
from .tasks import is_supported, make_continuous_task
@mujoco_required
@pytest.mark.parametrize(
    "env_type",
    [
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperEnv,
        ContinualWalker2dEnv,
        ContinualHalfCheetahEnv,
    ],
)
def test_mujoco_tasks(env_type: Type[MujocoEnv]):
    """Check which mujoco envs are supported, and that task creation is seeded."""
    assert is_supported("HalfCheetah-v2")
    from gym.envs.mujoco import HalfCheetahEnv

    # Only our continual variants should be marked as supported, not the *original*
    # gym env classes.
    assert not is_supported(HalfCheetahEnv)
    assert is_supported(env_type)

    # The task at step 0 is the default gravity.
    initial_task = make_continuous_task(env_type, step=0, change_steps=[0, 100, 200])
    assert initial_task == {"gravity": -9.81}

    def task_at_step_100(seed: int):
        return make_continuous_task(env_type, step=100, change_steps=[0, 100, 200], seed=seed)

    task_a = task_at_step_100(123)
    task_b = task_at_step_100(123)
    task_c = task_at_step_100(456)
    # NOTE: The exact sampled value isn't asserted, since it isn't clear whether
    # seeding gives the same result on every machine.
    # assert task == {'gravity': -10.134188877055529}
    # Same seed -> same task; different seed -> different task.
    assert task_a == task_b
    assert task_a != task_c
================================================
FILE: sequoia/settings/rl/continual/test_environment.py
================================================
import itertools
import math
from typing import Dict
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.continual import ContinualResults, TestEnvironment
# TODO: Refactor those so they are based on the MeasureRLPerformanceWrapper, which works
# with vectorized envs.
class ContinualRLTestEnvironment(TestEnvironment):
    """Test environment for continual RL that reports one metric per test episode.

    Keeps the task schedule passed at construction and derives the task boundaries
    (in environment steps, accounting for the batch size) from its keys.
    """

    def __init__(self, *args, task_schedule: Dict, **kwargs):
        """Create the test environment.

        Args:
            *args: Forwarded to `TestEnvironment`.
            task_schedule: Dict whose keys are the steps at which tasks start.
            **kwargs: Forwarded to `TestEnvironment`.
        """
        super().__init__(*args, **kwargs)
        self.task_schedule = task_schedule
        # Task boundaries expressed in (possibly batched) environment steps.
        self.boundary_steps = [step // (self.batch_size or 1) for step in self.task_schedule.keys()]

    def __len__(self):
        """Number of (possibly batched) steps needed to reach the step limit."""
        return math.ceil(self.step_limit / (getattr(self.env, "batch_size", 1) or 1))

    def get_results(self) -> ContinualResults[EpisodeMetrics]:
        """Gather the recorded episodes into a `ContinualResults` object.

        One `EpisodeMetrics` (with `n_samples=1`) is appended per recorded episode.
        """
        # TODO: Place the metrics in the right 'bin' at the end of each episode during
        # testing depending on the task at that time, rather than what's happening here,
        # where we're getting all the rewards and episode lengths at the end and then
        # sort it out into the bins based on the task schedule. ALSO: this would make it
        # easier to support monitoring batched RL environments, since these `Monitor`
        # methods (get_episode_rewards, get_episode_lengths, etc) assume the environment
        # isn't batched.
        rewards = self.get_episode_rewards()
        lengths = self.get_episode_lengths()

        # The schedule must contain a task that starts at step 0.
        assert 0 in self.task_schedule

        test_results = ContinualResults()
        # NOTE: Episodes are not (yet) assigned to a task here; every episode goes into
        # the same list of metrics.
        for episode_reward, episode_length in zip(rewards, lengths):
            episode_metric = EpisodeMetrics(
                n_samples=1,
                mean_episode_reward=episode_reward,
                mean_episode_length=episode_length,
            )
            test_results.metrics.append(episode_metric)
        return test_results

    def render(self, mode="human", **kwargs):
        """Render the env, tiling the images when `rgb_array` is used with a batch size."""
        # TODO: This might not be setup right. Need to check.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            # NOTE(review): `tile_images` is not imported in the visible part of this
            # module, so this branch would raise a NameError - confirm where it should
            # be imported from.
            return tile_images(image_batch)
        return image_batch

    def _after_reset(self, observation):
        """Delegate to the parent hook unchanged."""
        # Is this going to work fine when the observations are batched though?
        return super()._after_reset(observation)
================================================
FILE: sequoia/settings/rl/discrete/__init__.py
================================================
from .setting import DiscreteTaskAgnosticRLSetting
from .tasks import make_discrete_task
================================================
FILE: sequoia/settings/rl/discrete/multienv_wrappers.py
================================================
""" Wrappers around multiple environments.
These wrappers can be used to get different kinds of multi-task environments, or even to
concatenate environments.
"""
from abc import ABC, abstractmethod
from typing import Any, Callable, List, Optional, Sequence, Union
import gym
import numpy as np
from gym import spaces
from sequoia.common.gym_wrappers import IterableWrapper
from sequoia.common.gym_wrappers.multi_task_environment import add_task_labels
from sequoia.common.gym_wrappers.utils import MayCloseEarly
from sequoia.utils.generic_functions import concatenate
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
def instantiate_env(env: Union[str, gym.Env, Callable[[], gym.Env]]) -> gym.Env:
    """Return a `gym.Env` given an env instance, an env id, or an env factory.

    - A `gym.Env` instance is returned as-is.
    - A string is passed to `gym.make`.
    - Anything else must be callable, and is called without arguments.
    """
    if not isinstance(env, (gym.Env, str)):
        assert callable(env)
        return env()
    return gym.make(env) if isinstance(env, str) else env
class MultiEnvWrapper(IterableWrapper, ABC):
    """Base class for wrappers that switch between multiple environments.

    The entries of `envs` are instantiated lazily with `instantiate_env`, so they can
    be env instances, env ids, or callables that create an env. Subclasses decide
    which env comes next by implementing `next_task`.

    Could look a little bit like this:
    https://github.com/rlworkgroup/garage/blob/master/src/garage/envs/multi_env_wrapper.py
    """

    def __init__(self, envs: List[gym.Env], add_task_ids: bool = False):
        """Create the wrapper around the first environment of `envs`.

        Args:
            envs: The environments (or env ids / env factories) to switch between.
                The list is copied; its entries are replaced by the instantiated envs
                in the copy as they get used.
            add_task_ids: Whether to add the index of the current env as a task label
                to the observations (and to the observation space).
        """
        self._envs = envs.copy()
        self._current_task_id = 0
        self.nb_tasks = len(envs)
        self._envs_is_closed: Sequence[bool] = np.zeros([self.nb_tasks], dtype=bool)
        self._add_task_labels = add_task_ids
        self.rng: np.random.Generator = np.random.default_rng()

        self._instantiate_env(self._current_task_id)
        super().__init__(env=self._envs[self._current_task_id])
        self.task_label_space = spaces.Discrete(self.nb_tasks)
        if self._add_task_labels:
            self.observation_space = add_task_labels(
                self.env.observation_space, self.task_label_space
            )

    def _instantiate_env(self, index: int) -> None:
        """Make sure that the entry at `index` is an instantiated env."""
        self._envs[index] = instantiate_env(self._envs[index])

    @staticmethod
    def _close_env(env: Any) -> None:
        """Close `env` if it has been instantiated.

        Entries that are still env ids or env factories have nothing to close, and
        calling `.close()` on them would raise an AttributeError.
        """
        if isinstance(env, gym.Env):
            env.close()

    def set_task(self, task_id: int) -> None:
        """Make the env at index `task_id` the currently wrapped env.

        Raises:
            gym.error.ClosedEnvironmentError: If this wrapper is already closed.
        """
        if self.is_closed(env_index=None):
            raise gym.error.ClosedEnvironmentError(
                "Can't call set_task on the env, since it's already closed."
            )
        self._current_task_id = task_id
        # Use gym.Wrapper.__init__() to reset the `self.env` attribute in gym.Wrapper.
        # TODO: This also resets the '_is_closed' on self.
        # TODO: This resets the 'observation_' and 'action_' etc objects that are saved
        # in the constructor of the 'IterableWrapper'
        self._instantiate_env(self._current_task_id)
        gym.Wrapper.__init__(self, env=self._envs[self._current_task_id])
        if self._add_task_labels:
            self.observation_space = add_task_labels(
                self.env.observation_space, self.task_label_space
            )

    @abstractmethod
    def next_task(self) -> int:
        """Return the index of the environment to use next."""
        pass

    def reset(self):
        """Reset the current env, first moving on to another env if it was closed."""
        if all(self._envs_is_closed):
            self.close()
        elif isinstance(self.env, MayCloseEarly) and self.env.is_closed():
            # The current env closed itself: record it and switch to the next one.
            self._envs_is_closed[self._current_task_id] = True
            self.set_task(self.next_task())
        obs = super().reset()
        return self.observation(obs)

    def step(self, action):
        """Step the current env, adding the task label to the observation if needed."""
        obs, rewards, done, info = super().step(action)
        obs = self.observation(obs)
        return obs, rewards, done, info

    def is_closed(self, env_index: int = None):
        """returns `True` if the environment at index `env_index` is closed, otherwise
        if `env_index` is None, returns `True` if `close()` was called on the wrapper
        or if all the envs are closed (in which case the wrapper also gets closed).
        """
        if env_index is None:
            # Return wether this wrapper itself was closed manually (from outside).
            if self._is_closed:
                return True
            elif all(self.is_closed(env_id) for env_id in range(self.nb_tasks)):
                # Every env is closed: close the wrapper as well.
                self.close(env_index=None)
                return True
            return False

        assert isinstance(env_index, int)
        # Return wether the env at that index is closed.
        if isinstance(self._envs[env_index], MayCloseEarly):
            env_is_closed = self._envs[env_index].is_closed()
            # NOTE: These should always be the same, but just in case:
            self._envs_is_closed[env_index] = env_is_closed
        return self._envs_is_closed[env_index]

    def close(self, env_index: int = None) -> None:
        """Close the environment for the given index, or of all envs if `env_index` is
        `None`.

        Entries that were never instantiated are only marked as closed.

        Raises:
            RuntimeError: If `env_index` is given and that env is already closed.
        """
        if env_index is None:
            logger.info(f"Closing all envs")
            for index, env in enumerate(self._envs):
                if not self._envs_is_closed[index]:
                    self._envs_is_closed[index] = True
                    self._close_env(env)
            # BUG: Not sure why this is actually causing a recursion error.. The idea
            # was to call `MayCloseEarly.close()`.
            # super().close()
            self._is_closed = True
        else:
            if self._envs_is_closed[env_index]:
                raise RuntimeError(f"Env at index {env_index} is already closed...")
            self._envs_is_closed[env_index] = True
            self._close_env(self._envs[env_index])

    def seed(self, seed: Optional[int] = None) -> List[int]:
        """Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        self.rng = np.random.default_rng(seed)
        # One seed per env, drawn from this wrapper's own generator.
        env_seeds = self.rng.integers(0, 1e8, size=len(self._envs)).tolist()
        seeds = env_seeds.copy()
        for index, env_seed in enumerate(env_seeds):
            # NOTE: Would be nice to be able to NOT instantiate all the envs and just
            # seed them when they get created, but then we wouldn't be able to return
            # the seeds from all envs here (which I'm not 100% sure its thaaat useful..)
            self._instantiate_env(index)
            returned_seeds: Optional[List[int]] = self._envs[index].seed(env_seed)
            seeds.extend(returned_seeds or [])
        return seeds

    def observation(self, observation):
        """Add the current task id to the observation when task labels are enabled."""
        if self._add_task_labels:
            return add_task_labels(observation, task_labels=self._current_task_id)
        return observation
class ConcatEnvsWrapper(MultiEnvWrapper):
    """Multi-env wrapper that uses up each environment before starting the next one."""

    def __init__(
        self,
        envs: List[gym.Env],
        add_task_ids: bool = False,
        on_task_switch_callback: Callable[[Optional[int]], Any] = None,
    ):
        """Create the wrapper.

        Args:
            envs: The environments, in the order in which they are used.
            add_task_ids: Whether task labels are added to the observations.
            on_task_switch_callback: Optional callable invoked when a reset moved
                the wrapper to a different env. It receives the new task id when
                `add_task_ids` is True, and `None` otherwise.
        """
        super().__init__(envs, add_task_ids=add_task_ids)
        self.on_task_switch_callback = on_task_switch_callback

    def set_task(self, task_id: int) -> None:
        """Switch to the env at index `task_id` (same as the parent implementation)."""
        # NOTE: If any wrappers try to store things onto the unwrapped env, then those
        # would need to be transfered over to the new env here.
        super().set_task(task_id)

    def reset(self):
        """Reset the env, and notify the callback if the reset changed the task."""
        task_before = self._current_task_id
        observation = super().reset()
        task_after = self._current_task_id

        callback = self.on_task_switch_callback
        if callback and task_before != task_after:
            # Only reveal the task id when task labels are being given.
            callback(task_after if self._add_task_labels else None)
        return observation

    def next_task(self) -> int:
        """Stay on the current env until it is closed, then move to the following one."""
        assert not all(self._envs_is_closed)
        current = self._current_task_id
        if self._envs_is_closed[current]:
            # TODO: Close the env when we reach the end? or leave that up to the wrapper?
            return (current + 1) % self.nb_tasks
        return current

    def __iter__(self):
        """Iterate over the env (same as the parent implementation)."""
        return super().__iter__()

    def send(self, action):
        """Send an action to the env (same as the parent implementation)."""
        return super().send(action)
# Register this as a 'concat' handler for gym environments!
@concatenate.register(gym.Env)
def _concatenate_gym_envs(first_env: gym.Env, *other_envs: gym.Env) -> ConcatEnvsWrapper:
    """Concatenate gym environments by chaining them in a `ConcatEnvsWrapper`."""
    envs = [first_env]
    envs.extend(other_envs)
    return ConcatEnvsWrapper(envs)
class RoundRobinWrapper(MultiEnvWrapper):
    """Multi-env wrapper that cycles through the envs that are still open, one after
    the other.
    """

    def __init__(self, envs, add_task_ids=False):
        super().__init__(envs, add_task_ids=add_task_ids)
        # Start "before" the first env, so that the first call to `next_task` gives 0.
        self._current_task_id = -1

    def next_task(self) -> int:
        """Return the index of the first open env that follows the current one."""
        assert not all(self._envs_is_closed)
        candidate = (self._current_task_id + 1) % self.nb_tasks
        # Skip over the envs that are already closed, wrapping around if needed.
        while self._envs_is_closed[candidate]:
            candidate = (candidate + 1) % self.nb_tasks
        return candidate
class RandomMultiEnvWrapper(MultiEnvWrapper):
    """Multi-env wrapper that picks the next env at random among those still open."""

    def next_task(self) -> int:
        """Sample the index of an open env using the wrapper's random generator."""
        assert not all(self._envs_is_closed)
        open_ids = [
            env_id for env_id, is_closed in enumerate(self._envs_is_closed) if not is_closed
        ]
        return self.rng.choice(open_ids)
class CustomMultiEnvWrapper(MultiEnvWrapper):
    """MultiEnvWrapper that uses a custom callable to determine which env to use next."""

    def __init__(
        self,
        envs: List[gym.Env],
        add_task_ids: bool = False,
        custom_new_task_fn: Callable[[MultiEnvWrapper], int] = None,
    ):
        """Create the wrapper.

        Args:
            envs: The environments to switch between.
            add_task_ids: Whether task labels are added to the observations.
            custom_new_task_fn: Callable that receives this wrapper and returns the
                index of the env to use next. Required, despite the `None` default.
        """
        # Check the argument before the parent constructor does any work.
        assert custom_new_task_fn, "Must pass a custom function to this wrapper."
        super().__init__(envs, add_task_ids=add_task_ids)
        self._custom_new_task_fn = custom_new_task_fn

    def next_task(self) -> int:
        """Ask the custom function which env should be used next."""
        # Call the function with this wrapper and return the index it chooses,
        # rather than returning the function object itself.
        return self._custom_new_task_fn(self)
================================================
FILE: sequoia/settings/rl/discrete/multienv_wrappers_test.py
================================================
from collections import Counter
from functools import partial
from typing import List, Optional
import gym
import pytest
from gym import spaces
from gym.wrappers import TimeLimit
from sequoia.common.gym_wrappers.env_dataset import EnvDataset
from sequoia.common.gym_wrappers.episode_limit import EpisodeLimit
from sequoia.common.spaces import TypedDictSpace
from sequoia.settings.rl.continual.make_env import wrap
from sequoia.utils.utils import unique_consecutive_with_index
from .multienv_wrappers import ConcatEnvsWrapper, RandomMultiEnvWrapper, RoundRobinWrapper
class TestMultiEnvWrappers:
    """Tests for the multi-env wrappers (concat, round-robin and random)."""

    @pytest.fixture()
    def iterable_env(self) -> gym.Env:
        """A CartPole env wrapped in an `EnvDataset`."""
        return EnvDataset(gym.make("CartPole-v0"))

    @pytest.mark.parametrize("add_task_ids", [False, True])
    @pytest.mark.parametrize("nb_tasks", [5, 1])
    @pytest.mark.parametrize("pass_fn_instead_of_env", [False, True])
    def test_concat(self, add_task_ids: bool, nb_tasks: int, pass_fn_instead_of_env: bool):
        """The concat wrapper goes through the envs in order, a fixed number of
        episodes each, whether it is given env instances or env factories.
        """

        def set_attributes(env: gym.Env, **attributes) -> gym.Env:
            # Set the given attributes on the unwrapped env, and return the env.
            for k, v in attributes.items():
                setattr(env.unwrapped, k, v)
            return env

        max_episodes_per_task = 5
        # Each task is a CartPole with a different `length`, limited to 10 steps per
        # episode and `max_episodes_per_task` episodes.
        envs = [
            partial(
                EpisodeLimit,
                TimeLimit(
                    set_attributes(gym.make("CartPole-v0"), length=0.1 + 0.2 * i),
                    max_episode_steps=10,
                ),
                max_episodes=max_episodes_per_task,
            )
            for i in range(nb_tasks)
        ]
        if not pass_fn_instead_of_env:
            envs = [env_fn() for env_fn in envs]
        env = ConcatEnvsWrapper(envs, add_task_ids=add_task_ids)
        assert env.nb_tasks == nb_tasks

        if add_task_ids:
            assert env.observation_space["task_labels"] == spaces.Discrete(env.nb_tasks)

        lengths = []
        for episode in range(nb_tasks * max_episodes_per_task):
            print(f"Episode: {episode}, length: {round(env.unwrapped.length, 5)}")
            obs = env.reset()
            lengths.append(env.unwrapped.length)

            # The envs are used one after the other.
            env_id = episode // max_episodes_per_task
            assert env._current_task_id == env_id, episode
            if add_task_ids:
                assert obs["task_labels"] == env_id

            step = 0
            done = False
            while not done:
                obs, rewards, done, info = env.step(env.action_space.sample())
                step += 1
                if step == 10:
                    assert done
                assert step <= 10

        # NOTE: It's pretty cool that we actually recover something like the task
        # schedule here! :D
        episode_task_schedule = dict(unique_consecutive_with_index(lengths))
        assert episode_task_schedule == {
            i * max_episodes_per_task: 0.1 + 0.2 * i for i in range(nb_tasks)
        }
        assert env.is_closed()

        # TODO: This does the same with an additional StepLimit (ActionLimit) wrapper,
        # and isn't stable because it depends on each episode being 10 long, and
        # CartPole ends earlier sometimes.
        # envs = [
        #     ActionLimit(TimeLimit(gym.make("CartPole-v0"), max_episode_steps=10), max_steps=50)
        #     for i in range(5)
        # ]
        # env = ConcatEnvsWrapper(envs)
        # assert env.nb_tasks == 5
        # for episode in range(25):
        #     print(f"Episode: {episode}")
        #     print(env.max_steps, env.step_count())
        #     obs = env.reset()
        #     env_id = episode // 5
        #     assert env._current_task_id == env_id, episode
        #     step = 0
        #     done = False
        #     while not done:
        #         print(step)
        #         obs, rewards, done, info = env.step(env.action_space.sample())
        #         step += 1
        #         if step == 10:
        #             assert done
        #         assert step <= 10
        # assert env.is_closed()

    @pytest.mark.parametrize("add_task_ids", [False, True])
    @pytest.mark.parametrize("nb_tasks", [5, 1])
    def test_roundrobin(self, add_task_ids: bool, nb_tasks: int):
        """The round-robin wrapper moves to the next env at every episode."""
        max_episodes_per_task = 5
        max_episode_steps = 10
        envs = [
            EpisodeLimit(
                TimeLimit(gym.make("CartPole-v0"), max_episode_steps=max_episode_steps),
                max_episodes=max_episodes_per_task,
            )
            for i in range(nb_tasks)
        ]
        env = RoundRobinWrapper(envs, add_task_ids=add_task_ids)
        assert env.nb_tasks == nb_tasks

        if add_task_ids:
            assert env.observation_space["task_labels"] == spaces.Discrete(env.nb_tasks)
        else:
            assert env.observation_space == env._envs[0].observation_space

        for episode in range(nb_tasks * max_episodes_per_task):
            print(f"Episode: {episode}")
            obs = env.reset()
            # The env changes at every episode, cycling through all the envs.
            env_id = episode % nb_tasks
            assert env._current_task_id == env_id, episode

            step = 0
            done = False
            while not done:
                print(step)
                obs, rewards, done, info = env.step(env.action_space.sample())
                step += 1
                if step == max_episode_steps:
                    assert done
                assert step <= max_episode_steps

        assert env.is_closed()

    def test_random(self):
        """The random wrapper uses every env for the same number of episodes, in an
        order that isn't simply sequential.
        """
        episodes_per_task = 5
        max_episode_steps = 10
        nb_tasks = 5
        envs = [
            EpisodeLimit(
                TimeLimit(gym.make("CartPole-v0"), max_episode_steps=max_episode_steps),
                max_episodes=episodes_per_task,
            )
            for i in range(nb_tasks)
        ]
        env = RandomMultiEnvWrapper(envs)
        env.seed(123)
        assert env.nb_tasks == nb_tasks

        task_ids: List[int] = []
        for episode in range(nb_tasks * episodes_per_task):
            print(f"Episode: {episode}")
            obs = env.reset()
            # NOTE(review): `env_id` isn't used below.
            env_id = episode // nb_tasks
            task_ids.append(env._current_task_id)

            step = 0
            done = False
            print(env._envs_is_closed)
            while not done:
                print(step)
                obs, rewards, done, info = env.step(env.action_space.sample())
                step += 1
                if step == max_episode_steps:
                    assert done
                assert step <= max_episode_steps

        assert env.is_closed()

        from collections import Counter

        # Assert that the task ids are 'random':
        import torch

        # More runs of consecutive equal ids than tasks means that the envs were not
        # simply used one after the other.
        assert len(torch.unique_consecutive(torch.as_tensor(task_ids))) > nb_tasks
        # Every env was used for exactly `episodes_per_task` episodes.
        assert Counter(task_ids) == {i: episodes_per_task for i in range(nb_tasks)}

    def test_iteration(self, iterable_env: gym.Env):
        """Iterate over a concat wrapper wrapped in an `EnvDataset`, checking the task
        labels, the task switch callback, and the recovered task schedule.

        TODO: Interesting bug! Might be because when switching between envs, we're
        setting the 'cached' attributes onto the unwrapped env, and so when we move to
        another env, we all of a sudden don't have those attributes!
        """
        max_episode_steps = 10
        episodes_per_task = 5
        add_task_ids = True
        nb_tasks = 5

        def set_attributes(env: gym.Env, **attributes) -> gym.Env:
            # Set the given attributes on the unwrapped env, and return the env.
            for k, v in attributes.items():
                setattr(env.unwrapped, k, v)
            return env

        from functools import partial

        envs = [
            wrap(
                gym.make("CartPole-v0"),
                [
                    partial(TimeLimit, max_episode_steps=max_episode_steps),
                    partial(set_attributes, length=0.1 + 0.2 * i),
                    partial(EpisodeLimit, max_episodes=episodes_per_task),
                ],
            )
            for i in range(nb_tasks)
        ]

        # Records the argument of every call to the task switch callback.
        on_task_switch_received_task_ids: List[Optional[int]] = []

        def on_task_switch(task_id: Optional[int]) -> None:
            print(f"On task switch: {task_id}.")
            on_task_switch_received_task_ids.append(task_id)

        env = ConcatEnvsWrapper(
            envs, add_task_ids=add_task_ids, on_task_switch_callback=on_task_switch
        )
        env = EnvDataset(env)
        env.seed(123)
        assert env.nb_tasks == nb_tasks

        if add_task_ids:
            assert env.observation_space == TypedDictSpace(
                x=env.env._envs[0].observation_space,
                task_labels=spaces.Discrete(nb_tasks),
            )
        else:
            assert env.observation_space == env.env._envs[0].observation_space
        assert env.observation_space.sample() in env.observation_space

        task_ids: List[int] = []
        lengths_at_each_step = []
        lengths_at_each_episode = []

        for episode in range(nb_tasks * episodes_per_task):
            # NOTE(review): `env_id` isn't used below.
            env_id = episode // episodes_per_task
            episode_task_ids: List[int] = []

            # Each iteration over the env is one episode.
            for step, obs in enumerate(env):
                assert obs in env.observation_space
                print(f"Episode {episode}, Step {step}: obs: {obs}, length: {env.unwrapped.length}")
                if step == 0:
                    lengths_at_each_episode.append(env.unwrapped.length)
                lengths_at_each_step.append(env.unwrapped.length)

                if add_task_ids:
                    assert list(obs.keys()) == ["x", "task_labels"]
                    obs_task_id = obs["task_labels"]
                    episode_task_ids.append(obs_task_id)
                    print(f"obs Task id: {obs_task_id}")

                rewards = env.send(env.action_space.sample())
                if step > max_episode_steps:
                    assert False, "huh?"

            if add_task_ids:
                assert (
                    len(set(episode_task_ids)) == 1
                ), f"all observations within an episode should have the same task id.: {episode_task_ids}"
                # Add the unique task id from this episode to the list of all task ids.
                task_ids.extend(set(episode_task_ids))

        # One distinct pole length per task should have been observed.
        actual_task_schedule = dict(unique_consecutive_with_index(lengths_at_each_step))
        assert len(actual_task_schedule) == nb_tasks
        assert env.is_closed()

        if add_task_ids:
            assert task_ids == sum([[i] * episodes_per_task for i in range(nb_tasks)], [])
            # should have received one per boundary
            assert on_task_switch_received_task_ids == list(range(1, nb_tasks))
            assert Counter(task_ids) == {i: episodes_per_task for i in range(nb_tasks)}
        else:
            assert on_task_switch_received_task_ids == [None] * (nb_tasks - 1)

    def test_adding_envs(self):
        """Adding two `EnvDataset`s gives an env that holds both of them, in order."""
        from sequoia.common.gym_wrappers.env_dataset import EnvDataset

        env_1 = EnvDataset(
            EpisodeLimit(TimeLimit(gym.make("CartPole-v1"), max_episode_steps=10), max_episodes=5)
        )
        env_2 = EnvDataset(
            EpisodeLimit(TimeLimit(gym.make("CartPole-v1"), max_episode_steps=10), max_episodes=5)
        )
        chained_env = env_1 + env_2
        assert chained_env._envs[0] is env_1
        assert chained_env._envs[1] is env_2
        # TODO: Do we add a 'len' attribute?
        # assert False, len(chained_env)
        # assert
def test_batched_envs():
    """Placeholder: this test has no body yet.

    TODO: Not sure how this will work with batched envs, but if it did, we could
    allow batch_size > 1 in Discrete, or batched custom envs in Incremental.
    """
================================================
FILE: sequoia/settings/rl/discrete/results.py
================================================
from typing import ClassVar, TypeVar
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.discrete_results import TaskSequenceResults
# Type of the metrics stored in the results. The name given to `TypeVar` has to match
# the variable it is assigned to.
MetricType = TypeVar("MetricType", bound=EpisodeMetrics)


class DiscreteTaskAgnosticRLResults(TaskSequenceResults[MetricType]):
    """Results for a sequence of tasks in an RL Setting

    This can be seen as one row of a transfer matrix.
    NOTE: This is not the entire transfer matrix because in the Discrete settings we don't
    evaluate after learning each task.
    """

    # Higher mean reward / episode => better
    lower_is_better: ClassVar[bool] = False

    # Name of the objective that is reported.
    objective_name: ClassVar[str] = "Mean reward per episode"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained for going faster than this.)
    min_runtime_hours: ClassVar[float] = 1.5

    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 12.0
================================================
FILE: sequoia/settings/rl/discrete/setting.py
================================================
from dataclasses import dataclass
from typing import Any, Callable, ClassVar, Dict, Optional, Type, Union
from gym.envs.registration import EnvSpec, registry
from simple_parsing import field
from simple_parsing.helpers import choice
from sequoia.common.gym_wrappers.utils import is_monsterkong_env
from sequoia.settings.assumptions.context_discreteness import DiscreteContextAssumption
from sequoia.settings.rl.continual.tasks import TaskSchedule, registry
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import dict_union
from ..continual.setting import ContinualRLSetting
from ..continual.setting import supported_envs as _parent_supported_envs
from .tasks import DiscreteTask, is_supported, make_discrete_task
from .test_environment import DiscreteTaskAgnosticRLTestEnvironment
logger = get_logger(__name__)
# Envs supported by this setting: those of the parent (continual) setting, combined
# (with `dict_union`) with every registered env spec that isn't already in the parent's
# dict and that `is_supported` accepts.
# NOTE(review): `registry` is imported twice in this module (from
# `gym.envs.registration`, then from `sequoia.settings.rl.continual.tasks`); the second
# import is the one in effect here - presumably both are the same object, confirm.
supported_envs: Dict[str, EnvSpec] = dict_union(
    _parent_supported_envs,
    {
        spec.id: spec
        for env_id, spec in registry.env_specs.items()
        if spec.id not in _parent_supported_envs and is_supported(env_id)
    },
)
# Maps every supported env id to itself.
available_datasets: Dict[str, str] = {env_id: env_id for env_id in supported_envs}
from .results import DiscreteTaskAgnosticRLResults
@dataclass
class DiscreteTaskAgnosticRLSetting(DiscreteContextAssumption, ContinualRLSetting):
    """Continual Reinforcement Learning Setting where there are clear task boundaries,
    but where the task information isn't available.
    """

    # NOTE: The class docstring and the comments above the fields are kept as they
    # were, since they may be used as help text for the command-line arguments.

    # TODO: Update the type or results that we get for this Setting.
    Results: ClassVar[Type[Results]] = DiscreteTaskAgnosticRLResults

    # The type wrapper used to wrap the test environment, and which produces the
    # results.
    TestEnvironment: ClassVar[Type[TestEnvironment]] = DiscreteTaskAgnosticRLTestEnvironment

    # The function used to create the tasks for the chosen env.
    _task_sampling_function: ClassVar[Callable[..., DiscreteTask]] = make_discrete_task

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, Union[str, Any]]] = available_datasets

    # Which environment (a.k.a. "dataset") to learn on.
    # The dataset could be either a string (env id or a key from the
    # available_datasets dict), a gym.Env, or a callable that returns a
    # single environment.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # The number of "tasks" that will be created for the training, valid and test
    # environments. When left unset, will use a default value that makes sense
    # (something like 5).
    nb_tasks: int = field(5, alias=["n_tasks", "num_tasks"])

    # Maximum number of training steps per task.
    train_steps_per_task: Optional[int] = None
    # Number of test steps per task.
    test_steps_per_task: Optional[int] = None

    # # Maximum number of episodes in total.
    # train_max_episodes: Optional[int] = None
    # # TODO: Add tests for this 'max episodes' and 'episodes_per_task'.
    # train_max_episodes_per_task: Optional[int] = None
    # # Total number of steps in the test loop. (Also acts as the "length" of the testing
    # # environment.)
    # test_max_steps_per_task: int = 10_000
    # test_max_episodes_per_task: Optional[int] = None

    # # Max number of steps per training task. When left unset and when `train_max_steps`
    # # is set, takes the value of `train_max_steps` divided by `nb_tasks`.
    # train_max_steps_per_task: Optional[int] = None
    # # (WIP): Maximum number of episodes per training task. When left unset and when
    # # `train_max_episodes` is set, takes the value of `train_max_episodes` divided by
    # # `nb_tasks`.
    # train_max_episodes_per_task: Optional[int] = None
    # # Maximum number of steps per task in the test loop. When left unset and when
    # # `test_max_steps` is set, takes the value of `test_max_steps` divided by `nb_tasks`.
    # test_max_steps_per_task: Optional[int] = None
    # # (WIP): Maximum number of episodes per test task. When left unset and when
    # # `test_max_episodes` is set, takes the value of `test_max_episodes` divided by
    # # `nb_tasks`.
    # test_max_episodes_per_task: Optional[int] = None

    # def warn(self, warning: Warning):
    #     logger.warning(warning)
    #     warnings.warn(warning)

    def __post_init__(self):
        """Run the parent's post-init, then pick a default episode length for the
        monsterkong envs when none was given.
        """
        # TODO: Rework all the messy fields from before by just considering these as eg.
        # the maximum number of steps per task, rather than the fixed number of steps
        # per task.
        # Task boundaries are discrete in this setting, so they can't be smooth.
        assert not self.smooth_task_boundaries
        super().__post_init__()

        # Only monsterkong envs get a default limit (of 500 steps per episode).
        if self.max_episode_steps is None and is_monsterkong_env(self.dataset):
            self.max_episode_steps = 500

    def create_train_task_schedule(self) -> TaskSchedule[DiscreteTask]:
        """Create the training task schedule (same as the parent implementation)."""
        # IDEA: Could convert max_episodes into max_steps if max_steps_per_episode is
        # set.
        return super().create_train_task_schedule()

    def create_val_task_schedule(self) -> TaskSchedule[DiscreteTask]:
        """Create the validation task schedule (same as the parent implementation)."""
        # Always the same as train task schedule for now.
        return super().create_val_task_schedule()

    def create_test_task_schedule(self) -> TaskSchedule[DiscreteTask]:
        """Create the test task schedule (same as the parent implementation)."""
        return super().create_test_task_schedule()
================================================
FILE: sequoia/settings/rl/discrete/setting_test.py
================================================
from dataclasses import fields
from typing import Any, ClassVar, Dict, Optional, Type
import gym
import pytest
from sequoia.common.config import Config
from sequoia.conftest import monsterkong_required, param_requires_monsterkong
from sequoia.methods import Method
from sequoia.settings.assumptions.incremental_test import DummyMethod as _DummyMethod
from sequoia.settings.rl.envs import MetaMonsterKongEnv
from ..continual.setting_test import TestContinualRLSetting as ContinualRLSettingTests
from .setting import DiscreteTaskAgnosticRLSetting
class TestDiscreteTaskAgnosticRLSetting(ContinualRLSettingTests):
    """Tests for `DiscreteTaskAgnosticRLSetting`, reusing the tests of the parent
    setting.
    """

    Setting: ClassVar[Type[Setting]] = DiscreteTaskAgnosticRLSetting
    dataset: pytest.fixture

    @pytest.fixture(params=[1, 3])
    def nb_tasks(self, request):
        """Number of tasks to use in the setting."""
        n = request.param
        return n

    @pytest.fixture()
    def setting_kwargs(self, dataset: str, nb_tasks: int, config: Config):
        """Fixture used to pass keyword arguments when creating a Setting."""
        return {"dataset": dataset, "nb_tasks": nb_tasks, "config": config}

    @pytest.mark.parametrize(
        "dataset, expected_resulting_name",
        [
            param_requires_monsterkong("monsterkong", "MetaMonsterKong-v0"),
            param_requires_monsterkong("monsterkong-v0", "MetaMonsterKong-v0"),
            param_requires_monsterkong("meta_monsterkong", "MetaMonsterKong-v0"),
            ("cartpole", "CartPole-v1"),
        ],
    )
    def test_passing_name_variant_works(self, dataset: str, expected_resulting_name: str):
        """Variants of an env name are resolved to the id of a registered env."""
        assert self.Setting(dataset=dataset).dataset == expected_resulting_name

    def validate_results(
        self,
        setting: DiscreteTaskAgnosticRLSetting,
        method: Method,
        results: DiscreteTaskAgnosticRLSetting.Results,
    ) -> None:
        """Check that the results are consistent with the setting that produced them."""
        assert results
        assert results.objective
        assert len(results.task_results) == setting.nb_tasks
        # NOTE: Asserting a (non-empty) list is always true, whatever it contains, so
        # `all` is needed for the comparisons to actually be checked.
        assert all(
            sum(task_result.metrics) == task_result.average_metrics
            for task_result in results.task_results
        )
        assert (
            sum(task_result.average_metrics for task_result in results.task_results)
            == results.average_metrics
        )

    @pytest.mark.parametrize("give_nb_tasks", [True, False])
    @pytest.mark.parametrize("give_train_max_steps", [True, False])
    @pytest.mark.parametrize(
        "give_train_task_schedule, ids_instead_of_steps",
        [(True, False), (True, True), (False, False)],
    )
    @pytest.mark.parametrize(
        "nb_tasks, train_max_steps, train_task_schedule",
        [
            (1, 10_000, {0: {"gravity": 5.0}, 10_000: {"gravity": 10}}),
            (
                4,
                100_000,
                {
                    0: {"gravity": 5.0},
                    25_000: {"gravity": 10},
                    50_000: {"gravity": 10},
                    75_000: {"gravity": 10},
                    100_000: {"gravity": 20},
                },
            ),
        ],
    )
    def test_fields_are_consistent(
        self,
        nb_tasks: Optional[int],
        train_max_steps: Optional[int],
        train_task_schedule: Optional[Dict[str, Any]],
        give_nb_tasks: bool,
        give_train_max_steps: bool,
        give_train_task_schedule: bool,
        ids_instead_of_steps: bool,
    ):
        """The number of tasks, the max number of steps and the task schedules agree
        with each other, whichever subset of them is passed to the setting.
        """
        # give_nb_tasks = True
        # give_max_steps = True
        # give_task_schedule = True

        defaults = {f.name: f.default for f in fields(self.Setting)}
        default_max_train_steps = defaults["train_max_steps"]
        default_nb_tasks = defaults["nb_tasks"]

        # TODO: Same test for test_max_steps?
        full_kwargs = dict(
            nb_tasks=nb_tasks,
            train_max_steps=train_max_steps,
            train_task_schedule=train_task_schedule,
        )
        # TODO: Should also pass nothing, and expect an error to be raised?
        # Only keep the arguments that this combination of parameters should pass.
        kwargs = full_kwargs.copy()
        if not give_nb_tasks:
            kwargs.pop("nb_tasks")
        if not give_train_max_steps:
            kwargs.pop("train_max_steps")
        if not give_train_task_schedule:
            kwargs.pop("train_task_schedule")
        elif ids_instead_of_steps:
            # Use the task indices as keys, rather than the steps.
            kwargs["train_task_schedule"] = {
                i: task for i, (step, task) in enumerate(train_task_schedule.items())
            }

        setting = self.Setting(**kwargs)

        # NOTE(review): `==` binds tighter than the conditional expression, so in the
        # two asserts below the comparison only happens in the first branch; the other
        # branches just assert that a number is truthy. Left as-is, because the value
        # to expect in the schedule branch isn't clear (the schedules above have
        # `nb_tasks + 1` entries) - confirm the intent before adding parentheses.
        assert (
            setting.nb_tasks == nb_tasks
            if give_nb_tasks
            else len(train_task_schedule)
            if give_train_task_schedule
            else default_nb_tasks
        )
        assert (
            setting.train_max_steps == train_max_steps
            if give_train_max_steps
            else max(train_task_schedule)
            if give_train_task_schedule
            else default_max_train_steps
        )

        # The schedules have evenly spaced task boundaries, including the final step.
        assert list(setting.train_task_schedule.keys()) == [
            i * (setting.train_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1)
        ]
        assert list(setting.val_task_schedule.keys()) == [
            i * (setting.train_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1)
        ]
        assert list(setting.test_task_schedule.keys()) == [
            i * (setting.test_max_steps / setting.nb_tasks) for i in range(0, setting.nb_tasks + 1)
        ]
# When giving only the number of tasks:
from typing import Any, Dict, Optional
def test_fit_and_on_task_switch_calls(config: Config):
    """Applies a dummy method on a setting created without `nb_tasks`.

    `fit` is expected to be called exactly once, and the method should not be told
    about any task switch (no task ids received).
    """
    setting_kwargs = dict(
        dataset="CartPole-v0",
        # nb_tasks=5,
        # train_steps_per_task=100,
        train_max_steps=500,
        test_max_steps=500,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
        config=config,
    )
    setting = DiscreteTaskAgnosticRLSetting(**setting_kwargs)
    dummy_method = _DummyMethod()
    setting.apply(dummy_method)

    # No task switch should have been signalled to the method.
    assert dummy_method.n_task_switches == 0
    assert dummy_method.n_fit_calls == 1
    assert not dummy_method.received_task_ids
    assert not dummy_method.received_while_training
@monsterkong_required
@pytest.mark.parametrize(
    "dataset, expected_env_type",
    [
        ("MetaMonsterKong-v0", MetaMonsterKongEnv),
        ("monsterkong", MetaMonsterKongEnv),
        ("PixelMetaMonsterKong-v0", MetaMonsterKongEnv),
        # NOTE: This case used to be listed twice, which only ran the same test twice.
        ("monster_kong", MetaMonsterKongEnv),
        # ("halfcheetah", ContinualHalfCheetahEnv),
        # ("HalfCheetah-v2", ContinualHalfCheetahV2Env),
        # ("HalfCheetah-v3", ContinualHalfCheetahV3Env),
        # ("ContinualHalfCheetah-v2", ContinualHalfCheetahV2Env),
        # ("ContinualHalfCheetah-v3", ContinualHalfCheetahV3Env),
        # ("ContinualHopper-v2", ContinualHopperEnv),
        # ("hopper", ContinualHopperEnv),
        # ("Hopper-v2", ContinualHopperEnv),
        # ("walker2d", ContinualWalker2dV3Env),
        # ("Walker2d-v2", ContinualWalker2dV2Env),
        # ("Walker2d-v3", ContinualWalker2dV3Env),
        # ("ContinualWalker2d-v2", ContinualWalker2dV2Env),
        # ("ContinualWalker2d-v3", ContinualWalker2dV3Env),
    ],
)
def test_monsterkong_env_name_maps_to_continual_variant(
    dataset: str, expected_env_type: Type[gym.Env]
):
    """The different spellings of the MonsterKong dataset name should all create a
    training environment whose unwrapped env is of the expected type.
    """
    setting = DiscreteTaskAgnosticRLSetting(
        dataset=dataset, train_max_steps=10_000, test_max_steps=10_000
    )
    train_env = setting.train_dataloader()
    assert isinstance(train_env.unwrapped, expected_env_type)
================================================
FILE: sequoia/settings/rl/discrete/tasks.py
================================================
""" Functions that create 'discrete' tasks for an environment.
TODO: Once we have a wrapper that can seamlessly switch from one env to the next, then
move the "incremental" tasks from `incremental/tasks.py` to this level.
"""
import warnings
from functools import partial, singledispatch
from typing import Any, Callable, Dict, List, Optional, Union
import gym
import numpy as np
from sequoia.settings.rl.envs import MONSTERKONG_INSTALLED, MetaMonsterKongEnv, sequoia_registry
from ..continual.tasks import (
ContinuousTask,
_is_supported,
make_continuous_task,
task_sampling_function,
)
DiscreteTask = Union[ContinuousTask, Callable[[gym.Env], Any]]
@task_sampling_function(env_registry=sequoia_registry, based_on=make_continuous_task)
@singledispatch
def make_discrete_task(
    env: gym.Env,
    *,
    step: int,
    change_steps: List[int],
    seed: int = None,
    **kwargs,
) -> DiscreteTask:
    """Creates a "discrete" task for an environment like `env`.

    This is the generic entry point used by Sequoia's `DiscreteTaskAgnosticRLSetting`
    (and its descendants). It dispatches on the type of `env`; this fallback
    implementation is reached when no handler is registered, and always raises.

    Support for a new type of environment is added by registering a handler:

    ```
    @make_discrete_task.register(SomeGymEnvClass)
    def make_discrete_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,):
        return {"my_attribute": random.random()}
    ```

    Raises:
        NotImplementedError: Always, in this fallback implementation.
    """
    # NOTE: Falling back to `make_continuous_task(env, step=step, change_steps=change_steps,
    # seed=seed, **kwargs)` was considered here, but is disabled.
    message = f"Don't currently know how to create a discrete task for env {env}"
    raise NotImplementedError(message)
# `_is_supported` from the continual tasks module, bound to `make_discrete_task`
# through its `_make_task_function` argument.
is_supported = partial(_is_supported, _make_task_function=make_discrete_task)
if MONSTERKONG_INSTALLED:
    # In MonsterKong the tasks can be changed on-the-fly, whereas they can't in the
    # size-based MUJOCO envs.
    @make_discrete_task.register
    def make_task_for_monsterkong_env(
        env: MetaMonsterKongEnv,
        step: int,
        change_steps: List[int] = None,
        seed: int = None,
        **kwargs,
    ) -> Union[Dict[str, Any], Any]:
        """Picks the MonsterKong level to use for the task that starts at `step`.

        The index of `step` in `change_steps` is the task index. Without a seed, task
        `i` uses level `i`. With a seed, the levels are first shuffled with a generator
        created from that seed. When there are more tasks than levels, the extra tasks
        use levels drawn at random.

        Returns:
            A dict of the form `{"level": <int>}`.

        Raises:
            RuntimeError: If `step` isn't one of the `change_steps`.
        """
        assert change_steps is not None, "Need task boundaries to construct the task schedule."
        if step not in change_steps:
            raise RuntimeError(
                f"Monsterkong's has discrete tasks, {step} should be in {change_steps}!"
            )

        # TODO: double-check with @mattriemer on this:
        n_supported_levels = 30
        task_index = change_steps.index(step)
        nb_tasks = len(change_steps)
        # IDEA: Could also have a list of supported levels
        levels = list(range(n_supported_levels))

        rng: Optional[np.random.Generator] = None
        if seed is not None:
            # Deterministic shuffling of the levels, based on the seed.
            rng = np.random.default_rng(seed)
            rng.shuffle(levels)

        if task_index < n_supported_levels:
            return {"level": levels[task_index]}

        warnings.warn(
            RuntimeWarning(
                f"The given task id ({task_index}) is greater than the number of "
                f"levels currently available in MonsterKong "
                f"({n_supported_levels})!\n"
                f"Multiple tasks may therefore use the same level!"
            )
        )
        # Looping back to the first levels was considered (`task_index %= n_supported_levels`)
        # but rejected: it would mean training on the first tasks again right before
        # testing. Instead, the levels of the extra tasks are sampled at random.
        # NOTE: Other calls to this should not get the same value!
        rng = rng or np.random.default_rng(seed)
        n_extra_tasks = nb_tasks - n_supported_levels
        random_extra_levels = rng.integers(0, n_supported_levels, size=n_extra_tasks)
        level: int = int(random_extra_levels[task_index - n_supported_levels])
        return {"level": level}
================================================
FILE: sequoia/settings/rl/discrete/tasks_test.py
================================================
import pytest
from sequoia.conftest import monsterkong_required
from sequoia.settings.rl.envs import MetaMonsterKongEnv
from .tasks import make_discrete_task
@monsterkong_required
def test_monsterkong_tasks():
    """Without a seed, the task starting at the i-th change step uses level i, and a
    step that isn't a change step is rejected with a RuntimeError.
    """
    # assert make_discrete_task.is_supported(MetaMonsterKongEnv)
    change_steps = [0, 100, 200]

    for step, expected_level in [(0, 0), (100, 1)]:
        task = make_discrete_task(MetaMonsterKongEnv, step=step, change_steps=list(change_steps))
        assert task == {"level": expected_level}

    with pytest.raises(RuntimeError):
        _ = make_discrete_task(MetaMonsterKongEnv, step=123, change_steps=list(change_steps))
================================================
FILE: sequoia/settings/rl/discrete/test_environment.py
================================================
import itertools
import math
from typing import Dict
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.discrete_results import TaskSequenceResults
from sequoia.settings.assumptions.iid_results import TaskResults
from ..continual.test_environment import ContinualRLTestEnvironment
class DiscreteTaskAgnosticRLTestEnvironment(ContinualRLTestEnvironment):
    """Test environment for settings with discrete tasks.

    Sorts the metrics of the test episodes into one `TaskResults` per task, based on
    the steps of the task schedule.
    """

    def __init__(self, *args, task_schedule: Dict, **kwargs):
        """
        Args:
            task_schedule: Dict whose keys are the steps at which the tasks start. The
                entry with the largest step is treated as the terminal state, rather
                than as the start of a task.
        """
        super().__init__(*args, task_schedule=task_schedule, **kwargs)
        self.task_schedule = task_schedule
        steps_per_batch = self.batch_size or 1
        # Sort the steps (as is done in `get_results`), so that the entry removed below
        # really is the terminal one, regardless of the insertion order of the dict.
        self.boundary_steps = [step // steps_per_batch for step in sorted(self.task_schedule)]
        # TODO: Removing the last entry since it's the terminal state.
        self.boundary_steps.pop(-1)

    def __len__(self):
        """Number of (possibly batched) steps: the step limit divided by the batch size
        of the wrapped env (1 when it has none), rounded up.
        """
        env_batch_size = getattr(self.env, "batch_size", 1) or 1
        return math.ceil(self.step_limit / env_batch_size)

    def get_results(self) -> TaskSequenceResults[EpisodeMetrics]:
        """Creates the results, with one `TaskResults` of episode metrics per task.

        Each episode is attributed to a task based on the total number of steps
        performed at the end of that episode.
        """
        # TODO: Place the metrics in the right 'bin' at the end of each episode during
        # testing depending on the task at that time, rather than what's happening here,
        # where we're getting all the rewards and episode lengths at the end and then
        # sort it out into the bins based on the task schedule. ALSO: this would make it
        # easier to support monitoring batched RL environments, since these `Monitor`
        # methods (get_episode_rewards, get_episode_lengths, etc) assume the environment
        # isn't batched.
        import bisect

        rewards = self.get_episode_rewards()
        lengths = self.get_episode_lengths()

        task_schedule: Dict[int, Dict] = self.task_schedule
        task_steps = sorted(task_schedule.keys())
        # TODO: Removing the last entry since it's the terminal state.
        task_steps.pop(-1)

        assert 0 in task_steps
        nb_tasks = len(task_steps)
        assert nb_tasks >= 1

        test_results = TaskSequenceResults([TaskResults() for _ in range(nb_tasks)])
        # TODO: Fix this, since the task id might not be related to the steps!
        steps_at_episode_end = itertools.accumulate(lengths)
        for step, episode_reward, episode_length in zip(steps_at_episode_end, rewards, lengths):
            # Index of the last task boundary that is <= `step`.
            task_id = bisect.bisect_right(task_steps, step) - 1
            episode_metric = EpisodeMetrics(
                n_samples=1,
                mean_episode_reward=episode_reward,
                mean_episode_length=episode_length,
            )
            test_results.task_results[task_id].metrics.append(episode_metric)
        return test_results

    def render(self, mode="human", **kwargs):
        """Renders the env, tiling the images when `mode` is "rgb_array" and the env is
        batched.
        """
        # TODO: This might not be setup right. Need to check.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            # NOTE(review): `tile_images` isn't imported in this module, so this line
            # looks like it would raise a NameError -- add the import once its location
            # in the project is confirmed.
            return tile_images(image_batch)
        return image_batch

    def _after_reset(self, observation):
        # Is this going to work fine when the observations are batched though?
        return super()._after_reset(observation)
================================================
FILE: sequoia/settings/rl/environment.py
================================================
from typing import *
from torch.utils.data import DataLoader, Dataset, IterableDataset
from sequoia.settings.base.environment import ActionType, Environment, ObservationType, RewardType
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
from typing_extensions import Final
from .objects import ActionType, ObservationType, RewardType
# TODO: Instead of using a 'y' field for both the supervised learning labels/target and
# for the reward in RL, instead use a 'reward' field in RL, and a 'y' field in SL, where
# in SL the reward could actually be wether the chosen action was correct or not, and
# 'y' could contain the correct prediction for each action.
class RLEnvironment(DataLoader, Environment[ObservationType, ActionType, RewardType]):
    """Environment in an RL Setting.

    Extends DataLoader to support sending back actions to the 'dataset'.

    This could be useful for modeling RL or Active Learning, for instance, where
    the predictions (actions) have an impact on the data generation process.

    TODO: Not really used at the moment besides as the base class for the GymDataLoader.
    TODO: Maybe add a custom `map` class for generators?

    Iterating through an RL Environment is different than when iterating on an SL
    environment:
    - Batches only contain the observations, rather than (observations, rewards)
    - The rewards are given back after an action is sent to the environment using
      `send`.

    TODO: maybe change this class into something like a `FakeActiveEnvironment`.
    """

    actions_influence_future_observations: Final[bool] = True

    def __init__(self, dataset: Union[Dataset, IterableDataset], **dataloader_kwargs):
        """Creates the dataloader around `dataset`, forwarding `dataloader_kwargs` to
        the `DataLoader` constructor.
        """
        super().__init__(dataset, **dataloader_kwargs)
        # Last observation / action / reward. Only `action` and `reward` are updated by
        # this class (in `send`).
        self.observation: ObservationType = None
        self.action: ActionType = None
        self.reward: RewardType = None

    # def __next__(self) -> ObservationType:
    #     return self.observation

    def send(self, action: ActionType) -> RewardType:
        """Sends an action to the 'dataset'/'Environment' and returns the reward.

        The action is passed to the `send` method of the dataset, and the value it
        returns is stored in `self.reward` and returned.

        TODO: Figure out the interactions with num_workers and send, if any.

        Raises:
            AssertionError: If the dataset doesn't have a `send` attribute.
        """
        self.action = action
        if not hasattr(self.dataset, "send"):
            # TODO: Clean this up, this is taken care of in the GymDataLoader class.
            # if hasattr(self.dataset, "step"):
            #     self.observation, self.reward, self.done, self.info = self.dataset.step(self.action)
            # NOTE: Raised explicitly (instead of using `assert False`), so that this
            # check isn't stripped when running python with the `-O` flag.
            raise AssertionError(
                "TODO: ActiveDataloader dataset should always have a `send` attribute for now."
            )
        self.reward = self.dataset.send(self.action)
        return self.reward
# Deprecated names for the same thing:
ActiveDataLoader = RLEnvironment
ActiveEnvironment = RLEnvironment
================================================
FILE: sequoia/settings/rl/environment_test.py
================================================
from typing import Generator
from torch import Tensor
from torchvision.datasets import MNIST
from sequoia.utils.logging_utils import log_calls
from .environment import ActiveEnvironment
class ActiveMnistEnvironment(ActiveEnvironment[Tensor, Tensor, Tensor]):
    """An Mnist environment which will keep showing the same class until a
    correct prediction is made, and then switch to another class.
    """

    def __init__(self, start_class: int = 0, **kwargs):
        """
        Args:
            start_class: The class to show first. (This argument used to be ignored,
                with the environment always starting at class 0.)
            **kwargs: Passed on to the `ActiveEnvironment` constructor.
        """
        self.current_class: int = start_class
        dataset = MNIST("data")
        super().__init__(dataset, batch_size=None, **kwargs)
        self.observation: Tensor = None
        self.reward: Tensor = None
        self.action: Tensor = None

    @log_calls
    def __next__(self) -> Tensor:
        """Returns the first sample of the dataset whose label is the current class,
        storing that label as the reward.
        """
        for x, y in self.dataset:
            # keep iterating while the example isn't of the right type.
            if y == self.current_class:
                self.observation = x
                self.reward = y
                break
        print(f"next obs: {self.observation}, next reward = {self.reward}")
        return self.observation

    @log_calls
    def __iter__(self) -> Generator[Tensor, Tensor, None]:
        """Yields observations forever. An action passed through the generator's
        `send` is forwarded to `self.send`.
        """
        # NOTE: No `logger` is defined in this module (using one here used to raise a
        # NameError), so one is created from the standard library.
        import logging

        logger = logging.getLogger(__name__)
        while True:
            action = yield next(self)
            if action is not None:
                logger.debug(f"Received an action of {action} while iterating..")
                self.reward = self.send(action)

    @log_calls
    def send(self, action: Tensor) -> Tensor:
        """Receives a prediction and returns the label of the last observation.

        Moves on to the next class (modulo 10) when the prediction equals the
        current class, and stays on the same class otherwise.
        """
        print(f"received action {action}, returning current label {self.reward}")
        self.action = action
        if action == self.current_class:
            print("Switching classes since the prediction was right!")
            self.current_class += 1
            self.current_class %= 10
        else:
            print("Prediction was wrong, staying on the same class.")
        return self.reward
def test_active_mnist_environment():
    """Test the active mnist env, which will keep giving the same class until the right prediction is made."""
    env = ActiveMnistEnvironment()
    # The env starts by only giving samples of class 0. Once a correct prediction is
    # made, it moves on to samples of class 1, and so on.

    # what the current class is (just for testing)
    _current_class = 0

    # First pass: ten iterations, always predicting the right label (0, 1, ..., 9).
    for i, x in zip(range(10), env):
        print(f"x: {x}")
        y_pred = i % 10
        print(f"Sending prediction of {y_pred}")
        y_true = env.send(y_pred)
        print(f"Received back {y_true}")
        assert y_pred == y_true

    # The last prediction (9) was right, so the env wrapped back around to class 0.
    _current_class = 0

    # Second pass: four iterations, always predicting the wrong label.
    for i, x in zip(range(4), env):
        print(f"x: {x}")
        y_pred = 1
        y_true = env.send(y_pred)
        assert y_true == 0

    # A correct prediction for class 0 returns its label ...
    x = next(env)
    y_pred = 0
    y_true = env.send(y_pred)
    assert y_true == 0

    # ... and makes the env move on to class 1.
    x = next(env)
    y_true = env.send(1)
    assert y_true == 1
================================================
FILE: sequoia/settings/rl/envs/__init__.py
================================================
import copy
import json
from abc import ABC
from contextlib import redirect_stdout
from io import StringIO
from pathlib import Path
from typing import Dict, List, Type, Union
import gym
from gym.envs.registration import EnvSpec, registry
from sequoia.utils import get_logger
logger = get_logger(__name__)
# IDEA: Modify a copy of the gym registry?
# sequoia_registry = copy.deepcopy(registry)
sequoia_registry = registry
from .classic_control import PixelObservationWrapper, register_classic_control_variants
from .variant_spec import EnvVariantSpec
register_classic_control_variants(sequoia_registry)
# Optional dependency: the Atari environments from `ale_py`.
# `ATARI_PY_INSTALLED` is only set to True when the import below succeeds.
ATARI_PY_INSTALLED = False
try:
    from ale_py.gym.environment import ALGymEnv

    AtariEnv = ALGymEnv
    ATARI_PY_INSTALLED = True
except (gym.error.DependencyNotInstalled, ImportError):
    # Placeholder class, so that the `AtariEnv` name is always defined.
    class AtariEnv(gym.Env):
        pass
# Optional dependency: MonsterKong.
# `MONSTERKONG_INSTALLED` is only set to True when the import and the registration of
# the env variants below succeed.
MONSTERKONG_INSTALLED = False
try:
    # NOTE: The underlying import prints stuff. The stdout redirection for it is done
    # inside the `.monsterkong` module, not here.
    from .monsterkong import MetaMonsterKongEnv, register_monsterkong_variants

    register_monsterkong_variants(sequoia_registry)
    MONSTERKONG_INSTALLED = True
except ImportError:
    # Placeholder class, so that the `MetaMonsterKongEnv` name is always defined.
    class MetaMonsterKongEnv(gym.Env):
        pass
# Optional dependency: mtenv.
# `mtenv_envs` holds the ids of all the specs in the mtenv registry (it stays empty
# when mtenv can't be imported).
MTENV_INSTALLED = False
mtenv_envs = []
try:
    from mtenv import MTEnv
    from mtenv.envs.registration import mtenv_registry

    mtenv_envs = [env_spec.id for env_spec in mtenv_registry.all()]
    MTENV_INSTALLED = True
except ImportError:
    # Create a 'dummy' class so we can safely use MTEnv in the type hints below.
    # Additionally, isinstance(some_env, MTEnv) will always fail when mtenv isn't
    # installed, which is good.
    class MTEnv(gym.Env):
        pass
# Optional dependency: MuJoCo (through `mujoco_py`).
# `MUJOCO_INSTALLED` is only set to True when all the imports below and the
# registration of the MuJoCo env variants succeed.
MUJOCO_INSTALLED = False
try:
    import mujoco_py

    mj_path, _ = mujoco_py.utils.discover_mujoco()
    from gym.envs.mujoco import MujocoEnv

    from .mujoco import (
        ContinualHalfCheetahEnv,
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperEnv,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dEnv,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
        register_mujoco_variants,
    )

    register_mujoco_variants(env_registry=sequoia_registry)
    MUJOCO_INSTALLED = True
except (
    ImportError,
    AttributeError,
    ValueError,
    gym.error.DependencyNotInstalled,
) as exc:
    logger.debug(f"Couldn't import mujoco: ({exc})")

    # Create 'dummy' classes so we can safely use type hints everywhere.
    # Additionally, `isinstance(some_env, <one of these classes>)` will always fail
    # when the dependency isn't installed, which is good.
    class MujocoEnv(gym.Env):
        pass

    class ContinualHalfCheetahEnv(MujocoEnv):
        pass

    class ContinualHalfCheetahV2Env(MujocoEnv):
        pass

    class ContinualHalfCheetahV3Env(MujocoEnv):
        pass

    class ContinualHopperEnv(MujocoEnv):
        pass

    class ContinualHopperV2Env(MujocoEnv):
        pass

    class ContinualHopperV3Env(MujocoEnv):
        pass

    class ContinualWalker2dEnv(MujocoEnv):
        pass

    class ContinualWalker2dV2Env(MujocoEnv):
        pass

    class ContinualWalker2dV3Env(MujocoEnv):
        pass
# Optional dependency: metaworld (which itself requires MuJoCo).
# `metaworld_envs` holds the names of the metaworld envs (it stays empty when metaworld
# can't be imported).
METAWORLD_INSTALLED = False
metaworld_envs: List[Type[gym.Env]] = []
try:
    if not MUJOCO_INSTALLED:
        # Skip the stuff below, since metaworld requires mujoco anyway.
        raise ImportError("metaworld requires mujoco, which couldn't be imported.")
    import metaworld
    from metaworld import MetaWorldEnv

    # TODO: Use mujoco from metaworld? or from mujoco_py?
    from metaworld.envs.mujoco.mujoco_env import MujocoEnv as MetaWorldMujocoEnv
    from metaworld.envs.mujoco.sawyer_xyz.sawyer_xyz_env import SawyerXYZEnv

    # from metaworld.envs.mujoco.mujoco_env import MujocoEnv
    METAWORLD_INSTALLED = True
    # metaworld_dir = getsourcefile(metaworld)
    # mujoco_dir = Path("~/.mujoco").expanduser()
    # TODO: Cache the names of the metaworld envs to a file, just so we don't take about
    # 10 seconds to import metaworld every time?
    # TODO: Make sure this also works on a cluster.
    # TODO: When updating metaworld, need to remove this file.
    envs_cache_file = Path("temp/metaworld_envs.json")
    all_metaworld_envs: Dict[str, List[str]] = {}

    # The cache is only an optimization: a cache file that can't be read (or that is
    # corrupted) shouldn't prevent this module from being imported.
    _cache_file_found = False
    try:
        envs_cache_file.parent.mkdir(exist_ok=True)
        if envs_cache_file.exists():
            _cache_file_found = True
            with open(envs_cache_file, "r") as f:
                all_metaworld_envs = json.load(f)
    except (OSError, ValueError) as cache_error:
        # NOTE: json.JSONDecodeError is a subclass of ValueError.
        logger.debug(f"Unable to load the metaworld envs cache: {cache_error}")
        all_metaworld_envs = {}
    if not isinstance(all_metaworld_envs, dict):
        # Unexpected content in the cache file: ignore it.
        all_metaworld_envs = {}

    if not _cache_file_found:
        print(
            "Loading up the list of available envs from metaworld for the first time, "
            "this might take a while (usually ~10 seconds)."
        )

    if "ML10" not in all_metaworld_envs:
        ML10_envs = list(metaworld.ML10().train_classes.keys())
        all_metaworld_envs["ML10"] = ML10_envs
        try:
            with open(envs_cache_file, "w") as f:
                json.dump(all_metaworld_envs, f)
        except OSError as cache_error:
            # Not being able to write the cache only means the envs will be listed
            # again next time.
            logger.debug(f"Unable to save the metaworld envs cache: {cache_error}")

    metaworld_envs = sum([list(envs) for envs in all_metaworld_envs.values()], [])
except (ImportError, AttributeError, gym.error.DependencyNotInstalled) as e:
    logger.debug(f"Unable to import metaworld: {e}")
    # raise e

if not METAWORLD_INSTALLED:
    # Create a 'dummy' class so we can safely use MetaWorldEnv in the type hints below.
    # Additionally, isinstance(some_env, MetaWorldEnv) will always fail when metaworld
    # isn't installed, which is good.
    class MetaWorldEnv(gym.Env, ABC):
        pass

    class MetaWorldMujocoEnv(gym.Env, ABC):
        pass

    class SawyerXYZEnv(gym.Env, ABC):
        pass
================================================
FILE: sequoia/settings/rl/envs/classic_control.py
================================================
""" Registers variants of the classic-control envs that are used by sequoia. """
# TODO: Add Pixel???-v? variants for the classic-control envs.
from typing import Dict
from gym.envs.registration import EnvRegistry, EnvSpec, registry
from sequoia.common.gym_wrappers.pixel_observation import PixelObservationWrapper
from .variant_spec import EnvVariantSpec
def register_classic_control_variants(env_registry: EnvRegistry = registry) -> None:
    """Adds pixel variants for the classic-control envs to the given registry in-place.

    For each spec whose entry point is a string starting with
    "gym.envs.classic_control", a spec with the id `"Pixel" + <original id>` that adds
    the `PixelObservationWrapper` is registered, unless that id is already taken.
    """
    # Snapshot of the specs of interest (keyed by the id of the spec), taken before the
    # registry is modified below.
    classic_control_env_specs: Dict[str, EnvSpec] = {}
    for spec in env_registry.env_specs.values():
        entry_point = spec.entry_point
        if not isinstance(entry_point, str):
            continue
        if entry_point.startswith("gym.envs.classic_control"):
            classic_control_env_specs[spec.id] = spec

    for original_id, original_spec in classic_control_env_specs.items():
        pixel_id = "Pixel" + original_id
        if pixel_id in env_registry.env_specs:
            continue
        env_registry.env_specs[pixel_id] = EnvVariantSpec.of(
            original_spec, new_id=pixel_id, wrappers=[PixelObservationWrapper]
        )
================================================
FILE: sequoia/settings/rl/envs/monsterkong.py
================================================
from contextlib import redirect_stdout
from io import StringIO
import numpy as np
from gym import spaces
from gym.envs.registration import EnvRegistry, EnvSpec, registry
# Avoid print statements from pygame package.
with redirect_stdout(StringIO()):
from meta_monsterkong.make_env import MetaMonsterKongEnv
from .variant_spec import EnvVariantSpec
def observe_state(env: MetaMonsterKongEnv) -> MetaMonsterKongEnv:
    """Switches `env` to state observations, if it isn't already observing the state.

    Sets `observe_state` to True on the unwrapped env and replaces its observation
    space with an int16 `Box` of shape (402,) with values between 0 and 292.
    Returns the same `env` object.
    """
    if env.observe_state:
        # Nothing to do.
        return env
    base_env = env.unwrapped
    base_env.observe_state = True
    base_env.observation_space = spaces.Box(0, 292, [402], np.int16)
    return env
def register_monsterkong_variants(env_registry: EnvRegistry = registry) -> None:
    """Adds 'State' and 'Pixel' variants of the MetaMonsterKong envs to the registry.

    For each of "MetaMonsterKong-v0" and "MetaMonsterKong-v1", two variants with a
    maximum of 500 steps per episode are created:
    - `"State" + <id>`, created with `observe_state=True`;
    - `"Pixel" + <id>`, created with `observe_state=False`.

    A variant is only added when its id isn't already in the registry.
    """
    # (prefix of the new id, value of the `observe_state` kwarg)
    # NOTE: The 'Pixel' variant is explicit, even though by default we currently always
    # observe the state.
    variants = [("State", True), ("Pixel", False)]

    for env_id in ["MetaMonsterKong-v0", "MetaMonsterKong-v1"]:
        spec: EnvSpec = env_registry.spec(env_id)
        for prefix, observes_state in variants:
            new_env_id = prefix + env_id
            new_spec = EnvVariantSpec.of(
                spec,
                new_id=new_env_id,
                new_max_episode_steps=500,
                new_kwargs={"observe_state": observes_state},
            )
            if new_env_id not in env_registry.env_specs:
                env_registry.env_specs[new_env_id] = new_spec
================================================
FILE: sequoia/settings/rl/envs/mujoco/__init__.py
================================================
""" CL environments based on the mujoco envs.
NOTE: This is based on https://github.com/Breakend/gym-extensions
"""
# from sequoia.conftest import mujoco_required
# pytestmark = mujoco_required
import os
from pathlib import Path
from typing import Callable, Dict, List, Type, Union
import gym
from gym.envs import register
from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv
from gym.envs.registration import EnvRegistry, EnvSpec, load, registry
from sequoia.utils.logging_utils import get_logger
from ..variant_spec import EnvVariantSpec
from .half_cheetah import (
ContinualHalfCheetahV2Env,
ContinualHalfCheetahV3Env,
HalfCheetahV2Env,
HalfCheetahV3Env,
)
from .hopper import ContinualHopperV2Env, ContinualHopperV3Env, HopperV2Env, HopperV3Env
from .modified_gravity import ModifiedGravityEnv
from .modified_size import ModifiedSizeEnv
from .walker2d import ContinualWalker2dV2Env, ContinualWalker2dV3Env, Walker2dV2Env, Walker2dV3Env
logger = get_logger(__name__)
# NOTE: Prefer the 'V3' variants
# HalfCheetahEnv = HalfCheetahV3Env
# Walker2dEnv = Walker2dV3Env
ContinualHalfCheetahEnv = ContinualHalfCheetahV3Env
ContinualHopperEnv = ContinualHopperV3Env
ContinualWalker2dEnv = ContinualWalker2dV3Env
SOURCE_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
# Public names of this module.
# NOTE: `ContinualHopperEnv` is defined above like the other un-versioned aliases, but
# was missing from this list.
__all__ = [
    "ContinualHalfCheetahEnv",
    "ContinualHalfCheetahV2Env",
    "ContinualHalfCheetahV3Env",
    "ContinualHopperEnv",
    "ContinualHopperV2Env",
    "ContinualHopperV3Env",
    "ContinualWalker2dEnv",
    "ContinualWalker2dV2Env",
    "ContinualWalker2dV3Env",
    "ModifiedGravityEnv",
    "ModifiedSizeEnv",
    "MujocoEnv",
]
def get_entry_point(Env: Type[gym.Env]) -> str:
    """Returns the entry-point string of the given class, as "<module>:<class name>"."""
    # TODO: Make sure this also works when Sequoia is installed in non-editable mode.
    module_name = Env.__module__
    class_name = Env.__name__
    return f"{module_name}:{class_name}"
# The list of mujoco envs which we explicitly have support for.
# TODO: Should probably use a Wrapper rather than a new base class (at least for the
# GravityEnv and the modifications that can be made to an already-instantiated env.
# NOTE: The keys use the same ids (including the version tag) as the original gym envs.
CURRENTLY_SUPPORTED_MUJOCO_ENVS: Dict[str, Type[MujocoEnv]] = {
"HalfCheetah-v2": ContinualHalfCheetahV2Env,
"HalfCheetah-v3": ContinualHalfCheetahV3Env,
"Hopper-v2": ContinualHopperV2Env,
"Hopper-v3": ContinualHopperV3Env,
"Walker2d-v2": ContinualWalker2dV2Env,
"Walker2d-v3": ContinualWalker2dV3Env,
}
# TODO: Register the 'continual' variants automatically by finding the entries in the
# registry that can be wrapped, and wrapping them.
# IDEA: Actually swap out the entries for these envs, rather than overwrite them?
def register_mujoco_variants(env_registry: EnvRegistry = registry) -> None:
    """Registers the 'continual' variants of the supported MuJoCo envs, in-place.

    For each env id in `CURRENTLY_SUPPORTED_MUJOCO_ENVS`, two things are done:
    - a new spec is registered under `"Continual" + <id>` (e.g. `ContinualWalker2d-v2`),
      unless that id is already taken;
    - the spec of the original id (e.g. `Walker2d-v2`) is overwritten.

    In both cases, the new spec uses the corresponding class from
    `CURRENTLY_SUPPORTED_MUJOCO_ENVS` as its entry point. Env ids whose spec is already
    an `EnvVariantSpec` are left untouched.
    """
    # TODO: Add broader support for mujoco envs
    new_entry_points = CURRENTLY_SUPPORTED_MUJOCO_ENVS

    # The original specs, fetched before any of them gets overwritten below.
    original_mujoco_env_specs: Dict[str, EnvSpec] = {}
    for original_env_id in CURRENTLY_SUPPORTED_MUJOCO_ENVS:
        original_mujoco_env_specs[original_env_id] = env_registry.spec(original_env_id)

    # NOTE: It could actually make more sense to only register our variants (only use
    # the "Continual" prefix), and then have the Setting map one to the other
    # intelligently, but it causes a bit more trouble.
    for prefix in ("Continual", ""):
        for env_id, original_env_spec in original_mujoco_env_specs.items():
            if isinstance(original_env_spec, EnvVariantSpec):
                continue
            # TODO: Use the same ID, or a different one?
            new_id = prefix + env_id
            overwriting = new_id == env_id
            if new_id in env_registry.env_specs and not overwriting:
                continue

            env_registry.env_specs[new_id] = EnvVariantSpec.of(
                original=original_env_spec,
                new_id=new_id,
                new_entry_point=new_entry_points[env_id],
            )
            if overwriting:
                logger.debug(f"Overwriting the existing EnvSpec at id {env_id}")
            else:
                logger.debug(f"Registering MuJoCO Environment variant of {env_id} at id {new_id}.")
# Replace the entry-point for these mujoco envs.
# IMPORTANT: This doesn't change anything about the envs, apart from making it possible
# to explicitly change the gravity or mass etc if you want.
# TODO: Should probably still only modify a custom/copied registry, so that importing
# Sequoia doesn't modify the gym registry when Sequoia isn't being used explicitly.
# registry.env_specs["HalfCheetah-v2"].entry_point = ContinualHalfCheetahV2Env
# registry.env_specs["HalfCheetah-v3"].entry_point = ContinualHalfCheetahV3Env
# registry.env_specs["Hopper-v2"].entry_point = ContinualHopperEnv
# registry.env_specs["Walker2d-v2"].entry_point = ContinualWalker2dEnv
# EnvSpec(
# "HalfCheetah-v2",
# entry_point=get_entry_point(Continu),
# reward_threshold=None,
# nondeterministic=False,
# max_episode_steps=None,
# kwargs=None,
# )
# gym.envs.register(
# id="ContinualHalfCheetah-v2",
# entry_point=get_entry_point(ContinualHalfCheetahV2Env),
# max_episode_steps=1000,
# reward_threshold=4800.0,
# )
# gym.envs.register(
# id="ContinualHalfCheetah-v3",
# entry_point=get_entry_point(ContinualHalfCheetahV3Env),
# max_episode_steps=1000,
# reward_threshold=4800.0,
# )
# gym.envs.register(
# id="ContinualHopper-v2",
# entry_point=get_entry_point(ContinualHopperEnv),
# max_episode_steps=1000,
# reward_threshold=4800.0,
# )
# gym.envs.register(
# id="ContinualWalker2d-v3",
# entry_point=get_entry_point(ContinualWalker2dEnv),
# max_episode_steps=1000,
# reward_threshold=4800.0,
# )
================================================
FILE: sequoia/settings/rl/envs/mujoco/half_cheetah.py
================================================
from typing import ClassVar, Dict, List
import numpy as np
from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv as _HalfCheetahV2Env
# TODO: Use HalfCheetah-v3 instead, which allows explicitly to change the model file!
from gym.envs.mujoco.half_cheetah_v3 import HalfCheetahEnv as _HalfCheetahV3Env
from .modified_gravity import ModifiedGravityEnv
from .modified_mass import ModifiedMassEnv
from .modified_size import ModifiedSizeEnv
class HalfCheetahV2Env(_HalfCheetahV2Env):
    """HalfCheetah (v2) env for which the XML model file can be chosen.

    Probably not necessary if we pull request the xml name as a kwarg in openai gym.
    """

    # Names of the bodies of the half-cheetah model.
    BODY_NAMES: ClassVar[List[str]] = "torso bthigh bshin bfoot fthigh fshin ffoot".split()

    def __init__(self, model_path: str = "half_cheetah.xml", frame_skip: int = 5):
        # The `MujocoEnv` constructor is called directly, with the chosen model file
        # and frame skip.
        init_kwargs = dict(model_path=model_path, frame_skip=frame_skip)
        MujocoEnv.__init__(self, **init_kwargs)
# Q: Why isn't HalfCheetahV3 based on HalfCheetahV2 in gym ?!
class HalfCheetahV3Env(_HalfCheetahV3Env):
    """HalfCheetah (v3) env which accepts the model file as either `model_path` or
    `xml_file`.
    """

    # Names of the bodies of the half-cheetah model.
    BODY_NAMES: ClassVar[List[str]] = "torso bthigh bshin bfoot fthigh fshin ffoot".split()

    def __init__(
        self,
        model_path="half_cheetah.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 0.1,
        reset_noise_scale: float = 0.1,
        exclude_current_positions_from_observation: bool = True,
        xml_file: str = None,
        frame_skip: int = 5,
    ):
        """
        Raises:
            NotImplementedError: If `frame_skip` isn't 5.
        """
        if frame_skip != 5:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")
        # `xml_file` takes precedence over `model_path` when it is given.
        chosen_model_file = xml_file or model_path
        super().__init__(
            xml_file=chosen_model_file,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
        )
# class HalfCheetahGravityEnv(ModifiedGravityEnv, HalfCheetahEnv):
# # NOTE: This environment could be used in ContinualRL!
# def __init__(
# self,
# model_path: str = "half_cheetah.xml",
# frame_skip: int = 5,
# gravity: float = -9.81,
# ):
# super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity)
class HalfCheetahWithSensorEnv(HalfCheetahV2Env):
    """NOTE: unused for now.

    Adds empty (all-zero) sensor readouts to the observations. This is to be used when
    transferring to WallEnvs, where we get sensor readouts with distances to the wall.

    Parameters
    ----------
    model_path:
        Path of the MuJoCo XML model file, forwarded to the parent constructor.
    frame_skip:
        Forwarded to the parent constructor.
    n_bins:
        Number of zeros appended to each observation.
    """

    def __init__(self, model_path: str, frame_skip: int = 5, n_bins: int = 10):
        # `n_bins` has to be set *before* calling the parent constructor: gym's
        # `MujocoEnv.__init__` steps the environment once to infer the observation
        # space, which calls `_get_obs` (and therefore reads `self.n_bins`).
        self.n_bins = n_bins
        super().__init__(model_path=model_path, frame_skip=frame_skip)

    def _get_obs(self):
        """Return the parent's observation with `n_bins` zeros appended."""
        obs = np.concatenate(
            [
                super()._get_obs(),
                np.zeros(self.n_bins),  # NOTE: @lebrice HUH? what's the point of doing this?
                # goal_readings
            ]
        )
        return obs
# TODO: Rename these base classes to 'ModifyGravityMixin', 'ModifySizeMixin', etc.
class ContinualHalfCheetahV2Env(
    ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HalfCheetahV2Env
):
    """HalfCheetah-v2 environment combining the gravity, size and mass modifications.

    The constructor only forwards its arguments, as keyword arguments, to the next
    class in the method resolution order through `super().__init__`.
    """

    def __init__(
        self,
        model_path: str = "half_cheetah.xml",
        frame_skip: int = 5,
        gravity: float = -9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        # Every argument is passed by keyword, so that each base class can pick the
        # ones it declares and forward the rest.
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )
class ContinualHalfCheetahV3Env(
    ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HalfCheetahV3Env
):
    """HalfCheetah-v3 environment combining the gravity, size and mass modifications.

    The constructor only forwards its arguments, as keyword arguments, to the next
    class in the method resolution order through `super().__init__`. When `xml_file`
    is given (truthy), it is used as the `model_path` instead of `model_path`.
    """

    def __init__(
        self,
        model_path: str = "half_cheetah.xml",
        frame_skip: int = 5,
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 0.1,
        reset_noise_scale: float = 0.1,
        exclude_current_positions_from_observation: bool = True,
        gravity: float = -9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
        xml_file: str = None,
    ):
        super().__init__(
            # `xml_file` takes precedence over `model_path` when it is given.
            model_path=xml_file or model_path,
            frame_skip=frame_skip,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )
================================================
FILE: sequoia/settings/rl/envs/mujoco/half_cheetah_test.py
================================================
from typing import ClassVar, Type
from sequoia.conftest import mujoco_required
pytestmark = mujoco_required
from .half_cheetah import ContinualHalfCheetahV2Env, ContinualHalfCheetahV3Env
from .modified_gravity_test import ModifiedGravityEnvTests
from .modified_mass_test import ModifiedMassEnvTests
from .modified_size_test import ModifiedSizeEnvTests
@mujoco_required
class TestHalfCheetahV2(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    """Runs the inherited gravity / size / mass test suites on `ContinualHalfCheetahV2Env`."""

    # Environment class under test, read by the inherited tests as `self.Environment`.
    Environment: ClassVar[Type[ContinualHalfCheetahV2Env]] = ContinualHalfCheetahV2Env
@mujoco_required
class TestHalfCheetahV3(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    """Runs the inherited gravity / size / mass test suites on `ContinualHalfCheetahV3Env`."""

    # Environment class under test, read by the inherited tests as `self.Environment`.
    Environment: ClassVar[Type[ContinualHalfCheetahV3Env]] = ContinualHalfCheetahV3Env
================================================
FILE: sequoia/settings/rl/envs/mujoco/hopper.py
================================================
# TODO: Should we use HopperV3 instead?
from typing import ClassVar, Dict, List, Tuple
from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.hopper import HopperEnv as _HopperV2Env
# TODO: Use Hopper-v3 instead, which allows explicitly changing the model file!
from gym.envs.mujoco.hopper_v3 import HopperEnv as _HopperV3Env
from .modified_gravity import ModifiedGravityEnv
from .modified_mass import ModifiedMassEnv
from .modified_size import ModifiedSizeEnv
# NOTE: Removed the `utils.EzPickle` base class (since it wasn't being passed any kwargs
# (and therefore wasn't saving any of the 'state') anyway.
class HopperV2Env(_HopperV2Env):
    """Hopper-v2 environment whose MuJoCo XML model file can be chosen.

    The constructor calls `MujocoEnv.__init__` directly with the given `model_path` and
    `frame_skip`. This is probably not necessary if we pull request the xml name as a
    kwarg in openai gym.
    """

    # Names of the body parts of the model.
    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "thigh",
        "leg",
        "foot",
    ]

    def __init__(self, model_path: str = "hopper.xml", frame_skip: int = 4):
        MujocoEnv.__init__(
            self,
            model_path=model_path,
            frame_skip=frame_skip,
        )
        # utils.EzPickle.__init__(self)
class HopperV3Env(_HopperV3Env):
    """Hopper-v3 environment that also accepts `model_path` and `frame_skip`.

    `xml_file`, when given (truthy), takes precedence over `model_path`. Only the
    default `frame_skip` of 4 is accepted: any other value raises `NotImplementedError`.
    """

    # Names of the body parts of the model.
    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "thigh",
        "leg",
        "foot",
    ]

    def __init__(
        self,
        model_path="hopper.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_state_range: Tuple[float, float] = (-100.0, 100.0),
        healthy_z_range: Tuple[float, float] = (0.7, float("inf")),
        healthy_angle_range: Tuple[float, float] = (-0.2, 0.2),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        xml_file: str = None,
        frame_skip: int = 4,
    ):
        uses_default_frame_skip = frame_skip == 4
        if not uses_default_frame_skip:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")

        # An explicitly given `xml_file` wins over `model_path`.
        chosen_xml_file = xml_file if xml_file else model_path
        super().__init__(
            xml_file=chosen_xml_file,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_state_range=healthy_state_range,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
        )
class HopperV2GravityEnv(ModifiedGravityEnv, HopperV2Env):
    """Hopper-v2 environment with only the gravity modification.

    The constructor forwards its arguments, as keyword arguments, to the next class in
    the method resolution order through `super().__init__`.
    """

    # NOTE: This environment could be used in ContinualRL!
    def __init__(
        self,
        model_path: str = "hopper.xml",
        frame_skip: int = 4,
        gravity: float = -9.81,
    ):
        super().__init__(model_path=model_path, frame_skip=frame_skip, gravity=gravity)
class ContinualHopperV2Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HopperV2Env):
    """Hopper-v2 environment combining the gravity, size and mass modifications.

    The constructor only forwards its arguments, as keyword arguments, to the next
    class in the method resolution order through `super().__init__`.
    """

    def __init__(
        self,
        model_path: str = "hopper.xml",
        frame_skip: int = 4,
        gravity: float = -9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        # Every argument is passed by keyword, so that each base class can pick the
        # ones it declares and forward the rest.
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )
class ContinualHopperV3Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, HopperV3Env):
    """Hopper-v3 environment combining the gravity, size and mass modifications.

    The constructor only forwards its arguments, as keyword arguments, to the next
    class in the method resolution order through `super().__init__`.

    NOTE: This constructor does not accept an `xml_file` argument (it is commented
    out below): the model file is selected with `model_path` only.
    """

    def __init__(
        self,
        model_path="hopper.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_state_range: Tuple[float, float] = (-100.0, 100.0),
        healthy_z_range: Tuple[float, float] = (0.7, float("inf")),
        healthy_angle_range: Tuple[float, float] = (-0.2, 0.2),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        # xml_file: str = None,
        frame_skip: int = 4,
        gravity: float = -9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            # xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_state_range=healthy_state_range,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )
# ------------- NOTE (@lebrice) -------------------------------
# Everything below this is unused.
# The idea was to do some kind of inverse-kinematics-ish math to fix the placement of the joints
# when the size of one of the parts of the model is changed.
#
# from typing import Dict
# def get_parent(tree: ElementTree, node: Element) -> Element:
# parent_map: Dict[Element, Element] = {c: p for p in tree.iter() for c in p}
# return parent_map[node]
# def update_world(
# tree: ElementTree,
# world_body: Element,
# new_torso_max: Pos,
# size_scaling_factor: float = 1.0,
# **kwargs,
# ) -> None:
# """propagate the changes from the body to the world, if need be."""
# # TODO: Maybe move the camera etc?
# def update_torso(
# tree: ElementTree = None,
# torso_body: Element = None,
# new_torso_min: Pos = None,
# size_scaling_factor: float = 1.0,
# geom_suffix="torso_geom",
# **kwargs,
# ) -> None:
# """'move' the torso body and its endpoints, after another bodypart has been
# scaled.
# This moves all relevant geoms and
# joints and bodies,
# Normally, this can update the
# (through possibly recursive calls to one of `update_torso`,
# `update_thigh`, `update_leg`, `update_foot`.)
# """
# assert size_scaling_factor != 0.0
# body_name = "torso"
# # Get the elements to be modified.
# if torso_body is None:
# assert tree is not None, "need the tree if torso_body is not given!"
# if isinstance(tree, Element) and tree.tag == "body" and tree.get("name") == body_name:
# torso_body = tree
# tree = None
# else:
# torso_body = tree.find(f".//body[@name='{body_name}']")
# assert torso_body is not None, "can't find the torso body!"
# torso_geom = torso_body.find(f"./geom[@name='{body_name}']")
# if torso_geom is None:
# torso_geom = torso_body.find(f"./geom[@name='{body_name}_geom']")
# if torso_geom is None:
# raise RuntimeError(f"Can't find the geom for body part '{body_name}'!")
# rooty_joint = torso_body.find("./joint[@name='rooty']")
# rootz_joint = torso_body.find("./joint[@name='rootz']")
# torso_body_pos = Pos.of_element(torso_body)
# torso_geom_size = float(torso_geom.get("size"))
# torso_geom_fromto = FromTo.of_element(torso_geom)
# rootz_joint_ref = float(rootz_joint.get("ref"))
# rooty_joint_pos = Pos.of_element(rooty_joint)
# torso_max = torso_geom_fromto.start
# torso_min = torso_geom_fromto.end
# torso_length = torso_max - torso_min
# assert torso_body_pos == torso_geom_fromto.center
# # This happens to coincide with torso's pos.
# assert rootz_joint_ref == torso_body_pos.z
# assert rooty_joint_pos == torso_body_pos
# if new_torso_min is None:
# # Assume that the location of the base of the torso doesn't change, i.e. that
# # this was called in order to JUST scale the torso and nothing else.
# new_torso_min = torso_min
# # new_torso_min is already given, calculate the other two:
# new_torso_length = torso_length * (1 if size_scaling_factor is None else size_scaling_factor)
# new_torso_max = new_torso_min + new_torso_length
# # NOTE: fromto is from top to bottom here (maybe also everywhere else, not sure).
# new_torso_geom_size = torso_geom_size * size_scaling_factor
# new_torso_geom_fromto = FromTo(start=new_torso_max, end=new_torso_min)
# new_torso_pos = (new_torso_max + new_torso_min) / 2
# new_rootz_joint_ref = new_torso_pos.z
# new_rooty_joint_pos = new_torso_pos
# # Update the fields of the different elements.
# torso_body.set("pos", new_torso_pos.to_str())
# torso_geom.set("fromto", new_torso_geom_fromto.to_str())
# torso_geom.set("size", new_torso_geom_size)
# # TODO: Not sure if this makes sense: The rooty joint has a Pos that coincides
# # with the torso pos.
# new_torso_pos.set_in_element(rooty_joint)
# # TODO: rootz has a 'ref' which also coincides with the torso pos.
# rootz_joint.set("ref", str(new_rootz_joint_ref))
# rooty_joint.set("pos", new_rooty_joint_pos)
# new_torso_pos = new_torso_geom_fromto.center
# # TODO: Also move the camera?
# world_body: Optional[Element] = None
# if tree is not None:
# assert tree is not None, "need the tree if torso_body is not given!"
# world_body = get_parent(tree, torso_body)
# # Don't change the scaling of the parent, if this body part was scaled!
# parent_scale_factor = 1 if size_scaling_factor != 1 else size_scaling_factor
# update_world(
# tree=tree,
# world_body=world_body,
# new_torso_min=new_torso_min,
# new_torso_max=new_torso_max,
# size_scaling_factor=parent_scale_factor,
# **kwargs,
# )
# def update_thigh(
# tree: ElementTree = None,
# thigh_body: Element = None,
# new_thigh_min: Pos = None,
# new_thigh_max: Pos = None,
# size_scaling_factor: float = None,
# **kwargs,
# ) -> None:
# """'move' the thigh and its endpoints. This moves all relevant geoms and
# joints and then moves the torso by calling `update_torso`.
# """
# # TODO:
# new_torso_min = new_thigh_max
# new_torso_max = todo
# torso_body = get_parent(tree, thigh_body)
# update_torso(
# torso_body,
# new_torso_min=new_torso_min,
# new_torso_max=new_torso_max,
# size_scaling_factor=size_scaling_factor,
# new_thigh_min=new_thigh_min,
# new_thigh_max=new_thigh_max,
# **kwargs,
# )
# def update_thigh(
# tree: ElementTree = None,
# thigh_body: Element = None,
# new_thigh_min: Pos = None,
# new_thigh_max: Pos = None,
# size_scaling_factor: float = None,
# **kwargs,
# ) -> None:
# """'move' the thigh and its endpoints. This moves all relevant geoms and
# joints and then moves the torso by calling `update_torso`.
# """
# new_torso_min = NotImplemented
# new_thigh_max = NotImplemented
# torso_body = get_parent(tree, thigh_body)
# update_torso(
# torso_body,
# new_torso_min=new_torso_min,
# size_scaling_factor=size_scaling_factor,
# new_thigh_min=new_thigh_min,
# new_thigh_max=new_thigh_max, # Pass it in case the above components need it.
# **kwargs,
# )
# def scale_size(tree: ElementTree, body_name: str, scale: float) -> str:
# tree = copy.deepcopy(tree)
# target_body: Element = tree.find(f".//body[@name='{body_name}']")
# parent_map: Dict[Element, Element] = {c: p for p in tree.iter() for c in p}
# if body_name == "torso":
# update_torso(tree, torso_body=target_body, size_scaling_factor=scale)
# raise NotImplementedError(f"WIP")
================================================
FILE: sequoia/settings/rl/envs/mujoco/hopper_test.py
================================================
from sequoia.conftest import mujoco_required
pytestmark = mujoco_required
import inspect
import itertools
import os
from pathlib import Path
from typing import ClassVar, Type
from xml.etree.ElementTree import ElementTree, fromstring
import pytest
from gym.envs.mujoco import MujocoEnv
from sequoia.conftest import mujoco_required
from .hopper import ContinualHopperV2Env, ContinualHopperV3Env
from .modified_gravity_test import ModifiedGravityEnvTests
from .modified_mass_test import ModifiedMassEnvTests
from .modified_size_test import ModifiedSizeEnvTests
# # TODO: There is a bug in the way the hopper XML is generated, where the sticks / joints don't seem to follow.
# bob = ContinualHopperEnv(body_name_to_size_scale={"thigh": 2})
# assert False, bob
@mujoco_required
class TestContinualHopperV2Env(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    """Runs the inherited gravity / size / mass test suites on `ContinualHopperV2Env`."""

    # Environment class under test, read by the inherited tests as `self.Environment`.
    Environment: ClassVar[Type[ContinualHopperV2Env]] = ContinualHopperV2Env
@mujoco_required
class TestContinualHopperV3Env(ModifiedGravityEnvTests, ModifiedSizeEnvTests, ModifiedMassEnvTests):
    """Runs the inherited gravity / size / mass test suites on `ContinualHopperV3Env`."""

    # Environment class under test, read by the inherited tests as `self.Environment`.
    Environment: ClassVar[Type[ContinualHopperV3Env]] = ContinualHopperV3Env
def load_tree(model_path: Path) -> str:
    """Read a MuJoCo XML model file and return its contents as text.

    Parameters
    ----------
    model_path:
        Path of the XML file (a `str` or a `Path`). An absolute path is used as-is.
        A relative path (e.g. "hopper.xml") is looked up in the "assets" directory
        located next to the source file of gym's `MujocoEnv`.

    Returns
    -------
    The text of the XML file. It can be parsed with `xml.etree.ElementTree.fromstring`.

    Raises
    ------
    IOError
        If the resolved file does not exist.
    """
    # Accept `Path` objects as well as strings.
    model_path = os.fspath(model_path)
    if os.path.isabs(model_path):
        full_path = model_path
    else:
        full_path = os.path.join(
            os.path.dirname(inspect.getsourcefile(MujocoEnv)), "assets", model_path
        )
    if not os.path.exists(full_path):
        raise IOError(f"File {full_path} does not exist")
    # Open the *resolved* path: this is the file whose existence was just checked.
    with open(full_path, "r") as f:
        return f.read()
# XML fixture used as input (and expected output) by `test_change_torso`.
# NOTE(review): this string is currently empty — presumably the XML body of the default
# hopper model was meant to go here; confirm before un-xfailing the test.
default_hopper_body_xml = f"""\
"""
def elements_equal(e1, e2) -> bool:
    """Check that two XML elements are equal, recursively.

    Taken from https://stackoverflow.com/a/24349916/6388696

    The tag, text, tail, attributes and number of children of both elements are
    compared with `assert` statements, then the children are compared pairwise.

    Returns
    -------
    True when both elements (and all their descendants) are equal.

    Raises
    ------
    AssertionError
        At the first difference found between the two elements.
    """
    assert e1.tag == e2.tag
    assert e1.text == e2.text
    assert e1.tail == e2.tail
    assert e1.attrib == e2.attrib
    assert len(e1) == len(e2)
    for child1, child2 in zip(e1, e2):
        # A mismatch in the children raises an AssertionError in the recursive call.
        elements_equal(child1, child2)
    # Return a value explicitly, so that `assert elements_equal(a, b)` passes when the
    # elements are equal (a bare `None` return value would make it fail).
    return True
# NOTE: Expected to fail for now (see the `xfail` marker below).
# NOTE(review): `update_torso` is not defined or imported in the visible part of this
# file (a function with that name only appears in commented-out code in hopper.py), so
# this test presumably fails with a NameError — confirm.
@pytest.mark.xfail(reason="Dropping this for now, XML is really annoying.")
@pytest.mark.parametrize(
    "input_xml_str, scale_factor, output_xml_str",
    [
        (
            # A scale factor of 1 should leave the XML unchanged.
            default_hopper_body_xml,
            1.0,
            default_hopper_body_xml,
        ),
        (
            default_hopper_body_xml,
            2.0,
            # NOTE(review): the expected XML is an empty string here — TODO confirm.
            f"""\
""",
        ),
    ],
    ids=(f"param{i}" for i in itertools.count()),
)
def test_change_torso(input_xml_str: str, scale_factor: float, output_xml_str: str):
    """Check that scaling the torso of the input XML gives the expected output XML."""
    # # TODO: Get rid of annoying whitespace issues!
    pass
    input_tree = fromstring(input_xml_str)
    expected = fromstring(output_xml_str)
    # from io import StringIO
    # in_file = StringIO(input_xml_str)
    # out_file = StringIO(output_xml_str)
    # input_tree = parse(in_file)
    # expected = parse(out_file)
    update_torso(tree=input_tree, size_scale_factor=scale_factor)
    # import textwrap
    # from xml.dom import minidom
    # result = minidom.parseString(tostring(input_tree, method="text")).toprettyxml()
    result = input_tree
    assert elements_equal(result, expected)
    # expected = minidom.parseString().toprettyxml()
    assert result == expected
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_friction.py
================================================
""" TODO: Wrapper that modifies the friction, if possible on-the-fly. """
from typing import ClassVar
from gym.envs.mujoco import MujocoEnv
class ModifiedFrictionEnv(MujocoEnv):
    """
    TODO: Environment that allows the friction to be changed.

    NOTE: Nothing is implemented yet: this class only declares the
    `CAN_BE_UPDATED_IN_PLACE` flag.

    Adapted from https://github.com/Breakend/gym-extensions/blob/master/gym_extensions/continuous/mujoco/gravity_envs.py
    """

    # IDEA: Use something like this to tell apart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_friction_test.py
================================================
""" TODO: Tests for the 'modified friction' mujoco envs. """
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_gravity.py
================================================
import warnings
from typing import ClassVar
from gym.envs.mujoco import MujocoEnv
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
class ModifiedGravityEnv(MujocoEnv):
    """
    MuJoCo environment whose gravity can be read and changed.

    The gravity is the entry at index 2 of `self.model.opt.gravity`. It can be given to
    the constructor, or changed later through the `gravity` property / `set_gravity`.

    Adapted from https://github.com/Breakend/gym-extensions/blob/master/gym_extensions/continuous/mujoco/gravity_envs.py
    """

    # IDEA: Use something like this to tell apart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True

    def __init__(self, model_path: str, frame_skip: int, gravity: float = -9.81, **kwargs):
        super().__init__(model_path=model_path, frame_skip=frame_skip, **kwargs)
        if gravity == -9.81:
            # Default gravity: the model is left untouched.
            return
        self.model.opt.gravity[2] = gravity
        self.sim.forward()
        logger.debug(f"Setting initial gravity to {self.gravity}")

    @property
    def gravity(self) -> float:
        """The current gravity: the entry at index 2 of `self.model.opt.gravity`."""
        gravity_vector = self.model.opt.gravity
        return gravity_vector[2]

    @gravity.setter
    def gravity(self, value: float) -> None:
        # TODO: Seems to be bad practice to modify memory in-place for some reason?
        gravity_vector = self.model.opt.gravity
        gravity_vector[2] = value

    def set_gravity(self, value: float) -> None:
        """Set the gravity, emitting a `RuntimeWarning` when `value` is >= 0."""
        is_non_negative = value >= 0
        if is_non_negative:
            message = "Not a good idea to use a positive value! (things will start to float)"
            warnings.warn(RuntimeWarning(message))
        # IDEA: always convert to negative value in the setter?
        self.gravity = value
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_gravity_test.py
================================================
""" TODO: Tests for the 'modified gravity' mujoco envs. """
from typing import ClassVar, Type, TypeVar
from gym.wrappers import TimeLimit
from sequoia.conftest import mujoco_required
pytestmark = mujoco_required
from .modified_gravity import ModifiedGravityEnv
EnvType = TypeVar("EnvType", bound=ModifiedGravityEnv)
class ModifiedGravityEnvTests:
    """Tests for environments with modifiable gravity.

    Meant to be used as a base class: subclasses set the `Environment` attribute to the
    class of environment to be tested.
    """

    # Environment class under test. Instantiated without arguments by the tests.
    Environment: ClassVar[Type[EnvType]]

    # @pytest.mark.xfail(reason="The condition doesn't always work.")
    def test_change_gravity_each_step(self):
        """Change the gravity after every step, over a few episodes of random actions."""
        env: ModifiedGravityEnv = self.Environment()
        max_episode_steps = 50
        n_episodes = 3
        # NOTE: Interestingly, the renderer will show
        # `env.frame_skip * max_episode_steps` frames per episode, even when
        # "Ren[d]er every frame" is set to False.
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        total_steps = 0
        for episode in range(n_episodes):
            initial_state = env.reset()
            done = False
            episode_steps = 0
            start_y = initial_state[1]
            # Number of steps in which the entry at index 1 of the state increased.
            moved_up = 0
            previous_state = initial_state
            state = initial_state
            while not done:
                previous_state = state
                state, reward, done, info = env.step(env.action_space.sample())
                env.render("human")
                episode_steps += 1
                total_steps += 1
                # Change the gravity continually over time: it starts close to -10 and
                # grows by 5 per `max_episode_steps` steps.
                # By the end, things should be floating.
                env.set_gravity(-10 + 5 * total_steps / max_episode_steps)
                moved_up += state[1] > previous_state[1]
                # print(f"Moving upward? {obs[1] > state[1]}")
            if episode_steps != max_episode_steps:
                print(f"Episode ended early?")
            print(f"Gravity at end of episode: {env.gravity}")
            # TODO: Check that the position (in the observation) is obeying gravity?
            # if env.gravity <= 0:
            #     # Downward force, so should not have any significant preference for
            #     # moving up vs moving down.
            #     assert 0.4 <= (moved_up / max_episode_steps) <= 0.6, env.gravity
            # # if env.gravity == 0:
            # #     assert 0.5 <= (moved_up / max_episode_steps) <= 1.0
            # if env.gravity > 0:
            #     assert 0.5 <= (moved_up / max_episode_steps) <= 1.0, env.gravity
        assert total_steps <= n_episodes * max_episode_steps
        initial_z = env.init_qpos[1]
        final_z = env.sim.data.qpos[1]
        if env.gravity > 0:
            # With an upward gravity, the final value should be above the initial one.
            assert final_z > initial_z
        # TODO: These checks aren't deterministic, and only really "work" with
        # half-cheetah.
        # assert initial_z == 0
        # Check that the robot is high up in the sky! :D
        # assert final_z > 3
        # assert False, (env.init_qpos, env.sim.data.qpos)

    def test_task_schedule(self):
        """Check that the gravity follows the task schedule of a `MultiTaskEnvironment`."""
        # TODO: Reuse this test (and perhaps others from multi_task_environment_test.py)
        # but with this continual_half_cheetah instead of cartpole.
        original = self.Environment()
        starting_gravity = original.gravity
        # Maps from a step to the attributes to be set on the env.
        # NOTE(review): the exact step at which a task takes effect depends on
        # `MultiTaskEnvironment`, which isn't visible here; the assertions below expect
        # the task of key `k` to be in effect for the loop steps `>= k`.
        task_schedule = {
            10: dict(gravity=starting_gravity),
            20: dict(gravity=-12.0),
            30: dict(gravity=0.9),
        }
        from sequoia.common.gym_wrappers import MultiTaskEnvironment

        env = MultiTaskEnvironment(original, task_schedule=task_schedule)
        env.seed(123)
        env.reset()
        for step in range(100):
            _, _, done, _ = env.step(env.action_space.sample())
            # env.render()
            if done:
                env.reset()
            if 0 <= step < 10:
                assert env.gravity == starting_gravity
            elif 10 <= step < 20:
                assert env.gravity == starting_gravity
            elif 20 <= step < 30:
                assert env.gravity == -12.0
            elif step >= 30:
                assert env.gravity == 0.9
        env.close()
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_mass.py
================================================
from functools import partial
from typing import ClassVar, Dict, List, TypeVar, Union
import numpy as np
from gym.envs.mujoco import MujocoEnv
V = TypeVar("V")
class ModifiedMassEnv(MujocoEnv):
    """
    Allows the mass of body parts to be changed.

    The masses are read from / written to the `self.model.body_mass` array, using the
    index of the body in `self.model.body_names`.

    Subclasses that define `BODY_NAMES` automatically get one `<body_name>_mass`
    property per entry, which gets / sets the mass of that body part.

    NOTE: Haven't yet checked how this affects the physics simulation! Might not be 100% working.
    """

    # IDEA: Use something like this to tell apart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = True
    # Names of the body parts for which a `<name>_mass` property is generated.
    BODY_NAMES: ClassVar[List[str]]

    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        body_name_to_mass_scale: Dict[str, float] = None,
        **kwargs,
    ):
        """Create the environment, then scale the masses of the given body parts.

        Parameters
        ----------
        model_path, frame_skip, **kwargs:
            Forwarded to the next constructor in the method resolution order.
        body_name_to_mass_scale:
            Maps from the name of a body to the factor by which its default mass is
            multiplied. Nothing is scaled when this is None or empty.
        """
        super().__init__(
            model_path=model_path,
            frame_skip=frame_skip,
            **kwargs,
        )
        self.body_name_to_mass_scale = body_name_to_mass_scale or {}
        # Masses of the model before any scaling, as a dict and as an array.
        self.default_masses_dict: Dict[str, float] = {
            body_name: self.model.body_mass[i] for i, body_name in enumerate(self.model.body_names)
        }
        self.default_masses: np.ndarray = np.copy(self.model.body_mass)
        # dict(zip(body_parts, mass_scales))
        self.scale_masses(**self.body_name_to_mass_scale)
        # self.model.body_mass = self.get_and_modify_bodymass(body_part, mass_scale)
        # self.model._compute_subtree()
        # self.model.forward()

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Add auto-generated properties for getting and setting the mass of the bodyparts.
        # NOTE: `BODY_NAMES` is only declared (not assigned) on this class, so a
        # subclass which doesn't define it simply doesn't get any generated property.
        for body_part in getattr(cls, "BODY_NAMES", ()):
            property_name = f"{body_part}_mass"
            mass_property = property(
                fget=partial(cls.get_mass, body_part=body_part),
                fset=partial(cls._mass_setter, body_part),
            )
            setattr(cls, property_name, mass_property)

    def _update(self) -> None:
        """'Update' the model, if necessary, after a change has occurred to the mass.

        Currently does nothing.
        TODO: Not sure if this is entirely correct
        """
        # self.model._compute_subtree()
        # self.model.forward()

    def reset_masses(self) -> None:
        """Resets the masses to their default values."""
        # NOTE: Use [:] to modify in-place, just in case there are any
        # pointer-shenanigans going on on the C side.
        self.model.body_mass[:] = self.default_masses
        # self.model._compute_subtree() #TODO: Not sure about this call
        # self.model.forward()

    def get_masses_dict(self) -> Dict[str, float]:
        """Return a dict mapping from the name of each body of the model to its mass."""
        # NOTE: The array of masses is `body_mass` (as used everywhere else in this
        # class), not `body_masses`.
        return {
            body_name: self.model.body_mass[i]
            for i, body_name in enumerate(self.model.body_names)
        }

    def set_mass(self, **body_name_to_mass: Union[int, float]) -> None:
        """Set the mass of the given body parts, passed as `body_name=mass` kwargs.

        Raises a ValueError (from `.index`) if a body part isn't found.
        """
        # _set_mass(self, body_part=body_part, mass=mass)
        for body_part, mass in body_name_to_mass.items():
            idx = self.model.body_names.index(body_part)
            self.model.body_mass[idx] = mass

    def get_mass(self, body_part: str) -> float:
        """Return the mass of the body part with the given name.

        Raises a ValueError if there is no body with that name in the model.
        """
        if body_part not in self.model.body_names:
            raise ValueError(
                f"No body named {body_part} in this mujoco model! (body names: "
                f"{self.model.body_names})."
            )
        idx = self.model.body_names.index(body_part)
        return self.model.body_mass[idx]

    def scale_masses(
        self,
        body_parts: List[str] = None,
        mass_scales: List[float] = None,
        **body_name_to_mass_scale: float,
    ) -> Dict[str, float]:
        """Scale the (original) mass of body parts of the Mujoco model.

        The scales can be given as two lists (`body_parts` and `mass_scales`, paired up
        in order) and/or as `body_name=scale` keyword arguments. When a body is given
        both ways, the value from the lists is used.

        All the masses are first reset to their default values, so the scales are
        always relative to the default masses, not to the current ones.

        Returns a dictionary with the new masses.
        """
        new_masses: Dict[str, float] = {}
        body_parts = body_parts or []
        mass_scales = mass_scales or []
        body_name_to_mass_scale = body_name_to_mass_scale or {}

        self.reset_masses()
        body_name_to_mass_scale.update(zip(body_parts, mass_scales))

        for body_name, mass_scale in body_name_to_mass_scale.items():
            # This is the default mass, since the masses were just reset above.
            current_mass = self.get_mass(body_name)
            new_mass = mass_scale * current_mass
            self.set_mass(**{body_name: new_mass})
            new_masses[body_name] = new_mass
        # Not sure if we need to do this?
        self._update()
        return new_masses

    def get_and_modify_bodymass(self, body_name: str, scale: float):
        """Return a copy of the array of masses, with the mass of `body_name` scaled.

        The masses of the model itself are not modified.
        """
        idx = self.model.body_names.index(body_name)
        temp = np.copy(self.model.body_mass)
        temp[idx] *= scale
        return temp

    @staticmethod
    def _mass_setter(body_part: str, env: MujocoEnv, mass: float) -> None:
        """Function used to set the mass of a body part. This is used as the setter of the
        generated `_mass` properties.

        NOTE: `body_part` comes first so that it can be bound with `functools.partial`:
        the property then calls the resulting function with `(env, mass)`.
        """
        # Will raise a ValueError if the body part isn't found.
        idx = env.model.body_names.index(body_part)
        env.model.body_mass[idx] = mass
# def _get_mass(env: MujocoEnv, /, body_part: str) -> float:
# # Will raise an IndexError if the body part isnt found.
# idx = env.model.body_names.index(body_part)
# return env.model.body_mass[idx]
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_mass_test.py
================================================
""" TODO: Tests for the 'modified mass' mujoco envs. """
import operator
from typing import ClassVar, List, Type
from gym.wrappers import TimeLimit
from sequoia.conftest import mujoco_required
pytestmark = mujoco_required
from .modified_mass import ModifiedMassEnv
class ModifiedMassEnvTests:
    """Tests for environments in which the mass of the body parts can be modified.

    Meant to be used as a base class: subclasses set the `Environment` attribute to the
    class of environment to be tested.
    """

    # Environment class under test. Instantiated without arguments by the tests.
    Environment: ClassVar[Type[ModifiedMassEnv]]
    # names of the parts of the model which can be changed.
    # NOTE: The tests below read `self.Environment.BODY_NAMES` rather than this attribute.
    body_names: ClassVar[List[str]]

    def test_generated_properties_change_the_actual_mass(self):
        """The generated `<body_name>_mass` properties read and write the model's masses."""
        env = self.Environment()
        for body_name in self.Environment.BODY_NAMES:
            # Get the value directly from the mujoco model.
            model_value = env.model.body_mass[env.model.body_names.index(body_name)]
            assert getattr(env, f"{body_name}_mass") == model_value
            new_value = model_value * 2
            setattr(env, f"{body_name}_mass", new_value)
            # The change made through the property should be visible in the model.
            model_value = env.model.body_mass[env.model.body_names.index(body_name)]
            assert model_value == new_value

    def test_change_mass_each_step(self):
        """Change the mass of a body part after every step, over a few episodes.

        NOTE: There are no assertions in this test yet (see the TODO at the end).
        """
        env: ModifiedMassEnv = self.Environment()
        max_episode_steps = 200
        n_episodes = 3
        # NOTE: Interestingly, the renderer will show
        # `env.frame_skip * max_episode_steps` frames per episode, even when
        # "Ren[d]er every frame" is set to False.
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        env: ModifiedMassEnv
        total_steps = 0
        for episode in range(n_episodes):
            initial_state = env.reset()
            done = False
            episode_steps = 0
            start_y = initial_state[1]
            # Number of steps in which the entry at index 1 of the state increased.
            moved_up = 0
            previous_state = initial_state
            state = initial_state
            # Only the mass of the first body part is changed.
            body_part = self.Environment.BODY_NAMES[0]
            start_mass = env.get_mass(body_part)
            while not done:
                previous_state = state
                state, reward, done, info = env.step(env.action_space.sample())
                env.render("human")
                episode_steps += 1
                total_steps += 1
                # Increase the mass continually over time.
                env.set_mass(**{body_part: start_mass + 5 * total_steps / max_episode_steps})
                moved_up += state[1] > previous_state[1]
            print(f"Moving upward? {moved_up}")
        initial_z = env.init_qpos[1]
        final_z = env.sim.data.qpos[1]
        # TODO: Check that the change in mass had an impact

    def test_set_mass_with_task_schedule(self):
        """Check that the mass follows the task schedule of a `MultiTaskEnvironment`."""
        body_part = "torso"
        original = self.Environment()
        starting_mass = original.get_mass("torso")
        # Maps from a step to the change to apply to the env: either a dict (empty
        # here) or a callable (here, a call to `set_mass`).
        # NOTE(review): how these values are applied depends on `MultiTaskEnvironment`,
        # which isn't visible here — presumably callables are called with the env.
        task_schedule = {
            10: dict(),
            20: operator.methodcaller("set_mass", torso=starting_mass * 2),
            30: operator.methodcaller("set_mass", torso=starting_mass * 4),
        }
        from sequoia.common.gym_wrappers import MultiTaskEnvironment

        env = MultiTaskEnvironment(original, task_schedule=task_schedule)
        env.seed(123)
        env.reset()
        for step in range(100):
            _, _, done, _ = env.step(env.action_space.sample())
            # env.render()
            if done:
                env.reset()
            if 0 <= step < 10:
                assert env.get_mass(body_part) == starting_mass, step
            elif 10 <= step < 20:
                assert env.get_mass(body_part) == starting_mass, step
            elif 20 <= step < 30:
                assert env.get_mass(body_part) == starting_mass * 2, step
            elif step >= 30:
                assert env.get_mass(body_part) == starting_mass * 4, step
        env.close()
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_size.py
================================================
import hashlib
import inspect
import os
import tempfile
import xml.etree.ElementTree as ET
from copy import deepcopy
from logging import getLogger as get_logger
from pathlib import Path
from typing import ClassVar, Dict, List
from gym.envs.mujoco import MujocoEnv
logger = get_logger(__name__)
def change_size_in_xml(
    tree: ET.ElementTree, **body_name_to_size_scale: float
) -> ET.ElementTree:
    """Return a copy of `tree` in which the size of the given geoms is scaled.

    For each `body_name=scale` keyword argument, the geom named `body_name` (or
    `{body_name}_geom` if there is none) is looked up, and each of the numbers in its
    "size" attribute is multiplied by `scale`.

    The input tree is not modified.

    Raises
    ------
    AssertionError
        If there is no matching geom, or if the geom has no "size" attribute.
    """
    tree = deepcopy(tree)
    for body_name, size_scale in body_name_to_size_scale.items():
        geom = tree.find(f".//geom[@name='{body_name}']")
        if geom is None:
            geom = tree.find(f".//geom[@name='{body_name}_geom']")
        assert geom is not None, f"Can't find the geom for body part '{body_name}'!"
        assert "size" in geom.attrib, f"The geom of '{body_name}' has no 'size' attribute!"
        # NOTE: `split()` without an argument splits on any run of whitespace, which
        # also handles repeated, leading or trailing whitespace in the attribute.
        sizes: List[float] = [float(s) for s in geom.attrib["size"].split()]
        geom.attrib["size"] = " ".join(str(size * size_scale) for size in sizes)
    return tree
def get_geom_sizes(tree: ET.ElementTree, body_name: str) -> List[float]:
    """Return the numbers in the "size" attribute of the geom of the given body part.

    The geom named `body_name` is used if there is one, otherwise the geom named
    `{body_name}_geom`.

    Raises
    ------
    AssertionError
        If there is no matching geom, or if the geom has no "size" attribute.
    """
    geom = tree.find(f".//geom[@name='{body_name}']")
    if geom is None:
        geom = tree.find(f".//geom[@name='{body_name}_geom']")
    assert geom is not None, f"Can't find the geom for body part '{body_name}'!"
    assert "size" in geom.attrib, f"The geom of '{body_name}' has no 'size' attribute!"
    # NOTE: `split()` without an argument splits on any run of whitespace, which also
    # handles repeated, leading or trailing whitespace in the attribute.
    return [float(s) for s in geom.attrib["size"].split()]
class ModifiedSizeEnv(MujocoEnv):
    """
    Allows changing the size of the body parts.

    The sizes are changed by writing a modified copy of the XML model file to the
    temporary directory, and loading the environment from that file.

    TODO: This currently can modify the geometry in-place (at least visually) with the
    `self.model.geom_size` ndarray, but the joints don't follow the change in length.
    """

    BODY_NAMES: ClassVar[List[str]]

    # IDEA: Use something like this to tell apart modifications which can be applied
    # on-the-fly on a given env to get multiple tasks, vs those that require creating a
    # new environment for each task.
    CAN_BE_UPDATED_IN_PLACE: ClassVar[bool] = False

    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        # TODO: IF using one or more of these `Modified` buffers, then we need to
        # get each one a distinct argument name, which isn't ideal!
        body_parts: List[str] = None,  # Has to be the name of a geom, not of a body!
        size_scales: List[float] = None,
        body_name_to_size_scale: Dict[str, float] = None,
        **kwargs,
    ):
        """Create the environment, scaling the size of the given body parts.

        Parameters
        ----------
        model_path:
            Path of the XML model file. A path which doesn't start with "/" is looked
            up in the "assets" directory next to the source file of gym's `MujocoEnv`.
        frame_skip, **kwargs:
            Forwarded to the next constructor in the method resolution order.
        body_parts, size_scales:
            Names and scaling factors, paired up in order.
        body_name_to_size_scale:
            Maps from a name to its scaling factor. Entries from `body_parts` /
            `size_scales` override the entries of this dict with the same name.

        Raises
        ------
        IOError
            If the XML model file does not exist.
        RuntimeError
            If any of the scaling factors is 0.
        """
        body_parts = body_parts or []
        size_scales = size_scales or []
        # Make a copy, so that the dictionary of the caller isn't modified below.
        body_name_to_size_scale = dict(body_name_to_size_scale or {})
        body_name_to_size_scale.update(zip(body_parts, size_scales))

        if model_path.startswith("/"):
            full_path = model_path
        else:
            full_path = os.path.join(
                os.path.dirname(inspect.getsourcefile(MujocoEnv)), "assets", model_path
            )
        if not os.path.exists(full_path):
            raise IOError(f"File {full_path} does not exist")

        # Check every scaling factor, no matter which argument it was passed through.
        all_scale_factors = [*size_scales, *body_name_to_size_scale.values()]
        if any(scale_factor == 0 for scale_factor in all_scale_factors):
            raise RuntimeError("Can't use a scale_factor of 0!")

        logger.debug(f"Default XML path: {full_path}")
        self.default_tree = ET.parse(full_path)
        self.tree = self.default_tree

        if body_name_to_size_scale:
            logger.debug(f"Changing parts: {body_name_to_size_scale}")
            self.tree = change_size_in_xml(self.default_tree, **body_name_to_size_scale)

            # Write the modified XML to a file in the temporary directory. The name of
            # the file is a hash of the string form of this env and of the scales.
            hash_str = hashlib.md5((str(self) + str(body_name_to_size_scale)).encode()).hexdigest()
            temp_dir = Path(tempfile.gettempdir())
            new_xml_path = temp_dir / f"{hash_str}.xml"
            # `exist_ok=True`: no error if the directory already exists (or if it gets
            # created by someone else in the meantime).
            new_xml_path.parent.mkdir(exist_ok=True, parents=True)
            self.tree.write(str(new_xml_path))
            logger.debug(f"Generated XML path: {new_xml_path}")

            # Update the value to be passed to the constructor:
            full_path = str(new_xml_path)

        self.body_name_to_size_scale = body_name_to_size_scale
        # load the modified xml
        super().__init__(model_path=full_path, frame_skip=frame_skip, **kwargs)
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_size_test.py
================================================
""" TODO: Tests for the 'modified size' mujoco envs. """
from typing import ClassVar, List, Type
import numpy as np
from gym.wrappers import TimeLimit
from sequoia.conftest import mujoco_required
pytestmark = mujoco_required
from .modified_size import ModifiedSizeEnv, get_geom_sizes
class ModifiedSizeEnvTests:
    """Reusable tests for environments that support rescaling their body parts.

    Subclasses set `Environment` to the environment class under test.
    """

    Environment: ClassVar[Type[ModifiedSizeEnv]]

    def test_change_size_per_task(self):
        """Each task env reports geom sizes equal to the defaults times its scale."""
        body_part = self.Environment.BODY_NAMES[0]

        nb_tasks = 2
        max_episode_steps = 200
        n_episodes = 2

        scale_factors: List[float] = [
            (0.5 + 2 * (task_id / nb_tasks)) for task_id in range(nb_tasks)
        ]
        default_tree = self.Environment().default_tree
        default_sizes: List[float] = get_geom_sizes(default_tree, body_part)

        task_envs: List[TimeLimit] = [
            TimeLimit(
                self.Environment(body_name_to_size_scale={body_part: scale_factor}),
                max_episode_steps=max_episode_steps,
            )
            for scale_factor in scale_factors
        ]

        for task_env, task_scale_factor in zip(task_envs, scale_factors):
            for _ in range(n_episodes):
                size = get_geom_sizes(task_env.tree, body_part)
                expected_size = [default_size * task_scale_factor for default_size in default_sizes]
                print(
                    f"default sizes: {default_sizes}, Size: {size}, "
                    f"task_scale_factor: {task_scale_factor}"
                )
                assert np.allclose(size, expected_size)

                task_env.reset()
                done = False
                while not done:
                    _, _, done, _ = task_env.step(task_env.action_space.sample())
                    # NOTE: Uncomment to visually inspect.
                    # (Kept disabled by default, since rendering to a window
                    # requires a display.)
                    # task_env.render("human")

            task_env.close()
================================================
FILE: sequoia/settings/rl/envs/mujoco/modified_wall.py
================================================
"""
TODO: Do the same for the WallEnv from gym-extensions.
"""
# HalfCheetahWallEnv = lambda *args, **kwargs: WallEnvFactory(ModifiedHalfCheetahEnv)(
# model_path=os.path.dirname(gym.envs.mujoco.__file__) + "/assets/half_cheetah.xml",
# ori_ind=-1,
# *args,
# **kwargs
# )
================================================
FILE: sequoia/settings/rl/envs/mujoco/mujoco_model_utils.py
================================================
from dataclasses import dataclass
from typing import Any, NamedTuple, Sequence, Tuple, Union
from xml.etree.ElementTree import Element
import numpy as np
def pos_to_str(pos: Tuple[float, ...]) -> str:
    """Format the coordinates of `pos` as a single space-separated string.

    Values equal to zero are written as "0"; every other value is rounded to 5
    decimal places before being converted to a string.
    """
    tokens = []
    for value in pos:
        if value == 0:
            tokens.append("0")
        else:
            tokens.append(str(round(value, 5)))
    return " ".join(tokens)
def str_to_pos(pos_str: str) -> "Pos":
    """Parse a whitespace-separated string of numbers into a `Pos`."""
    coordinates = map(float, pos_str.split())
    return Pos(*coordinates)
class Pos(NamedTuple):
    """A triple of (x, y, z) values with element-wise arithmetic.

    The arithmetic operators accept either a scalar (`int` / `float`), which is
    applied to every axis, or a list / tuple / ndarray with one entry per axis.
    Any other operand type makes the operator return `NotImplemented`.

    Equality is approximate: it is evaluated with `np.isclose` rather than with
    exact tuple comparison.
    """

    x: float
    y: float
    z: float

    def to_str(self) -> str:
        """Return the 'str' version of `self` to be placed in a 'pos' field in the XML."""
        return pos_to_str(self)

    @classmethod
    def from_str(cls, pos_str: str) -> "Pos":
        """Parse a whitespace-separated string of numbers."""
        return cls(*[float(v) for v in pos_str.split()])

    def _per_axis(self, operand: Any):
        """Return `operand` as one value per axis, or None if its type isn't supported."""
        if isinstance(operand, (int, float)):
            return [operand for _ in range(len(self))]
        if not isinstance(operand, (list, tuple, np.ndarray)):
            return None
        assert len(operand) == len(self)
        return operand

    def __mul__(self, value: Union[int, float, np.ndarray]) -> "Pos":
        coefficients = self._per_axis(value)
        if coefficients is None:
            return NotImplemented
        return type(self)(*[v * coef for v, coef in zip(self, coefficients)])

    def __rmul__(self, value: Any):
        return self * value

    def __eq__(self, other: Union[Tuple[float, ...], np.ndarray]):
        if not isinstance(other, (list, tuple, np.ndarray)):
            return NotImplemented
        try:
            # `np.asfarray` was removed in NumPy 2.0; `np.asarray(..., dtype=float)`
            # is the documented replacement.
            close = np.isclose(np.asarray(self, dtype=float), np.asarray(other, dtype=float))
        except ValueError:
            # Shapes that can't be broadcast together (e.g. a different number
            # of entries) or non-numeric contents: not equal.
            return False
        return bool(close.all())

    def __ne__(self, other: Any):
        # `tuple.__ne__` compares exactly, which would disagree with the
        # tolerance-based `__eq__` above, so `!=` is derived from `==`.
        result = self.__eq__(other)
        if result is NotImplemented:
            return NotImplemented
        return not result

    def __truediv__(self, other: Union[int, float, Sequence[float]]):
        divisors = self._per_axis(other)
        if divisors is None:
            return NotImplemented
        return type(self)(*[v / v_other for v, v_other in zip(self, divisors)])

    def __add__(self, other: Union[int, float, np.ndarray]) -> "Pos":
        terms = self._per_axis(other)
        if terms is None:
            return NotImplemented
        return type(self)(*[v + v_other for v, v_other in zip(self, terms)])

    def __radd__(self, other: Any) -> "Pos":
        return self + other

    def __neg__(self) -> "Pos":
        return type(self)(*[-v for v in self])

    def __sub__(self, other: Union[int, float, np.ndarray]) -> "Pos":
        terms = self._per_axis(other)
        if terms is None:
            return NotImplemented
        # Subtract element by element: unary minus isn't defined for the plain
        # lists / tuples that `_per_axis` can return.
        return type(self)(*[v - v_other for v, v_other in zip(self, terms)])

    def __rsub__(self, other: Any) -> "Pos":
        return (-self) + other

    @classmethod
    def of_element(cls, element: Element, field: str = "pos") -> "Pos":
        """Read the value of attribute `field` of `element`.

        Raises:
            RuntimeError: If `element` has no such attribute.
        """
        if field not in element.attrib:
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        return cls.from_str(element.attrib[field])

    def set_in_element(self, element: Element, field: str = "pos") -> None:
        """Overwrite the existing attribute `field` of `element` with `self`.

        Raises:
            RuntimeError: If `element` has no such attribute yet.
        """
        if field not in element.attrib:
            # NOTE: Refusing to set a new field for now.
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        element.set(field, self.to_str())
class FromTo(NamedTuple):
    """A pair of `Pos` endpoints, serialised as six space-separated numbers.

    NOTE(review): this module defines another class named `FromTo` further
    down, which rebinds the module-level name - confirm which one is intended.
    """

    start: Pos
    end: Pos

    def to_str(self) -> str:
        """Return the 'str' version of `self` to be placed in a 'fromto' field in the XML."""
        return " ".join((self.start.to_str(), self.end.to_str()))

    @classmethod
    def from_str(cls, fromto: str) -> "FromTo":
        """Parse six whitespace-separated numbers: the start, then the end."""
        numbers = [float(token) for token in fromto.split()]
        assert len(numbers) == 6
        start_xyz, end_xyz = numbers[:3], numbers[3:]
        return cls(Pos(*start_xyz), Pos(*end_xyz))

    @classmethod
    def of_element(cls, element: Element, field: str = "fromto") -> "FromTo":
        """Read attribute `field` of `element`; RuntimeError if it is missing."""
        if field in element.attrib:
            return cls.from_str(element.attrib[field])
        raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")

    def set_in_element(self, element: Element, field: str = "fromto") -> None:
        """Overwrite the existing attribute `field`; RuntimeError if it is missing."""
        # NOTE: Refusing to set a new field for now.
        if field not in element.attrib:
            raise RuntimeError(f"Element {element} doesn't have a '{field}' attribute.")
        serialized = self.to_str()
        element.set(field, serialized)

    @property
    def center(self) -> Pos:
        """Midpoint between `start` and `end`."""
        endpoint_sum = self.start + self.end
        return endpoint_sum / 2
import textwrap
@dataclass
class FromTo:
    """Six coordinates: the `from_*` point followed by the `to_*` point.

    NOTE(review): this module also defines a NamedTuple called `FromTo` earlier
    on; this later definition rebinds the module-level name - confirm which one
    is intended.
    """

    from_x: float
    from_y: float
    from_z: float
    to_x: float
    to_y: float
    to_z: float

    def __str__(self):
        """Return the six coordinates separated by single spaces."""
        coordinates = (self.from_x, self.from_y, self.from_z, self.to_x, self.to_y, self.to_z)
        # `str.join` only accepts strings, so each number is converted first.
        return " ".join(str(value) for value in coordinates)
from dataclasses import dataclass
@dataclass
class TorsoGeom:
    """Attribute values for the geom named "torso_geom"."""

    friction: float = 0.9
    # No annotation here, so this is a plain class attribute (shared by every
    # instance) rather than a dataclass field.
    fromto = FromTo(0, 0, 1.45, 0, 0, 1.05)
    name: str = "torso_geom"
    size: float = 0.05
    type: str = "capsule"

    def render_xml(self) -> str:
        """Return the XML for this geom (currently an empty string)."""
        return ""
@dataclass
class HoperV3Model:
    """Container for the pieces of a model; currently only the torso geom."""

    torso_geom: TorsoGeom

    def render_xml(self) -> str:
        """Return the XML for the model (the template is currently empty)."""
        xml_template = ""
        return textwrap.dedent(xml_template)
================================================
FILE: sequoia/settings/rl/envs/mujoco/walker2d.py
================================================
from typing import ClassVar, Dict, List, Tuple
from gym.envs.mujoco import MujocoEnv
from gym.envs.mujoco.walker2d import Walker2dEnv as _Walker2dV2Env
from gym.envs.mujoco.walker2d_v3 import Walker2dEnv as _Walker2dV3Env
from .modified_gravity import ModifiedGravityEnv
from .modified_mass import ModifiedMassEnv
from .modified_size import ModifiedSizeEnv
class Walker2dV2Env(_Walker2dV2Env):
    """
    Simply allows changing of XML file, probably not necessary if we pull request the
    xml name as a kwarg in openai gym
    """

    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "thigh", "leg", "foot",
        "thigh_left", "leg_left", "foot_left",
    ]

    def __init__(self, model_path: str = "walker2d.xml", frame_skip: int = 4):
        # Calls `MujocoEnv.__init__` directly, forwarding the chosen model
        # file and frame skip.
        MujocoEnv.__init__(self, frame_skip=frame_skip, model_path=model_path)
class Walker2dV3Env(_Walker2dV3Env):
    """Walker2d (v3) env whose model file can be given as `model_path` or `xml_file`."""

    BODY_NAMES: ClassVar[List[str]] = [
        "torso",
        "thigh", "leg", "foot",
        "thigh_left", "leg_left", "foot_left",
    ]

    def __init__(
        self,
        model_path: str = "walker2d.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_z_range: Tuple[float, float] = (0.8, 2.0),
        healthy_angle_range: Tuple[float, float] = (-1.0, 1.0),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        xml_file: str = None,
        frame_skip: int = 4,
    ):
        # Only the value 4 is accepted for `frame_skip`; it is not forwarded
        # to the parent constructor.
        if frame_skip != 4:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")

        parent_kwargs = dict(
            # `xml_file` takes precedence over `model_path` when both are given.
            xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
        )
        super().__init__(**parent_kwargs)
class Walker2dGravityEnv(ModifiedGravityEnv, Walker2dV2Env):
    """Walker2d (v2) env combined with `ModifiedGravityEnv`."""

    # NOTE: This environment could be used in ContinualRL!

    def __init__(
        self,
        model_path: str = "walker2d.xml",
        frame_skip: int = 4,
        gravity: float = -9.81,
    ):
        init_kwargs = dict(model_path=model_path, frame_skip=frame_skip, gravity=gravity)
        super().__init__(**init_kwargs)
class ContinualWalker2dV2Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, Walker2dV2Env):
    """Walker2d (v2) env combining the gravity, size and mass modification classes."""

    def __init__(
        self,
        model_path: str = "walker2d.xml",
        frame_skip: int = 4,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
    ):
        # Everything is forwarded by keyword to the next `__init__` in the MRO.
        init_kwargs = dict(
            model_path=model_path,
            frame_skip=frame_skip,
            gravity=gravity,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
        )
        super().__init__(**init_kwargs)
class ContinualWalker2dV3Env(ModifiedGravityEnv, ModifiedSizeEnv, ModifiedMassEnv, Walker2dV3Env):
    """Walker2d (v3) env combining the gravity, size and mass modification classes."""

    def __init__(
        self,
        model_path: str = "walker2d.xml",
        forward_reward_weight: float = 1.0,
        ctrl_cost_weight: float = 1e-3,
        healthy_reward: float = 1.0,
        terminate_when_unhealthy: bool = True,
        healthy_z_range: Tuple[float, float] = (0.8, 2.0),
        healthy_angle_range: Tuple[float, float] = (-1.0, 1.0),
        reset_noise_scale: float = 5e-3,
        exclude_current_positions_from_observation: bool = True,
        gravity=-9.81,
        body_name_to_size_scale: Dict[str, float] = None,
        body_name_to_mass_scale: Dict[str, float] = None,
        xml_file: str = None,
        frame_skip: int = 4,
    ):
        # Only the value 4 is accepted for `frame_skip`.
        if frame_skip != 4:
            raise NotImplementedError("todo: Add a frame_skip arg to the gym class.")

        # Everything is forwarded by keyword to the next `__init__` in the MRO.
        init_kwargs = dict(
            model_path=model_path,
            frame_skip=frame_skip,
            # `xml_file` takes precedence over `model_path` when both are given.
            xml_file=xml_file or model_path,
            forward_reward_weight=forward_reward_weight,
            ctrl_cost_weight=ctrl_cost_weight,
            healthy_reward=healthy_reward,
            terminate_when_unhealthy=terminate_when_unhealthy,
            healthy_z_range=healthy_z_range,
            healthy_angle_range=healthy_angle_range,
            reset_noise_scale=reset_noise_scale,
            exclude_current_positions_from_observation=exclude_current_positions_from_observation,
            body_name_to_size_scale=body_name_to_size_scale,
            body_name_to_mass_scale=body_name_to_mass_scale,
            gravity=gravity,
        )
        super().__init__(**init_kwargs)
================================================
FILE: sequoia/settings/rl/envs/mujoco/walker2d_test.py
================================================
from typing import ClassVar, Type
from sequoia.conftest import mujoco_required
from .modified_gravity_test import ModifiedGravityEnvTests
from .modified_mass_test import ModifiedMassEnvTests
from .modified_size_test import ModifiedSizeEnvTests
from .walker2d import ContinualWalker2dV2Env, ContinualWalker2dV3Env
pytestmark = mujoco_required
class TestContinualWalker2dV2Env(
    ModifiedGravityEnvTests,
    ModifiedSizeEnvTests,
    ModifiedMassEnvTests,
):
    """Runs the gravity, size and mass test suites against `ContinualWalker2dV2Env`."""

    Environment: ClassVar[Type[ContinualWalker2dV2Env]] = ContinualWalker2dV2Env
class TestContinualWalker2dV3Env(
    ModifiedGravityEnvTests,
    ModifiedSizeEnvTests,
    ModifiedMassEnvTests,
):
    """Runs the gravity, size and mass test suites against `ContinualWalker2dV3Env`."""

    Environment: ClassVar[Type[ContinualWalker2dV3Env]] = ContinualWalker2dV3Env
================================================
FILE: sequoia/settings/rl/envs/variant_spec.py
================================================
from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union
import gym
from gym.envs.registration import EnvSpec, load
# Type variable for the kind of environment that a spec creates.
EnvType = TypeVar("EnvType", bound=gym.Env)
# An entry point is either a string or a callable that returns an env.
_EntryPoint = Union[str, Callable[..., gym.Env]]
class EnvVariantSpec(EnvSpec, Generic[EnvType]):
    """An `EnvSpec` that remembers the spec it was derived from (`base_spec`)."""

    def __init__(
        self,
        id: str,
        base_spec: EnvSpec,
        entry_point: Union[str, Callable[..., EnvType]] = None,
        reward_threshold: int = None,
        nondeterministic: bool = False,
        max_episode_steps=None,
        kwargs=None,
    ):
        super().__init__(
            id_requested=id,
            entry_point=entry_point,
            reward_threshold=reward_threshold,
            nondeterministic=nondeterministic,
            max_episode_steps=max_episode_steps,
            kwargs=kwargs,
        )
        self.base_spec = base_spec

    def make(self, **kwargs) -> EnvType:
        """Same as the parent's `make`, with a more precise return annotation."""
        return super().make(**kwargs)

    @classmethod
    def of(
        cls,
        original: EnvSpec,
        *,
        new_id: str,
        new_reward_threshold: Optional[float] = None,
        new_nondeterministic: Optional[bool] = None,
        new_max_episode_steps: Optional[int] = None,
        new_kwargs: Dict[str, Any] = None,
        new_entry_point: Union[str, Callable[..., gym.Env]] = None,
        wrappers: Optional[List[Callable[[gym.Env], gym.Env]]] = None,
    ) -> "EnvVariantSpec":
        """Returns a new env spec which uses additional wrappers.

        Every `new_*` argument left to `None` falls back to the corresponding
        value of `original`.

        NOTE: The `new_kwargs` update the current kwargs, rather than replacing them.
        The kwargs of `original` itself are left untouched.
        """
        # Copy before updating, so that the kwargs of the original spec aren't
        # modified as a side effect of creating a variant.
        new_spec_kwargs = dict(original.kwargs or {})
        new_spec_kwargs.update(new_kwargs or {})

        # Replace the entry-point if desired:
        new_spec_entry_point: Union[str, Callable[..., EnvType]] = (
            new_entry_point or original.entry_point
        )

        new_reward_threshold = (
            new_reward_threshold if new_reward_threshold is not None else original.reward_threshold
        )
        new_nondeterministic = (
            new_nondeterministic if new_nondeterministic is not None else original.nondeterministic
        )
        new_max_episode_steps = (
            new_max_episode_steps
            if new_max_episode_steps is not None
            else original.max_episode_steps
        )

        # Add wrappers if desired.
        if wrappers:
            # Snapshot the list so later changes by the caller have no effect.
            wrappers = list(wrappers)
            # Get the callable that creates the env. This uses the selected
            # entry point, so that a `new_entry_point` is also honored when
            # wrappers are given.
            if callable(new_spec_entry_point):
                env_fn = new_spec_entry_point
            else:
                env_fn = load(new_spec_entry_point)

            # @lebrice Not sure if there is a cleaner way to do this, maybe using
            # functools.reduce or functools.partial?
            def _new_entry_point(**kwargs) -> gym.Env:
                env = env_fn(**kwargs)
                for wrapper in wrappers:
                    env = wrapper(env)
                return env

            new_spec_entry_point = _new_entry_point

        return cls(
            new_id,
            base_spec=original,
            entry_point=new_spec_entry_point,
            reward_threshold=new_reward_threshold,
            nondeterministic=new_nondeterministic,
            max_episode_steps=new_max_episode_steps,
            kwargs=new_spec_kwargs,
        )
================================================
FILE: sequoia/settings/rl/incremental/__init__.py
================================================
from .setting import IncrementalRLSetting
from .tasks import make_incremental_task
================================================
FILE: sequoia/settings/rl/incremental/objects.py
================================================
from dataclasses import dataclass
from typing import Optional, Sequence, TypeVar, Union
from torch import Tensor
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from ..discrete import DiscreteTaskAgnosticRLSetting
# IncrementalAssumption, DiscreteTaskAgnosticRLSetting
@dataclass(frozen=True)
class Observations(DiscreteTaskAgnosticRLSetting.Observations, IncrementalAssumption.Observations):
    """Observations from a Continual Reinforcement Learning environment."""

    x: Tensor
    task_labels: Optional[Tensor] = None
    # Mirrors the 'done' value normally returned by the env's 'step' method.
    # It is included so that a method which iterates over the environment in a
    # dataloader-like fashion (i.e. the BaseMethod) still has access to it.
    done: Optional[Union[bool, Sequence[bool]]] = None
@dataclass(frozen=True)
class Actions(DiscreteTaskAgnosticRLSetting.Actions, IncrementalAssumption.Actions):
    """Actions to be sent to a Continual Reinforcement Learning environment."""

    # Only the `y_pred` field is declared at this level.
    y_pred: Tensor
@dataclass(frozen=True)
class Rewards(DiscreteTaskAgnosticRLSetting.Rewards, IncrementalAssumption.Rewards):
    """Rewards obtained from a Continual Reinforcement Learning environment."""

    # Only the `y` field is declared at this level.
    y: Tensor
# Type variables bound to the Observations / Actions / Rewards classes defined
# in this module, for use in generic annotations.
ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)
================================================
FILE: sequoia/settings/rl/incremental/results.py
================================================
from dataclasses import dataclass
from typing import ClassVar, TypeVar
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.assumptions.incremental_results import IncrementalResults
# Type variable for the kind of metrics stored in the results. The name given
# to `TypeVar` has to match the name of the variable it is assigned to.
MetricType = TypeVar("MetricType", bound=EpisodeMetrics)
@dataclass
class IncrementalRLResults(IncrementalResults[MetricType]):
    """Results class for RL settings, where the objective is the mean reward per episode."""

    # A higher mean reward per episode is better.
    lower_is_better: ClassVar[bool] = False

    objective_name: ClassVar[str] = "Mean reward per episode"

    # Minimum runtime considered (in hours).
    # (Going faster than this doesn't yield any extra points.)
    min_runtime_hours: ClassVar[float] = 1.5
    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 12.0
================================================
FILE: sequoia/settings/rl/incremental/setting.py
================================================
import itertools
import operator
import sys
import warnings
from dataclasses import dataclass, fields
from functools import partial
from itertools import islice
from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Type, Union
import gym
import numpy as np
from gym import spaces
from gym.envs.registration import EnvSpec
from gym.utils import colorize
from gym.vector.utils import batch_space
from simple_parsing import list_field
from simple_parsing.helpers import choice
from typing_extensions import Final
from sequoia.common.gym_wrappers import MultiTaskEnvironment, TransformObservation
from sequoia.common.gym_wrappers.utils import is_monsterkong_env
from sequoia.common.metrics import EpisodeMetrics
from sequoia.common.spaces import Sparse
from sequoia.common.spaces.typed_dict import TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.assumptions.iid_results import TaskResults
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.settings.base import Method
from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.settings.rl.envs import (
METAWORLD_INSTALLED,
MTENV_INSTALLED,
MUJOCO_INSTALLED,
MetaWorldEnv,
MTEnv,
metaworld_envs,
mtenv_envs,
)
from sequoia.settings.rl.wrappers.task_labels import FixedTaskLabelWrapper
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import constant, dict_union, pairwise
from ..discrete.setting import DiscreteTaskAgnosticRLSetting
from ..discrete.setting import supported_envs as _parent_supported_envs
from .objects import Actions, Observations, Rewards # type: ignore
from .results import IncrementalRLResults
from .tasks import IncrementalTask, is_supported, make_incremental_task, sequoia_registry
logger = get_logger(__name__)
# A callable that returns an env.
EnvFactory = Callable[[], gym.Env]
# TODO: Move this 'passing custom env for each task' feature up into DiscreteTaskAgnosticRL.
# TODO: Design a better mechanism for extending this task creation. Currently, this dictionary lists
# out the 'supported envs' (envs for which we have an explicit way of creating tasks). However when
# the dataset is set to "MT10" for example, then that does something different: It hard-sets some
# of the values of the fields on the setting!
# Start from the envs supported by the parent setting, and add every spec of the
# sequoia registry that isn't already there and for which `is_supported` is true.
supported_envs: Dict[str, Union[str, EnvSpec]] = dict_union(
    _parent_supported_envs,
    {
        spec.id: spec
        for env_id, spec in sequoia_registry.env_specs.items()
        if spec.id not in _parent_supported_envs and is_supported(env_id)
    },
)

if METAWORLD_INSTALLED:
    # Benchmark names, each mapped to itself.
    supported_envs.update(MT10="MT10", MT50="MT50", CW10="CW10", CW20="CW20")

if MUJOCO_INSTALLED:
    # One "LPG-FTW" id per (env, modification, version) combination; the
    # rightmost component varies fastest.
    for env_name in ["HalfCheetah", "Hopper", "Walker2d"]:
        for modification in ["bodyparts", "gravity"]:
            for version in ["v2", "v3"]:
                env_id = f"LPG-FTW-{modification}-{env_name}-{version}"
                supported_envs[env_id] = env_id

# Every supported env id, mapped to itself.
available_datasets: Dict[str, str] = dict(zip(supported_envs, supported_envs))
@dataclass
class IncrementalRLSetting(IncrementalAssumption, DiscreteTaskAgnosticRLSetting):
"""Continual RL setting in which:
- Changes in the environment's context occur suddenly (same as in Discrete, Task-Agnostic RL)
- Task boundary information (and task labels) are given at training time
- Task boundary information is given at test time, but task identity is not.
"""
Observations: ClassVar[Type[Observations]] = Observations
Actions: ClassVar[Type[Actions]] = Actions
Rewards: ClassVar[Type[Rewards]] = Rewards
# The function used to create the tasks for the chosen env.
_task_sampling_function: ClassVar[Callable[..., IncrementalTask]] = make_incremental_task
Results: ClassVar[Type[Results]] = IncrementalRLResults
# Class variable that holds the dict of available environments.
available_datasets: ClassVar[Dict[str, str]] = available_datasets
# Which dataset/environment to use for training, validation and testing.
dataset: str = choice(available_datasets, default="CartPole-v0")
# # The number of tasks. By default 0, which means that it will be set
# # depending on other fields in __post_init__, or eventually be just 1.
# nb_tasks: int = field(0, alias=["n_tasks", "num_tasks"])
# (Copied from the assumption, just for clarity:)
# TODO: Shouldn't these kinds of properties be on the class, rather than on the
# instance?
# Whether the task boundaries are smooth or sudden.
smooth_task_boundaries: Final[bool] = constant(False)
# Whether to give access to the task labels at train time.
task_labels_at_train_time: Final[bool] = constant(True)
# Whether to give access to the task labels at test time.
task_labels_at_test_time: bool = False
# NOTE: Specifying the `type` to use for the argparse argument, because of a bug in
# simple-parsing that makes this not work correctly atm.
train_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str)
val_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str)
test_envs: List[Union[str, Callable[[], gym.Env]]] = list_field(type=str)
def __post_init__(self):
defaults = {f.name: f.default for f in fields(self)}
# NOTE: These benchmark functions don't just create the datasets, they actually set most of
# the fields too!
if isinstance(self.dataset, str) and self.dataset.startswith("LPG-FTW"):
self.train_envs, self.val_envs, self.test_envs = make_lpg_ftw_datasets(self.dataset)
# Use fewer tasks, if a custom number was passed. (NOTE: This is not ideal, same as
# everywhere else that has to check against the default value)
if self.nb_tasks not in {None, defaults["nb_tasks"]}:
logger.info(
f"Using a custom number of tasks ({self.nb_tasks}) instead of the default "
f"({len(self.train_envs)})."
)
self.train_envs = self.train_envs[: self.nb_tasks]
self.val_envs = self.val_envs[: self.nb_tasks]
self.test_envs = self.test_envs[: self.nb_tasks]
self.nb_tasks = len(self.train_envs)
self.max_episode_steps = self.max_episode_steps or 1_000
self.train_steps_per_task = 100_000
self.train_max_steps = self.nb_tasks * self.train_steps_per_task
self.test_steps_per_task = 10_000
self.test_max_steps = self.nb_tasks * self.test_steps_per_task
task_label_space = spaces.Discrete(self.nb_tasks)
train_task_label_space = task_label_space
if not self.task_labels_at_train_time:
train_task_label_space = Sparse(train_task_label_space, sparsity=1.0)
# This should be ok for now.
val_task_label_space = train_task_label_space
test_task_label_space = task_label_space
if not self.task_labels_at_test_time:
test_task_label_space = Sparse(test_task_label_space, sparsity=1.0)
train_seed: Optional[int] = None
valid_seed: Optional[int] = None
test_seed: Optional[int] = None
if self.config and self.config.seed is not None:
train_seed = self.config.seed
valid_seed = train_seed + 123
test_seed = train_seed + 456
self.train_envs = [
partial(
create_env,
env_fn=env_fn,
wrappers=[
partial(
FixedTaskLabelWrapper,
task_label=(i if self.task_labels_at_train_time else None),
task_label_space=train_task_label_space,
)
],
seed=train_seed,
)
for i, env_fn in enumerate(self.train_envs)
]
self.val_envs = [
partial(
create_env,
env_fn=env_fn,
wrappers=[
partial(
FixedTaskLabelWrapper,
task_label=(i if self.task_labels_at_train_time else None),
task_label_space=val_task_label_space,
)
],
seed=valid_seed,
)
for i, env_fn in enumerate(self.train_envs)
]
self.test_envs = [
partial(
create_env,
env_fn=env_fn,
wrappers=[
partial(
FixedTaskLabelWrapper,
task_label=(i if self.task_labels_at_test_time else None),
task_label_space=test_task_label_space,
)
],
seed=test_seed,
)
for i, env_fn in enumerate(self.train_envs)
]
# Meta-World datasets:
if self.dataset in ["MT10", "MT50", "CW10", "CW20"]:
from metaworld import MT10, MT50, MetaWorldEnv, Task
benchmarks = {
"MT10": MT10,
"MT50": MT50,
"CW10": MT50,
"CW20": MT50,
}
benchmark_class = benchmarks[self.dataset]
logger.info(
f"Creating metaworld benchmark {benchmark_class}, this might take a "
f"while (~15 seconds)."
)
# NOTE: Saving this attribute on `self` for the time being so that it can be inspected
# by the tests if needed. However it would be best to move this benchmark stuff into a
# function, same as with LPG-FTW.
benchmark = benchmark_class(seed=self.config.seed if self.config else None)
self._benchmark = benchmark
envs: Dict[str, Type[MetaWorldEnv]] = benchmark.train_classes
env_tasks: Dict[str, List[Task]] = {
env_name: [task for task in benchmark.train_tasks if task.env_name == env_name]
for env_name, env_class in benchmark.train_classes.items()
}
train_env_tasks: Dict[str, List[Task]] = {}
val_env_tasks: Dict[str, List[Task]] = {}
test_env_tasks: Dict[str, List[Task]] = {}
test_fraction = 0.1
val_fraction = 0.1
for env_name, env_tasks in env_tasks.items():
n_tasks = len(env_tasks)
n_val_tasks = int(max(1, n_tasks * val_fraction))
n_test_tasks = int(max(1, n_tasks * test_fraction))
n_train_tasks = len(env_tasks) - n_val_tasks - n_test_tasks
if n_train_tasks <= 1:
# Can't create train, val and test tasks.
raise RuntimeError(f"There aren't enough tasks for env {env_name} ({n_tasks}) ")
tasks_iterator = iter(env_tasks)
train_env_tasks[env_name] = list(islice(tasks_iterator, n_train_tasks))
val_env_tasks[env_name] = list(islice(tasks_iterator, n_val_tasks))
test_env_tasks[env_name] = list(islice(tasks_iterator, n_test_tasks))
assert train_env_tasks[env_name]
assert val_env_tasks[env_name]
assert test_env_tasks[env_name]
max_train_steps_per_task = 1_000_000
if self.dataset in ["CW10", "CW20"]:
# TODO: Raise a warning if the number of tasks is non-default and set to
# something different than in the benchmark
# Re-create the [ContinualWorld benchmark](@TODO: Add citation here)
version = 2
env_names = [
f"hammer-v{version}",
f"push-wall-v{version}",
f"faucet-close-v{version}",
f"push-back-v{version}",
f"stick-pull-v{version}",
f"handle-press-side-v{version}",
f"push-v{version}",
f"shelf-place-v{version}",
f"window-close-v{version}",
f"peg-unplug-side-v{version}",
]
if (
self.train_steps_per_task not in [defaults["train_steps_per_task"], None]
and self.train_steps_per_task > max_train_steps_per_task
):
raise RuntimeError(
f"Can't use more than {max_train_steps_per_task} steps per "
f"task in the {self.dataset} benchmark!"
)
# TODO: Decide the number of test steps.
# NOTE: Should we allow using fewer steps?
# NOTE: The default value for this field is 10_000 currently, so this
# check doesn't do anything.
if self.dataset == "CW20":
# CW20 does tasks [0 -> 10] and then [0 -> 10] again.
env_names = env_names * 2
train_env_names = env_names
val_env_names = env_names
test_env_names = env_names
else:
train_env_names = list(train_env_tasks.keys())
val_env_names = list(val_env_tasks.keys())
test_env_names = list(test_env_tasks.keys())
self.nb_tasks = len(train_env_names)
if self.train_max_steps not in [defaults["train_max_steps"], None]:
self.train_steps_per_task = self.train_max_steps // self.nb_tasks
elif self.train_steps_per_task is None:
self.train_steps_per_task = max_train_steps_per_task
self.train_max_steps = self.nb_tasks * self.train_steps_per_task
if self.test_max_steps in [defaults["test_max_steps"], None]:
if self.test_steps_per_task is None:
self.test_steps_per_task = 10_000
self.test_max_steps = self.test_steps_per_task * self.nb_tasks
# TODO: Double-check that the train/val/test wrappers are added to each env.
self.train_envs = [
partial(
make_metaworld_env,
env_class=envs[env_name],
tasks=train_env_tasks[env_name],
)
for env_name in train_env_names
]
self.val_envs = [
partial(
make_metaworld_env,
env_class=envs[env_name],
tasks=val_env_tasks[env_name],
)
for env_name in val_env_names
]
self.test_envs = [
partial(
make_metaworld_env,
env_class=envs[env_name],
tasks=test_env_tasks[env_name],
)
for env_name in test_env_names
]
# if is_monsterkong_env(self.dataset):
# if self.force_pixel_observations:
# # Add this to the kwargs that will be passed to gym.make, to make sure that
# # we observe pixels, and not state.
# self.base_env_kwargs["observe_state"] = False
# elif self.force_state_observations:
# self.base_env_kwargs["observe_state"] = True
self._using_custom_envs_foreach_task: bool = False
if self.train_envs:
self._using_custom_envs_foreach_task = True
if self.dataset == defaults["dataset"]:
# avoid the `dataset` key keeping the default value of "CartPole-v0" when we pass
# envs for each task (and no value for the `dataset` argument).
self.dataset = None
# TODO: Raise a warning if we're going to overwrite a non-default nb_tasks?
self.nb_tasks = len(self.train_envs)
assert self.train_steps_per_task or self.train_max_steps
if self.train_steps_per_task is None:
self.train_steps_per_task = self.train_max_steps // self.nb_tasks
# TODO: Should we use the task schedules to tell the length of each task?
if self.test_steps_per_task in [defaults["test_steps_per_task"], None]:
self.test_steps_per_task = self.test_max_steps // self.nb_tasks
assert self.test_steps_per_task
assert self.train_steps_per_task == self.train_max_steps // self.nb_tasks, (
self.train_max_steps,
self.train_steps_per_task,
self.nb_tasks,
)
task_schedule_keys = np.linspace(
0, self.train_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
).tolist()
self.train_task_schedule = self.train_task_schedule or {
key: {} for key in task_schedule_keys
}
self.val_task_schedule = self.train_task_schedule.copy()
assert self.test_steps_per_task == self.test_max_steps // self.nb_tasks, (
self.test_max_steps,
self.test_steps_per_task,
self.nb_tasks,
)
test_task_schedule_keys = np.linspace(
0, self.test_max_steps, self.nb_tasks + 1, endpoint=True, dtype=int
).tolist()
self.test_task_schedule = self.test_task_schedule or {
key: {} for key in test_task_schedule_keys
}
if not self.val_envs:
# TODO: Use a wrapper that sets a different random seed?
self.val_envs = self.train_envs.copy()
if not self.test_envs:
# TODO: Use a wrapper that sets a different random seed?
self.test_envs = self.train_envs.copy()
if (
any(self.train_task_schedule.values())
or any(self.val_task_schedule.values())
or any(self.test_task_schedule.values())
):
raise RuntimeError(
"Can't use a non-empty task schedule when passing the " "train/valid/test envs."
)
self.train_dataset: Union[str, Callable[[], gym.Env]] = self.train_envs[0]
self.val_dataset: Union[str, Callable[[], gym.Env]] = self.val_envs[0]
self.test_dataset: Union[str, Callable[[], gym.Env]] = self.test_envs[0]
# TODO: Add wrappers with the fixed task id for each env, if necessary, right?
else:
if self.val_envs or self.test_envs:
raise RuntimeError(
"Can't pass `val_envs` or `test_envs` without passing `train_envs`."
)
# Call super().__post_init__() (delegates up the chain: IncrementalAssumption->DiscreteRL->ContinualRL)
# NOTE: This deep inheritance isn't ideal. Should probably use composition instead somehow.
super().__post_init__()
if self._using_custom_envs_foreach_task:
# TODO: Use 'no-op' task schedules for now.
# self.train_task_schedule.clear()
# self.val_task_schedule.clear()
# self.test_task_schedule.clear()
pass
# TODO: Check that all the envs have the same observation spaces!
# (If possible, find a way to check this without having to instantiate all
# the envs.)
# TODO: If the dataset has a `max_path_length` attribute, then it's probably
# a Mujoco / metaworld / etc env, and so we set a limit on the episode length to
# avoid getting an error.
max_path_length: Optional[int] = getattr(self._temp_train_env, "max_path_length", None)
if self.max_episode_steps is None and max_path_length is not None:
assert max_path_length > 0
logger.info(
f"Setting the max episode steps to {max_path_length} because a 'max_path_length' "
f"attribute is present on the train env."
)
self.max_episode_steps = max_path_length
# if self.dataset == "MetaMonsterKong-v0":
# # TODO: Limit the episode length in monsterkong?
# # TODO: Actually end episodes when reaching a task boundary, to force the
# # level to change?
# self.max_episode_steps = self.max_episode_steps or 500
# FIXME: Really annoying little bugs with these three arguments!
# self.nb_tasks = self.max_steps // self.steps_per_task
@property
def current_task_id(self) -> int:
    """Index of the current task, as stored in `self._current_task_id`."""
    return self._current_task_id

@current_task_id.setter
def current_task_id(self, value: int) -> None:
    """Set the current task index.

    When the value differs from the stored one, every `_has_setup_*` flag is reset
    to False so that the wrappers get re-created for the new task.
    """
    task_changed = value != self._current_task_id
    if task_changed:
        # TODO: No idea what the difference is between `predict` and test.
        # TODO: There are now also teardown hooks, maybe use them?
        for flag_name in (
            "_has_setup_fit",
            "_has_setup_validate",
            "_has_setup_test",
            "_has_setup_predict",
        ):
            setattr(self, flag_name, False)
    self._current_task_id = value
@property
def train_task_lengths(self) -> List[int]:
"""Gives the length of each training task (in steps for now)."""
return [
task_b_step - task_a_step
for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys()))
]
@property
def train_phase_lengths(self) -> List[int]:
"""Gives the length of each training 'phase', i.e. the maximum number of (steps
for now) that can be taken in the training environment, in a single call to .fit
"""
return [
task_b_step - task_a_step
for task_a_step, task_b_step in pairwise(sorted(self.train_task_schedule.keys()))
]
@property
def current_train_task_length(self) -> int:
    """Deprecated field, gives back the max number of steps per task.

    With a stationary context this is the sum of all the task lengths; otherwise it
    is the length of the task at index `self.current_task_id`.
    """
    if not self.stationary_context:
        return self.train_task_lengths[self.current_task_id]
    return sum(self.train_task_lengths)
@property
def task_label_space(self) -> gym.Space:
    """Space of the task labels: `Discrete(nb_tasks)`, wrapped in `Sparse` when the
    task labels aren't available at both train and test time.

    The sparsity is 0.5 when labels are available at exactly one of train/test time,
    and 1 when they are available at neither.
    """
    # TODO: Explore an alternative design for the task sampling, based more around
    # gym spaces rather than the generic function approach that's currently used?
    # IDEA: Might be cleaner to put this in the assumption class
    labels_space = spaces.Discrete(self.nb_tasks)
    if self.task_labels_at_train_time and self.task_labels_at_test_time:
        return labels_space
    if self.task_labels_at_train_time ^ self.task_labels_at_test_time:
        # We have task labels "50%" of the time, ish:
        sparsity = 0.5
    else:
        sparsity = 1
    return Sparse(labels_space, sparsity=sparsity)
def setup(self, stage: str = None) -> None:
    """Prepare the train/val/test datasets for the current task.

    Called before the start of each task during training, validation and testing.

    After delegating to the parent `setup`, nothing else happens unless custom envs
    were passed for each task. In that case the `train_dataset` / `val_dataset` /
    `test_dataset` attributes are swapped out so that the usual
    `train/val/test_dataloader` methods keep working unchanged:

    - stationary context: all the envs are combined with a multi-env wrapper;
    - known task boundaries at train time: the env of the current task is used;
    - otherwise: the envs are concatenated one after the other.

    Finally, the observation/action spaces of all the train/valid/test envs are
    compared (a warning is emitted on mismatch).

    Args:
        stage: Name of the stage being set up, forwarded to the parent `setup`.
    """
    super().setup(stage=stage)
    # What's done in ContinualRLSetting:
    # if stage in {"fit", None}:
    #     self.train_wrappers = self.create_train_wrappers()
    #     self.valid_wrappers = self.create_valid_wrappers()
    # elif stage in {"test", None}:
    #     self.test_wrappers = self.create_test_wrappers()
    if not self._using_custom_envs_foreach_task:
        # TODO: Should we populate the `self.train_envs`, `self.val_envs` and
        # `self.test_envs` fields here as well, just to be consistent?
        return

    logger.debug(
        f"Using custom environments from `self.[train/val/test]_envs` for task "
        f"{self.current_task_id}."
    )
    # NOTE: This import has to happen before *all* the branches below: a
    # function-local import makes these names local to the whole function, so
    # importing only inside one branch leaves them unbound in the others.
    from sequoia.settings.rl.discrete.multienv_wrappers import (
        ConcatEnvsWrapper,
        RandomMultiEnvWrapper,
        RoundRobinWrapper,
    )

    if self.stationary_context:
        # NOTE: Here is how this supports passing custom envs for each task: We
        # just switch out the value of these properties, and let the
        # `train/val/test_dataloader` methods work as usual!
        wrapper_type = RandomMultiEnvWrapper
        if self.task_labels_at_train_time or "pytest" in sys.modules:
            # A RoundRobin wrapper can be used when task labels are available,
            # because the task labels are available anyway, so it doesn't matter
            # if the Method figures out the pattern in the task IDs.
            # A RoundRobinWrapper is also used during testing, because it
            # makes it easier to check that things are working correctly: for
            # example that each task is visited equally, even when the number of
            # total steps is small.
            wrapper_type = RoundRobinWrapper
        # NOTE: Not instantiating all the train/val/test envs here. Instead, the
        # multienv wrapper will lazily instantiate the envs as needed.
        self.train_dataset = wrapper_type(
            self.train_envs, add_task_ids=self.task_labels_at_train_time
        )
        self.val_dataset = wrapper_type(
            self.val_envs, add_task_ids=self.task_labels_at_train_time
        )
        self.test_dataset = ConcatEnvsWrapper(
            self.test_envs, add_task_ids=self.task_labels_at_test_time
        )
    elif self.known_task_boundaries_at_train_time:
        self.train_dataset = self.train_envs[self.current_task_id]
        self.val_dataset = self.val_envs[self.current_task_id]
        # TODO: The test loop goes through all the envs, hence this doesn't really
        # work.
        self.test_dataset = self.test_envs[self.current_task_id]
    else:
        self.train_dataset = ConcatEnvsWrapper(
            self.train_envs, add_task_ids=self.task_labels_at_train_time
        )
        self.val_dataset = ConcatEnvsWrapper(
            self.val_envs, add_task_ids=self.task_labels_at_train_time
        )
        self.test_dataset = ConcatEnvsWrapper(
            self.test_envs, add_task_ids=self.task_labels_at_test_time
        )

    # Check that the observation/action spaces are all the same for all
    # the train/valid/test envs
    self._check_all_envs_have_same_spaces(
        envs_or_env_functions=self.train_envs,
        wrappers=self.train_wrappers,
    )
    # TODO: Inconsistent naming between `val_envs` and `valid_wrappers` etc.
    # NOTE: The wrappers attribute is `valid_wrappers` (not `val_wrappers`).
    self._check_all_envs_have_same_spaces(
        envs_or_env_functions=self.val_envs,
        wrappers=self.valid_wrappers,
    )
    self._check_all_envs_have_same_spaces(
        envs_or_env_functions=self.test_envs,
        wrappers=self.test_wrappers,
    )
def test_dataloader(self, batch_size: Optional[int] = None, num_workers: Optional[int] = None):
    """Create the test environment.

    When custom envs are used for each task, the test budget (`test_max_steps` and,
    if set, `test_max_episodes`) is temporarily divided by `nb_tasks` while the
    parent `test_dataloader` builds the env, so that each per-task test env gets an
    equal share. The original values are always restored afterwards, even if
    creating the env raises.

    Args:
        batch_size: Forwarded to the parent `test_dataloader`.
        num_workers: Forwarded to the parent `test_dataloader`.

    Returns:
        Whatever the parent `test_dataloader` returns.
    """
    if not self._using_custom_envs_foreach_task:
        return super().test_dataloader(batch_size=batch_size, num_workers=num_workers)

    # IDEA: Pretty hacky, but might be cleaner than adding fields for the moment.
    test_max_steps = self.test_max_steps
    test_max_episodes = self.test_max_episodes
    try:
        self.test_max_steps = test_max_steps // self.nb_tasks
        if self.test_max_episodes:
            self.test_max_episodes = test_max_episodes // self.nb_tasks
        return super().test_dataloader(batch_size=batch_size, num_workers=num_workers)
    finally:
        # Restore the full budget no matter what happened above, so that a failure
        # doesn't leave the setting with a permanently reduced test budget.
        self.test_max_steps = test_max_steps
        self.test_max_episodes = test_max_episodes
def test_loop(self, method: Method["IncrementalRLSetting"]):
    """Run the test loop and return its results.

    Without custom per-task envs, this simply defers to the parent test loop.

    With custom envs, one test env is created per task (through
    `self.test_dataloader()`), each is given a fixed task label, and they are
    concatenated into a single env. `self.test_dataloader` is temporarily replaced so
    that the parent test loop uses that joined env, and is always restored afterwards.
    The metrics gathered by each per-task test env are then collected into one
    `TaskResults` per task.

    Args:
        method: The method being evaluated. Its `on_task_switch` (if any) is used as
            the task-switch callback when task boundaries are known at test time.

    Returns:
        The results of the parent test loop when not using custom envs, otherwise a
        `DiscreteTaskAgnosticRLSetting.Results` with one entry per task.
    """
    if not self._using_custom_envs_foreach_task:
        return super().test_loop(method)

    # TODO: If we're using custom envs for each task, then the test loop needs to be
    # re-organized.
    assert self.nb_tasks == len(self.test_envs), "assuming this for now."
    # TODO: Make sure that self.test_dataloader() uses the right number of steps
    # per test task (current hard-set to self.test_max_steps).
    test_envs = [self.test_dataloader() for _ in range(self.nb_tasks)]

    # TODO: Move these wrappers to sequoia/common/gym_wrappers/multienv_wrappers or
    # something, and then import them correctly at the top of this file.
    from ..discrete.multienv_wrappers import ConcatEnvsWrapper

    task_label_space = spaces.Discrete(self.nb_tasks)
    if self.batch_size is not None:
        task_label_space = batch_space(task_label_space, self.batch_size)
    if not self.task_labels_at_test_time:
        task_label_space = Sparse(task_label_space, sparsity=1)

    test_envs_with_task_ids = [
        FixedTaskLabelWrapper(
            env=test_env,
            task_label=(i if self.task_labels_at_test_time else None),
            task_label_space=task_label_space,
        )
        for i, test_env in enumerate(test_envs)
    ]

    # NOTE: This check is a bit redundant here, since IncrementalRLSetting always has
    # task boundaries, but this might be useful if moving this to
    # DiscreteTaskIncrementalRL.
    # The callback defaults to None so that the name is always bound, even when the
    # task boundaries aren't known at test time.
    on_task_switch_callback: Optional[Callable[[Optional[int]], None]] = None
    if self.known_task_boundaries_at_test_time:
        on_task_switch_callback = getattr(method, "on_task_switch", None)

    # NOTE: Not adding task ids with this, doing it instead with a dedicated wrapper
    # for each env above.
    joined_test_env = ConcatEnvsWrapper(
        test_envs_with_task_ids,
        add_task_ids=False,
        on_task_switch_callback=on_task_switch_callback,
    )

    # TODO: Use this 'joined' test environment in this test loop somehow.
    # IDEA: Hacky way to do it: temporarily swap out `self.test_dataloader`.
    _test_dataloader_method = self.test_dataloader
    self.test_dataloader = lambda *args, **kwargs: joined_test_env
    try:
        super().test_loop(method)
    finally:
        # Always put the real method back, even if the test loop raised.
        self.test_dataloader = _test_dataloader_method

    test_loop_results = DiscreteTaskAgnosticRLSetting.Results()
    for test_env in test_envs:
        # TODO: The results are still of the wrong type, because we aren't changing
        # the type of test environment or the type of Results
        results_of_wrong_type: IncrementalRLResults = test_env.get_results()
        # For now this weird setup means that there will be only one 'result'
        # object in this that actually has metrics, so all of them get merged.
        all_metrics: List[EpisodeMetrics] = sum(
            [result.metrics for result in results_of_wrong_type.task_results], []
        )
        # TODO: Also transfer the other properties like runtime, online performance,
        # etc?
        # TODO: Maybe add addition for these?
        test_loop_results.task_results.append(TaskResults(metrics=all_metrics))
    return test_loop_results
@property
def phases(self) -> int:
    """Number of training 'phases', i.e. how many times `method.fit` will be called.

    In this Incremental-RL Setting, fit is called once per task, so this is equal to
    `self.nb_tasks`. (Same as ClassIncrementalSetting in SL).
    """
    number_of_phases: int = self.nb_tasks
    return number_of_phases
@staticmethod
def _make_env(
    base_env: Union[str, gym.Env, Callable[[], gym.Env]],
    wrappers: List[Callable[[gym.Env], gym.Env]] = None,
    **base_env_kwargs: Dict,
) -> gym.Env:
    """Helper function to create a single (non-vectorized) environment.

    This is also used to create the env whenever `self.dataset` is a string that
    isn't registered in gym. This happens for example when using an environment from
    meta-world (or mtenv).

    Args:
        base_env: Env id, env instance, or callable that creates the env.
        wrappers: Wrappers to apply to the env. The list that is passed in is never
            modified; may be None.
        **base_env_kwargs: Keyword arguments used when creating the base env.

    Returns:
        The result of `ContinualRLSetting._make_env` for the resolved base env.

    Raises:
        NotImplementedError: If `base_env` is a metaworld env id that isn't found in
            any of the supported metaworld benchmarks.
    """
    # Check if the env is registered in a known 'third party' gym-like package, and
    # if needed, create the base env in the way that package requires.
    if isinstance(base_env, str):
        env_id = base_env
        # Check if the id belongs to mtenv
        if MTENV_INSTALLED and env_id in mtenv_envs:
            from mtenv import make as mtenv_make

            base_env = mtenv_make(env_id, **base_env_kwargs)
            # Add a wrapper that will remove the task information, because we use
            # the same MultiTaskEnv wrapper for all the environments.
            # NOTE: A new list is created here rather than inserting in-place: the
            # default is None, and the caller's list (which may be reused across
            # several calls) must not accumulate adapters.
            wrappers = [MTEnvAdapterWrapper, *(wrappers or [])]

        if METAWORLD_INSTALLED and env_id in metaworld_envs:
            # TODO: Should we use a particular benchmark here?
            # For now, we find the first benchmark that has an env with this name.
            import metaworld

            for benchmark_class in [metaworld.ML10]:
                benchmark = benchmark_class()
                if env_id in benchmark.train_classes.keys():
                    # NOTE: (@lebrice) Here I believe it's better to just have the
                    # constructor (the env type, not an instance), that way we
                    # re-create the env for each task. I don't know for sure that
                    # `set_task` can be called more than once in metaworld.
                    base_env = benchmark.train_classes[env_id]
                    break
            else:
                raise NotImplementedError(
                    f"Can't find a metaworld benchmark that uses env {env_id}"
                )

    return ContinualRLSetting._make_env(
        base_env=base_env,
        wrappers=wrappers,
        **base_env_kwargs,
    )
def create_task_schedule(
    self,
    temp_env: gym.Env,
    change_steps: List[int],
    seed: int = None,
) -> Dict[int, Dict]:
    """Create the task schedule: a dict mapping each step in `change_steps` to a task.

    When custom envs are used for each task, every task is an empty dict, since a
    task schedule is only useful for changing something about a shared 'base' env.
    Otherwise each task is sampled with `type(self)._task_sampling_function`.
    """
    if self._using_custom_envs_foreach_task:
        # Create a ('no-op') task schedule dict, just to fit in.
        return {boundary_step: {} for boundary_step in change_steps}

    # TODO: Make it possible to use something other than steps as keys in the task
    # schedule, something like a NamedTuple[int, DeltaType], e.g. Episodes(10) or
    # Steps(10), something like that!
    # IDEA: Even fancier, we could use a TimeDelta to say "do one hour of task 0"!!
    # TODO: Add a `stage` argument (an enum or something with 'train', 'valid'
    # 'test' as values, and pass it to this function. Tasks should be the same
    # in train/valid for now, given the same task Id.
    # TODO: When the Results become able to handle a different ordering of tasks
    # at train vs test time, allow the test task schedule to have different
    # ordering than train / valid.
    return {
        boundary_step: type(self)._task_sampling_function(
            temp_env,
            step=boundary_step,
            change_steps=change_steps,
            seed=seed,
        )
        for boundary_step in change_steps
    }
def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
    """Create and return the wrappers to apply to the train environment of the current task."""
    # TODO: Clean this up a bit?
    if not self._using_custom_envs_foreach_task:
        base_env = self.train_dataset
    else:
        # TODO: Maybe do something different here, since we don't actually want to
        # add a CL wrapper at all in this case?
        assert not any(self.train_task_schedule.values())
        base_env = self.train_envs[self.current_task_id]

    if not self.stationary_context:
        # Only the current task is used, from step 0 up to the length of one task.
        task_for_this_phase = list(self.train_task_schedule.values())[self.current_task_id]
        steps_in_task = self.train_max_steps // self.nb_tasks
        task_schedule_slice = {
            0: task_for_this_phase,
            steps_in_task: task_for_this_phase,
        }
    else:
        task_schedule_slice = self.train_task_schedule.copy()
        assert len(task_schedule_slice) >= 2
        assert self.nb_tasks == len(self.train_task_schedule) - 1
        # Need to pop the last task, so that we don't sample it by accident!
        final_step = max(task_schedule_slice)
        task_schedule_slice.pop(final_step)
        # Shift the second-to-last task to the last step.
        previous_boundary = max(task_schedule_slice)
        task_schedule_slice[final_step] = task_schedule_slice.pop(previous_boundary)
        if 0 not in task_schedule_slice:
            assert self.nb_tasks == 1
            task_schedule_slice[0] = task_schedule_slice[final_step]

    return self._make_wrappers(
        base_env=base_env,
        task_schedule=task_schedule_slice,
        # TODO: Removing this, but we have to check that it doesn't change when/how
        # the task boundaries are given to the Method.
        # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
        task_labels_available=self.task_labels_at_train_time,
        transforms=self.transforms + self.train_transforms,
        starting_step=0,
        max_steps=max(task_schedule_slice.keys()),
        new_random_task_on_reset=self.stationary_context,
    )
def create_valid_wrappers(self):
    """Create and return the wrappers to apply to the validation environment of the
    current task.
    """
    if not self._using_custom_envs_foreach_task:
        base_env = self.val_dataset
    else:
        # TODO: Maybe do something different here, since we don't actually want to
        # add a CL wrapper at all in this case?
        assert not any(self.val_task_schedule.values())
        base_env = self.val_envs[self.current_task_id]

    if not self.stationary_context:
        # Only the current task is used, from step 0 up to the length of one task.
        task_for_this_phase = list(self.val_task_schedule.values())[self.current_task_id]
        steps_in_task = self.train_max_steps // self.nb_tasks
        task_schedule_slice = {
            0: task_for_this_phase,
            steps_in_task: task_for_this_phase,
        }
    else:
        task_schedule_slice = self.val_task_schedule

    last_step = max(task_schedule_slice.keys())
    return self._make_wrappers(
        base_env=base_env,
        task_schedule=task_schedule_slice,
        # TODO: Removing this, but we have to check that it doesn't change when/how
        # the task boundaries are given to the Method.
        # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
        task_labels_available=self.task_labels_at_train_time,
        transforms=self.transforms + self.val_transforms,
        starting_step=0,
        max_steps=last_step,
        new_random_task_on_reset=self.stationary_context,
    )
def create_test_wrappers(self):
    """Create and return the wrappers to apply to the test environment.

    The base env is the test env of the current task when custom envs are used for
    each task, and `self.test_dataset` otherwise. The whole test task schedule is
    used, for `self.test_max_steps` steps.

    The availability of the task labels is taken from `task_labels_at_test_time`
    (not the train-time flag), consistently with how the test envs are created in
    `setup` and `test_loop`.
    """
    if self._using_custom_envs_foreach_task:
        # TODO: Maybe do something different here, since we don't actually want to
        # add a CL wrapper at all in this case?
        assert not any(self.test_task_schedule.values())
        base_env = self.test_envs[self.current_task_id]
    else:
        base_env = self.test_dataset

    return self._make_wrappers(
        base_env=base_env,
        task_schedule=self.test_task_schedule,
        # TODO: Removing this, but we have to check that it doesn't change when/how
        # the task boundaries are given to the Method.
        # sharp_task_boundaries=self.known_task_boundaries_at_train_time,
        task_labels_available=self.task_labels_at_test_time,
        transforms=self.transforms + self.test_transforms,
        starting_step=0,
        max_steps=self.test_max_steps,
        new_random_task_on_reset=self.stationary_context,
    )
def _check_all_envs_have_same_spaces(
    self,
    envs_or_env_functions: List[Union[str, gym.Env, Callable[[], gym.Env]]],
    wrappers: List[Callable[[gym.Env], gym.Wrapper]],
) -> None:
    """Compare the observation/action spaces of every env against those of the first.

    Each entry is turned into an env with `self._make_env` (using `wrappers` and
    `self.base_env_kwargs`). Nothing is raised on a mismatch: a `RuntimeWarning` is
    emitted instead.
    """

    def _same_shape_different_bounds(space_a: gym.Space, space_b: gym.Space) -> bool:
        # True when both spaces are Boxes (or TypedDictSpaces with an "x" entry)
        # whose shapes match.
        both_boxes = isinstance(space_a, spaces.Box) and isinstance(space_b, spaces.Box)
        if both_boxes and space_a.shape == space_b.shape:
            return True
        return (
            isinstance(space_a, TypedDictSpace)
            and isinstance(space_b, TypedDictSpace)
            and "x" in space_a.spaces
            and "x" in space_b.spaces
            and space_a.x.shape == space_b.x.shape
        )

    def warn_spaces_are_different(
        task_id: int, kind: str, first_env: gym.Env, task_env: gym.Env
    ) -> None:
        if kind == "observation":
            task_space = task_env.observation_space
            first_space = first_env.observation_space
        else:
            task_space = task_env.action_space
            first_space = first_env.action_space
        warnings.warn(
            RuntimeWarning(
                colorize(
                    f"Env at task {task_id} doesn't have the same {kind} "
                    f"space as the environment of the first task: \n"
                    f"{task_space} \n"
                    f"!=\n"
                    f"{first_space} \n"
                    f"This isn't fully supported yet. Don't expect this to work.",
                    "yellow",
                )
            )
        )

    # NOTE: The envs created here are not closed for now, in case 'live' envs were
    # passed to the Setting.
    reference_env = self._make_env(
        base_env=envs_or_env_functions[0], wrappers=wrappers, **self.base_env_kwargs
    )
    for task_id, env_spec in enumerate(envs_or_env_functions[1:], start=1):
        task_env = self._make_env(
            base_env=env_spec,
            wrappers=wrappers,
            **self.base_env_kwargs,
        )

        if task_env.observation_space != reference_env.observation_space:
            if _same_shape_different_bounds(
                task_env.observation_space, reference_env.observation_space
            ):
                warnings.warn(
                    RuntimeWarning(
                        f"The shape of the observation space is the same, but the bounds are "
                        f"different between the first env and the env of task {task_id}!"
                    )
                )
            else:
                warn_spaces_are_different(task_id, "observation", reference_env, task_env)

        if task_env.action_space != reference_env.action_space:
            warn_spaces_are_different(task_id, "action", reference_env, task_env)
def _make_wrappers(
    self,
    base_env: Union[str, gym.Env, Callable[[], gym.Env]],
    task_schedule: Dict[int, Dict],
    # sharp_task_boundaries: bool,
    task_labels_available: bool,
    transforms: List[Transforms],
    starting_step: int,
    max_steps: int,
    new_random_task_on_reset: bool,
) -> List[Callable[[gym.Env], gym.Env]]:
    """Build the list of wrappers to apply to `base_env`.

    When custom envs are used for each task, the task schedule is ignored (with a
    logged warning if it has non-empty tasks) before calling the parent
    `_make_wrappers`. In that case, unless `new_random_task_on_reset` is set, a
    `FixedTaskLabelWrapper` for the current task is appended to the parent's wrappers.
    """
    if self._using_custom_envs_foreach_task:
        schedule_has_tasks = any(task_schedule.values())
        if schedule_has_tasks:
            logger.warning(
                RuntimeWarning(
                    f"Ignoring task schedule {task_schedule}, since custom envs were "
                    f"passed for each task!"
                )
            )
        task_schedule = None

    wrappers = super()._make_wrappers(
        base_env=base_env,
        task_schedule=task_schedule,
        task_labels_available=task_labels_available,
        transforms=transforms,
        starting_step=starting_step,
        max_steps=max_steps,
        new_random_task_on_reset=new_random_task_on_reset,
    )

    # If the user passed a specific env to use for each task, then there won't
    # be a MultiTaskEnv wrapper in `wrappers`, since the task schedule is
    # None/empty. Instead, we add a Wrapper that always gives the task ID of the
    # current task.
    # TODO: There are some 'unused' args above: `starting_step`, `max_steps`,
    # `new_random_task_on_reset` which are still passed to the super() call, but
    # just unused.
    # TODO: When `new_random_task_on_reset` is set, nothing is added for now. Add a
    # MultiTaskEnv wrapper of some sort that alternates between the source envs.
    if self._using_custom_envs_foreach_task and not new_random_task_on_reset:
        assert not task_schedule
        if task_labels_available:
            task_label = self.current_task_id
            task_label_space = spaces.Discrete(self.nb_tasks)
        else:
            task_label = None
            task_label_space = Sparse(spaces.Discrete(self.nb_tasks), sparsity=1.0)
        fixed_label_wrapper = partial(
            FixedTaskLabelWrapper,
            task_label=task_label,
            task_label_space=task_label_space,
        )
        wrappers.append(fixed_label_wrapper)

    if is_monsterkong_env(base_env):
        # TODO: Need to register a MetaMonsterKong-State-v0 or something like that!
        # TODO: Maybe add another field for 'force_state_observations' ?
        # if self.force_pixel_observations:
        pass
    return wrappers
class MTEnvAdapterWrapper(TransformObservation):
    """Observation transform for MTEnv environments.

    By default, only the "env_obs" entry of each observation is kept.
    """

    # TODO: For now, we remove the task id portion of the space and of the observation
    # dicts.
    def __init__(self, env: MTEnv, f: Callable = operator.itemgetter("env_obs")):
        super().__init__(env, f)
        # self.observation_space = self.env.observation_space["env_obs"]
def make_metaworld_env(env_class: Type[MetaWorldEnv], tasks: List["Task"]) -> MetaWorldEnv:
    """Create an env of type `env_class`, wrapped so that it switches between `tasks`.

    The env is first set to `tasks[0]`, then wrapped in a `MultiTaskEnvironment`
    whose task schedule maps each task index to a `set_task(task)` call.
    """
    base_env = env_class()
    base_env.set_task(tasks[0])
    # TODO: Could maybe replace this with the 'RoundRobin' or 'Random' wrapper from
    # `multienv_wrappers.py` by making it appear like it's multiple envs, but actually
    # share the env instance
    set_task_schedule = {}
    for task_index, task in enumerate(tasks):
        set_task_schedule[task_index] = operator.methodcaller("set_task", task)
    return MultiTaskEnvironment(
        base_env,
        task_schedule=set_task_schedule,
        new_random_task_on_reset=True,
        add_task_dict_to_info=False,
        add_task_id_to_obs=False,
    )
def wrap(env_or_env_fn: Union[gym.Env, EnvFactory], wrappers: List[gym.Wrapper] = None) -> gym.Env:
    """Return the env (created by calling `env_or_env_fn` if it isn't already a
    `gym.Env`) with each of the `wrappers` applied in order.
    """
    if isinstance(env_or_env_fn, gym.Env):
        wrapped_env: gym.Env = env_or_env_fn
    else:
        wrapped_env = env_or_env_fn()
    for apply_wrapper in wrappers or []:
        wrapped_env = apply_wrapper(wrapped_env)
    return wrapped_env
def create_env(
    env_fn: Union[Type[gym.Env], Callable[[], gym.Env]],
    kwargs: Dict = None,
    wrappers: List[Callable[[gym.Env], gym.Env]] = None,
    seed: int = None,
) -> gym.Env:
    """Create, wrap and (optionally) seed an environment.

    1. Create an env instance by calling `env_fn` with `kwargs` (if any);
    2. Wrap it with the wrappers in `wrappers`, if any, in order;
    3. Seed it with `seed` if it is not None.
    """
    creation_kwargs = kwargs or {}
    created_env = env_fn(**creation_kwargs)
    for apply_wrapper in wrappers or []:
        created_env = apply_wrapper(created_env)
    if seed is None:
        return created_env
    created_env.seed(seed)
    return created_env
def make_lpg_ftw_datasets(
    dataset: str,
) -> Tuple[List[EnvFactory], List[EnvFactory], List[EnvFactory]]:
    """Create the train/valid/test env factories for an LPG-FTW style dataset.

    `dataset` must follow the format
    'LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}'.
    The parameters of each task are sampled with a generator seeded with a fixed,
    per-env seed, so the tasks are reproducible. The same factory is used for the
    train, valid and test env of a given task.

    Raises:
        ValueError: If `dataset` doesn't have exactly 5 dash-separated parts.
        NotImplementedError: If the modification type isn't 'bodyparts' or 'gravity'.
    """
    # IDEA: "LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}",
    # TODO: Instead of doing what I'm doing here, we could instead add an argument that gets
    # passed to the task creation function, for instance to get only a bodysize task, or
    # only a gravity task, etc.
    name_parts = dataset.split("-")
    if len(name_parts) != 5:
        raise ValueError(
            "Expected the name to follow this format: \n"
            "\t 'LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}' \n"
            f"but got {dataset}"
        )
    modification_type, env_name, version = name_parts[2:]

    # NOTE: From the LPG-FTW repo:
    # > "500 for halfcheetah, 600 for hopper, 700 for walker"
    task_creation_seed = {"HalfCheetah": 500, "Hopper": 600, "Walker2d": 700}[env_name]
    rng = np.random.default_rng(task_creation_seed)

    from sequoia.settings.rl.envs.mujoco import (
        ContinualHalfCheetahV2Env,
        ContinualHalfCheetahV3Env,
        ContinualHopperV2Env,
        ContinualHopperV3Env,
        ContinualWalker2dV2Env,
        ContinualWalker2dV3Env,
    )

    env_classes: Dict[str, Dict[str, Type[gym.Env]]] = {
        "HalfCheetah": {
            "v2": ContinualHalfCheetahV2Env,
            "v3": ContinualHalfCheetahV3Env,
        },
        "Hopper": {"v2": ContinualHopperV2Env, "v3": ContinualHopperV3Env},
        "Walker2d": {"v2": ContinualWalker2dV2Env, "v3": ContinualWalker2dV3Env},
    }
    env_class = env_classes[env_name][version]

    # NOTE: Could also get the list of all geoms from the BODY_NAMES property on the
    # classes above, but the LPG-FTW repo actually uses a subset of those:
    bodyparts_for_env: Dict[str, List[str]] = {
        "HalfCheetah": ["torso", "fthigh", "fshin", "ffoot"],
        "Hopper": ["torso", "thigh", "leg", "foot"],
        "Walker2d": ["torso", "thigh", "leg", "foot"],
    }
    # From the paper: "We created T_max=20 tasks for HalfCheetah and Hopper domains, and
    # T_max=50 tasks for Walker2d domains."
    nb_tasks = 50 if env_name not in ["HalfCheetah", "Hopper"] else 20

    original_gravity = -9.81
    task_params: List[Dict] = []
    sampled_values = []
    for task_id in range(nb_tasks):
        # NOTE: Could also support a different type of modification per task, by
        # passing a list of types of modifications to use!
        # Each branch must produce a set of (distinct, reproducible) keyword
        # arguments for the given task.
        if modification_type == "bodyparts":
            body_names = bodyparts_for_env[env_name]
            # between 0.5 and 1.5, with 4 digits of precision.
            scale_factors = (rng.random(len(body_names)) + 0.5).round(4)
            sampled_values.append(scale_factors)
            size_scales = dict(zip(body_names, scale_factors))
            # NOTE: Scale the mass by the same factor as the size.
            task_kwargs = {
                "body_name_to_size_scale": size_scales,
                "body_name_to_mass_scale": size_scales.copy(),
            }
        elif modification_type == "gravity":
            task_gravity = round(((rng.random() + 0.5) * original_gravity), 4)
            sampled_values.append(task_gravity)
            task_kwargs = {"gravity": task_gravity}
        else:
            raise NotImplementedError(
                f"Unsupported modification type: '{modification_type}'! Supported values are "
                f"'bodyparts', 'gravity'."
            )
        logger.info(f"Arguments for task {task_id}: {task_kwargs}")
        task_params.append(task_kwargs)

    logger.debug(np.array(sampled_values).tolist())

    # NOTE: All envs in LPG-FTW use max_episode_steps of 1000.
    # max_episode_steps = 1000
    # wrappers = [partial(TimeLimit, max_episode_steps=max_episode_steps)]

    # One function per task, which will create the env with the given task.
    env_factories: List[EnvFactory] = [
        partial(env_class, **task_kwargs) for task_kwargs in task_params
    ]
    train_envs: List[EnvFactory] = list(env_factories)
    valid_envs: List[EnvFactory] = list(env_factories)
    test_envs: List[EnvFactory] = list(env_factories)
    return train_envs, valid_envs, test_envs
================================================
FILE: sequoia/settings/rl/incremental/setting_test.py
================================================
import dataclasses
import enum
import functools
import inspect
import math
import random
from typing import Any, ClassVar, Dict, NamedTuple, Optional, Type
import gym
import numpy as np
import pytest
from gym import spaces
from gym.envs.classic_control import CartPoleEnv
from sequoia.common.config import Config
from sequoia.common.gym_wrappers import RenderEnvWrapper
from sequoia.common.spaces import Image, Sparse
from sequoia.conftest import (
metaworld_required,
monsterkong_required,
mtenv_required,
mujoco_required,
slow,
xfail_param,
)
from sequoia.methods.random_baseline import RandomBaselineMethod
from sequoia.settings.assumptions.incremental_test import OtherDummyMethod
from sequoia.settings.rl import TaskIncrementalRLSetting
from sequoia.settings.rl.continual.setting_test import all_different_from_next
from sequoia.settings.rl.setting_test import DummyMethod
from ..discrete.setting_test import (
TestDiscreteTaskAgnosticRLSetting as DiscreteTaskAgnosticRLSettingTests,
)
from .setting import IncrementalRLSetting
class TestIncrementalRLSetting(DiscreteTaskAgnosticRLSettingTests):
Setting: ClassVar[Type[Setting]] = IncrementalRLSetting
dataset: pytest.fixture
@pytest.fixture()
def setting_kwargs(self, dataset: str, nb_tasks: int, config: Config):
    """Fixture used to pass keyword arguments when creating a Setting."""
    mujoco_like_prefixes = ("walker2d", "hopper", "halfcheetah", "continual")
    if dataset.lower().startswith(mujoco_like_prefixes):
        # Placeholder: no overrides for these datasets at the moment.
        # kwargs["train_max_steps"] = 5_000
        # kwargs["max_episode_steps"] = 100
        pass
    # NOTE: Using 0 workers so I can parallelize the tests without killing my PC.
    config.num_workers = 0
    return {
        "dataset": dataset,
        "nb_tasks": nb_tasks,
        "max_episode_steps": 100,
        "config": config,
    }
def test_passing_supported_dataset(self, setting_kwargs: Dict):
    """Check the train task schedule that results from passing a supported dataset.

    Overrides the parent test, because envs can be passed for each task.
    """
    setting = self.Setting(**setting_kwargs)
    schedule = setting.train_task_schedule
    assert schedule
    if not setting.train_envs:
        # Passing the dataset created a task schedule.
        assert all(schedule.values()), "Should have non-empty tasks."
    else:
        # Passing the dataset created custom envs for each task (e.g. MT10, CW10,
        # LPG-FTW-(...)). The task schedule should have keys for the task boundary
        # steps, but values should be empty dictionaries.
        assert not any(schedule.values())
def validate_results(
    self,
    setting: IncrementalRLSetting,
    method: DummyMethod,
    results: IncrementalRLSetting.Results,
) -> None:
    """Check that the results make sense.

    The Dummy Method used also keeps useful attributes (the task ids it received and
    whether it was training at the time), which are checked here against the
    expected sequence: for each training task, one task switch while training,
    followed by one task switch per test task.
    """
    assert results
    assert results.objective
    assert len(results.task_sequence_results) == setting.nb_tasks
    final_task_results = results.task_sequence_results[-1]
    assert results.average_final_performance == sum(
        final_task_results.average_metrics_per_task
    )

    assert setting.known_task_boundaries_at_train_time
    assert setting.known_task_boundaries_at_test_time
    assert setting.task_labels_at_train_time
    # assert not setting.task_labels_at_test_time
    assert not setting.stationary_context

    n_tasks = setting.nb_tasks
    if n_tasks == 1:
        assert not method.received_task_ids
        assert not method.received_while_training
        return

    expected_task_ids = []
    expected_training_flags = []
    for train_task in range(n_tasks):
        expected_task_ids.append(train_task)
        expected_training_flags.append(True)
        for test_task in range(n_tasks):
            expected_task_ids.append(test_task if setting.task_labels_at_test_time else None)
            expected_training_flags.append(False)

    assert method.received_task_ids == expected_task_ids
    assert method.received_while_training == expected_training_flags
def test_tasks_are_different(self, setting_kwargs: Dict[str, Any], config: Config):
    """Check that each task is different from the next one.

    NOTE: Overriding this test because task schedules are empty when using custom envs for
    each task.
    """
    config = setting_kwargs.pop("config", config)
    assert config.seed is not None
    setting = self.Setting(**setting_kwargs, config=config)

    # NOTE: When custom datasets are used for each task, the values of the task schedules are
    # empty, so the condition being tested depends on how the tasks were created.
    if not setting.train_envs:
        # A single env with a task schedule: each entry of the schedule should differ from
        # the next.
        for schedule_name in ("train_task_schedule", "val_task_schedule", "test_task_schedule"):
            assert all_different_from_next(getattr(setting, schedule_name).values())
        return

    # The dataset being used resulted in creating an env per task, rather than just using one
    # env with a task schedule: make sure that the fn for creating the env of each task is
    # unique.
    for envs_name in ("train_envs", "val_envs", "test_envs"):
        assert all_different_from_next(getattr(setting, envs_name))
def test_number_of_tasks(self):
    """The setting should expose the number of tasks it was created with."""
    expected_nb_tasks = 10
    setting = self.Setting(
        dataset="CartPole-v0",
        nb_tasks=expected_nb_tasks,
        train_max_steps=10_000,
        test_max_steps=1000,
        monitor_training_performance=True,
    )
    assert setting.nb_tasks == expected_nb_tasks
def test_max_number_of_steps_per_task_is_respected(self):
    """Check that the training env gets closed once the step budget of a task is used up.

    For each task, the training env is stepped with random actions. When the step count
    reaches `setting.current_train_task_length`, the env is expected to be closed and any
    further `step` is expected to raise `gym.error.ClosedEnvironmentError`.

    NOTE(review): the early `return` below ends the whole test the first time that limit is
    reached, so the checks after the loops (and any later task) may never run — confirm that
    this is intended.
    """
    setting = self.Setting(
        dataset="CartPole-v0",
        monitor_training_performance=True,
        # train_steps_per_task=500,
        nb_tasks=2,
        train_max_steps=1000,
        test_max_steps=1000,
    )
    for task_id in range(setting.phases):
        setting.current_task_id = task_id
        train_env = setting.train_dataloader()
        # Number of steps taken so far in the training env of this task.
        total_steps = 0
        while total_steps < setting.steps_per_phase:
            print(total_steps)
            obs = train_env.reset()
            done = False
            while not done:
                if total_steps == setting.current_train_task_length:
                    # The step budget is used up: the env should refuse any further step.
                    assert train_env.is_closed()
                    with pytest.raises(gym.error.ClosedEnvironmentError):
                        obs, reward, done, info = train_env.step(
                            train_env.action_space.sample()
                        )
                    return
                else:
                    obs, reward, done, info = train_env.step(train_env.action_space.sample())
                total_steps += 1
        assert total_steps == setting.steps_per_phase
        # Resetting the env after the budget is used up should also fail.
        with pytest.raises(gym.error.ClosedEnvironmentError):
            train_env.reset()
@monsterkong_required
@pytest.mark.timeout(120)
@pytest.mark.parametrize(
    "state",
    [False, xfail_param(True, reason="TODO: MonsterkongState doesn't work?")],
)
def test_monsterkong(self, state: bool):
    """Checks that the MonsterKong env works fine with pixel and state input."""
    dataset = "StateMetaMonsterKong-v0" if state else "PixelMetaMonsterKong-v0"
    setting = self.Setting(
        dataset=dataset,
        # force_state_observations=state,
        # force_pixel_observations=(not state),
        nb_tasks=5,
        max_episode_steps=10,
        train_max_steps=500,
        test_max_steps=500,
        # train_steps_per_task=100,
        # test_steps_per_task=100,
        train_transforms=[],
        val_transforms=[],
        test_transforms=[],
    )

    if not state:
        assert setting.observation_space.x == Image(0, 255, (64, 64, 3), np.uint8)
    else:
        # State-based monsterkong: We observe a flattened version of the game state
        # (20 x 20 grid + player cell and goal cell, IIRC.)
        assert setting.observation_space.x == spaces.Box(
            0, 292, (402,), np.int16
        ), setting._temp_train_env.observation_space

    if not setting.task_labels_at_test_time:
        assert setting.task_labels_at_train_time
        # Sparsity of 0.5, since we have task labels at train time.
        expected_task_label_space = Sparse(spaces.Discrete(5), sparsity=0.5)
        assert setting.observation_space.task_labels == expected_task_label_space
    else:
        assert setting.observation_space.task_labels == spaces.Discrete(5)

    assert setting.test_max_steps == 500

    # The first observation of the training env should fit the setting's observation space.
    with setting.train_dataloader() as env:
        first_obs = env.reset()
        assert first_obs in setting.observation_space

    method = DummyMethod()
    results = setting.apply(method)
    self.validate_results(setting, method, results)
@mujoco_required
@pytest.mark.parametrize("seed", [None, 123, 456])
@pytest.mark.parametrize("version", ["v2", "v3"])
@pytest.mark.parametrize("env_name", ["HalfCheetah", "Hopper", "Walker2d"])
@pytest.mark.parametrize("modification", ["bodyparts", "gravity"])
def test_LPG_FTW_datasets(
    self,
    env_name: str,
    modification: str,
    version: str,
    config: Config,
    seed: Optional[int],
):
    """Test using a dataset from the LPG-FTW paper / repo (continual mujoco variants).

    The env factories created by the setting are inspected (without instantiating the envs)
    and their arguments are compared with hard-coded expected values for each task.

    TODO: Check that:
    - the task sequence is always the same (uses the same seed), regardless of what seed is
      passed;
    - The envs are created correctly;
    - The number of tasks / train steps / test steps / etc is set to the right values.
    """
    # LPG-FTW-{bodyparts|gravity}-{HalfCheetah|Hopper|Walker2d}-{v2|v3}
    dataset = f"LPG-FTW-{modification}-{env_name}-{version}"
    # NOTE: Set the seed in the config, preserving the other values:
    config = dataclasses.replace(config, seed=seed)
    nb_tasks: Optional[int] = None  # Using the default number of tasks for that setting for now
    setting: TaskIncrementalRLSetting = self.Setting(
        dataset=dataset,
        nb_tasks=nb_tasks,
        config=config,
    )
    if nb_tasks is not None:
        assert setting.nb_tasks == nb_tasks
    else:
        # NOTE: The parentheses matter here: without them, the conditional expression would
        # apply to the whole comparison, and the Walker2d case would assert the constant 50.
        assert setting.nb_tasks == (20 if env_name in ["HalfCheetah", "Hopper"] else 50)
    assert setting.train_steps_per_task == 100_000
    assert setting.train_max_steps == setting.train_steps_per_task * setting.nb_tasks
    assert setting.test_steps_per_task == 10_000
    assert setting.test_max_steps == setting.test_steps_per_task * setting.nb_tasks
    assert setting.config == config

    # Expected per-task values, indexed by modification, then env name, then task id.
    expected_values = {
        "bodyparts": {
            "HalfCheetah": np.array(
                [
                    [1.0667, 1.354, 1.1454, 0.9112],
                    [0.968, 1.3214, 0.8125, 1.2862],
                    [0.9356, 0.7476, 0.9421, 1.397],
                    [1.057, 1.0286, 0.776, 1.3749],
                    [0.7592, 1.3059, 0.6209, 0.9313],
                    [0.8497, 1.016, 0.869, 0.9722],
                    [0.6936, 0.7496, 0.9946, 0.7713],
                    [0.9878, 1.1394, 1.438, 1.3296],
                    [1.1359, 1.1118, 1.4415, 1.3868],
                    [0.5468, 0.9953, 1.3474, 1.3668],
                    [0.7779, 0.5924, 0.8996, 0.8196],
                    [0.9775, 0.7775, 1.3211, 1.1515],
                    [0.6026, 0.833, 0.9688, 1.4437],
                    [0.6035, 1.161, 1.0771, 0.7065],
                    [1.0629, 1.4446, 0.9937, 0.5573],
                    [1.2337, 0.522, 1.0446, 0.86],
                    [0.7313, 1.35, 1.2919, 0.6101],
                    [1.0026, 0.5937, 0.6216, 1.3764],
                    [0.6369, 0.8332, 1.0068, 1.1956],
                    [1.1337, 0.8872, 1.0393, 1.4391],
                ]
            ),
            "Hopper": np.array(
                [
                    [0.7135, 0.5054, 1.3158, 1.3817],
                    [1.2478, 1.4622, 0.8828, 0.7484],
                    [0.5758, 1.4022, 1.0022, 1.2518],
                    [1.4175, 0.5328, 0.8692, 0.6997],
                    [0.6962, 1.3126, 1.2338, 1.4018],
                    [1.4837, 1.0798, 0.7868, 0.8489],
                    [1.3545, 0.7424, 1.2719, 1.0976],
                    [0.6088, 0.516, 0.8584, 1.0396],
                    [1.19, 0.6938, 0.5663, 0.8589],
                    [0.8211, 1.3241, 0.9745, 1.345],
                    [0.6572, 1.0763, 1.3601, 0.659],
                    [0.7739, 0.7299, 0.6518, 1.469],
                    [1.0556, 0.7345, 0.532, 1.0279],
                    [1.2296, 0.6701, 1.4398, 1.0611],
                    [0.6225, 1.0743, 0.827, 0.6753],
                    [0.7325, 0.809, 1.2254, 0.9415],
                    [1.4439, 0.9964, 1.4649, 1.333],
                    [0.5189, 0.9123, 1.1166, 1.3882],
                    [1.0468, 1.4162, 1.4152, 1.4333],
                    [1.1143, 1.2726, 1.0209, 1.0729],
                ]
            ),
            "Walker2d": np.array(
                [
                    [0.7567, 0.756, 1.4277, 0.9565],
                    [1.4109, 0.5937, 0.7606, 0.6839],
                    [1.0276, 1.2041, 1.4451, 0.8439],
                    [0.9755, 0.8187, 0.591, 0.583],
                    [1.2181, 0.8519, 0.5878, 0.9935],
                    [0.8885, 1.2908, 1.3013, 1.1454],
                    [1.0147, 0.7442, 1.236, 0.5236],
                    [1.1978, 0.5307, 1.4067, 1.1635],
                    [0.9529, 0.8574, 0.6655, 0.5294],
                    [0.8051, 1.1687, 0.8499, 1.3864],
                    [1.2848, 0.8866, 0.5215, 1.0251],
                    [1.2241, 0.7499, 1.1479, 0.5744],
                    [1.2354, 0.5853, 1.1212, 0.5174],
                    [0.7968, 0.7717, 1.2285, 0.8687],
                    [1.0544, 0.5814, 0.8588, 0.687],
                    [1.0695, 0.6469, 0.8567, 0.6682],
                    [1.2904, 0.8367, 1.228, 0.8606],
                    [1.0343, 0.7646, 0.515, 1.3386],
                    [1.1157, 1.2064, 1.0026, 0.9877],
                    [0.6621, 0.809, 1.0466, 0.5361],
                    [0.9291, 0.6168, 0.9013, 1.4358],
                    [1.048, 0.8483, 0.8586, 1.1867],
                    [1.327, 1.0487, 1.4479, 0.9426],
                    [1.2382, 0.8678, 1.0034, 1.2412],
                    [0.5863, 1.4389, 0.934, 1.3923],
                    [1.1379, 1.154, 0.5595, 0.5955],
                    [1.3881, 1.3309, 0.5342, 1.1085],
                    [0.8394, 1.0508, 0.9655, 0.7755],
                    [0.7494, 0.6891, 0.6979, 1.3249],
                    [1.1108, 1.3998, 0.7783, 0.599],
                    [0.8687, 0.5902, 1.212, 0.6375],
                    [0.5668, 0.981, 0.5026, 1.0739],
                    [0.9416, 1.4424, 1.0721, 0.9112],
                    [1.2981, 1.0119, 1.2722, 0.9808],
                    [1.4171, 1.1066, 0.6053, 1.2302],
                    [1.1096, 1.0246, 1.3117, 0.5727],
                    [0.8082, 0.875, 0.9299, 1.2194],
                    [1.0526, 0.961, 1.0492, 1.2552],
                    [1.46, 0.8331, 0.934, 0.5725],
                    [1.3832, 1.4736, 1.2651, 0.7956],
                    [0.68, 1.2663, 1.4183, 0.9284],
                    [1.2713, 0.6865, 0.8331, 1.0081],
                    [1.4115, 0.5781, 0.9823, 0.8094],
                    [1.4614, 0.5998, 1.2237, 1.3794],
                    [1.2385, 1.2489, 0.7521, 0.818],
                    [1.077, 1.2589, 0.748, 1.1483],
                    [0.7855, 1.1619, 0.5537, 1.2367],
                    [1.4765, 1.1728, 0.9052, 1.3113],
                    [1.1144, 0.9986, 1.3052, 0.9948],
                    [1.1542, 1.3616, 0.7465, 0.8679],
                ]
            ),
        },
        "gravity": {
            "HalfCheetah": np.array(
                [
                    -10.4648,
                    -13.2825,
                    -11.236,
                    -8.9384,
                    -9.4964,
                    -12.9626,
                    -7.9709,
                    -12.6178,
                    -9.1777,
                    -7.3343,
                    -9.2424,
                    -13.7041,
                    -10.3694,
                    -10.091,
                    -7.6124,
                    -13.4874,
                    -7.4477,
                    -12.8111,
                    -6.0907,
                    -9.1363,
                ]
            ),
            "Hopper": np.array(
                [
                    -6.999,
                    -4.9579,
                    -12.9078,
                    -13.5543,
                    -12.2405,
                    -14.3439,
                    -8.6606,
                    -7.3419,
                    -5.6488,
                    -13.7555,
                    -9.8317,
                    -12.2801,
                    -13.9059,
                    -5.2266,
                    -8.5266,
                    -6.8638,
                    -6.83,
                    -12.8763,
                    -12.104,
                    -13.7512,
                ]
            ),
            "Walker2d": np.array(
                [
                    -7.4229,
                    -7.4163,
                    -14.006,
                    -9.3835,
                    -13.8414,
                    -5.8243,
                    -7.461,
                    -6.7093,
                    -10.0807,
                    -11.8119,
                    -14.1762,
                    -8.2791,
                    -9.57,
                    -8.031,
                    -5.7979,
                    -5.7189,
                    -11.9495,
                    -8.3575,
                    -5.7666,
                    -9.7467,
                    -8.7165,
                    -12.6623,
                    -12.7656,
                    -11.2362,
                    -9.9544,
                    -7.3011,
                    -12.1249,
                    -5.1366,
                    -11.7508,
                    -5.2058,
                    -13.8,
                    -11.4139,
                    -9.3481,
                    -8.4107,
                    -6.5289,
                    -5.1934,
                    -7.898,
                    -11.4647,
                    -8.3374,
                    -13.6001,
                    -12.6038,
                    -8.6978,
                    -5.1157,
                    -10.0563,
                    -12.0081,
                    -7.3568,
                    -11.2612,
                    -5.6351,
                    -12.1197,
                    -5.7417,
                ]
            ),
        },
    }

    def _unwrap_partials(env_fn: functools.partial) -> functools.partial:
        """Return the innermost partial of `env_fn` (the one around a `MujocoEnv` subclass)."""
        from gym.envs.mujoco import MujocoEnv

        # 'unwrap' the env fn:
        while isinstance(env_fn, functools.partial):
            # We want to recover the 'base' env factory (the function that actually creates
            # the modified mujoco env.)
            # NOTE `env_fn` is probably something like:
            # `partial(create_env, base_env_factory, wrappers=[...])
            # or
            # `partial(foo, env_fn=base_env_factory, wrappers=[...])
            print(env_fn)
            if inspect.isclass(env_fn.func) and issubclass(env_fn.func, MujocoEnv):
                # Reached the lowest-level partial, the one we're looking for.
                break
            if env_fn.args:
                env_fn = env_fn.args[0]
            else:
                env_fn = list(env_fn.keywords.values())[0]
        return env_fn

    if modification == "bodyparts":
        expected_factors_for_env = expected_values["bodyparts"][env_name]

        def check_env_fn_matches_expected(task_id: int, env_fn: functools.partial):
            env_fn = _unwrap_partials(env_fn)
            assert isinstance(env_fn, functools.partial)
            kwargs = env_fn.keywords
            for argument_name in ["body_name_to_size_scale", "body_name_to_mass_scale"]:
                argument_values = np.array(list(kwargs[argument_name].values()))
                assert (argument_values == expected_factors_for_env[task_id]).all()

        env_fn: functools.partial
        # Inspect the env functions and check that the arguments that would be passed to the
        # constructor make sense.
        # NOTE: Could also create the envs using the setting and inspect these attributes,
        # but I think that inspecting the attributes on the multi-env wrappers used by the
        # Traditional and MultiTask RL settings might not work. This is ok for now.
        for task_id, env_fn in enumerate(setting.train_envs):
            check_env_fn_matches_expected(task_id, env_fn)
        for task_id, env_fn in enumerate(setting.val_envs):
            check_env_fn_matches_expected(task_id, env_fn)
        for task_id, env_fn in enumerate(setting.test_envs):
            check_env_fn_matches_expected(task_id, env_fn)

    elif modification == "gravity":
        expected_gravities_for_env = expected_values["gravity"][env_name]

        def check_env_fn_matches_expected(task_id: int, env_fn: functools.partial):
            env_fn = _unwrap_partials(env_fn)
            kwargs = env_fn.keywords
            gravity_value: float = kwargs["gravity"]
            assert np.isclose(gravity_value, expected_gravities_for_env[task_id])

        for task_id, env_fn in enumerate(setting.train_envs):
            check_env_fn_matches_expected(task_id, env_fn)
        for task_id, env_fn in enumerate(setting.val_envs):
            check_env_fn_matches_expected(task_id, env_fn)
        for task_id, env_fn in enumerate(setting.test_envs):
            check_env_fn_matches_expected(task_id, env_fn)

    # TODO: Not sure if this check will also work with the stationary settings, so skipping it
    # for now.
    if setting.stationary_context:
        return

    # Check that the max episode length is really respected.
    with setting.train_dataloader() as temp_env:
        steps = 0
        obs = temp_env.reset()
        done = False
        while not done:
            action = temp_env.action_space.sample()
            obs, reward, done, info = temp_env.step(action)
            assert obs in temp_env.observation_space
            steps += 1
            assert steps <= 1000
        assert steps <= 1000

    # NOTE: Testing the 'live' envs is much slower, since we have to actually instantiate the
    # envs. Skipping the rest for now (everything below this `return` is not executed).
    return

    def _check_env_attributes_match(task_id: int, env: gym.Env):
        if modification == "bodyparts":
            size_scales = env.body_name_to_size_scale
            mass_scales = env.body_name_to_mass_scale
            assert size_scales == mass_scales
            assert list(size_scales.values()) == expected_factors_for_env[task_id].tolist()
        elif modification == "gravity":
            gravity = env.gravity
            assert gravity == expected_gravities_for_env[task_id]

    setting.prepare_data()
    for task_id in range(setting.nb_tasks):
        print(f"Testing the 'live' envs for task {task_id}.")
        setting.current_task_id = task_id
        with setting.train_dataloader() as env:
            _check_env_attributes_match(task_id, env)
        with setting.val_dataloader() as env:
            _check_env_attributes_match(task_id, env)
        with setting.test_dataloader() as env:
            _check_env_attributes_match(task_id, env)
@pytest.mark.timeout(120)
def test_action_space_always_matches_obs_batch_size_in_RL(config: Config):
    """Compare the batch sizes recorded by `OtherDummyMethod` with the test dataloader length."""
    from sequoia.settings import TaskIncrementalRLSetting

    nb_tasks = 2
    batch_size = 1
    setting = TaskIncrementalRLSetting(
        dataset="cartpole",
        nb_tasks=nb_tasks,
        batch_size=batch_size,
        train_max_steps=200,
        test_max_steps=200,
        num_workers=0,
        # monitor_training_performance=True, # This is still a TODO in RL.
    )
    total_samples = len(setting.test_dataloader())

    method = OtherDummyMethod()
    _ = setting.apply(method, config=config)

    # A missing batch size is treated as a batch size of 1.
    effective_batch_size = batch_size or 1

    # Only full batches are expected, plus possibly one smaller final batch.
    expected_encountered_batch_sizes = {effective_batch_size}
    remainder = total_samples % effective_batch_size
    if remainder != 0:
        expected_encountered_batch_sizes.add(remainder)
    assert set(method.batch_sizes) == expected_encountered_batch_sizes

    # NOTE: Multiply by nb_tasks because the test loop is ran after each training task.
    batches_per_test_loop = math.ceil(total_samples / effective_batch_size)
    expected_num_batches = batches_per_test_loop * nb_tasks
    # MINOR BUG: There's an extra batch for each task. Might make sense, or might not,
    # not sure.
    assert len(method.batch_sizes) == expected_num_batches + nb_tasks

    expected_total = total_samples * nb_tasks
    assert sum(method.batch_sizes) == expected_total + nb_tasks
@mtenv_required
@pytest.mark.xfail(reason="don't know how to get the max path length through mtenv!")
def test_mtenv_meta_world_support():
    """Step through a MetaWorld env from mtenv directly, then through an `IncrementalRLSetting`."""
    from mtenv import MTEnv, make

    env: MTEnv = make("MT-MetaWorld-MT10-v0")
    env.set_task_state(0)
    env.seed(123)
    env.seed_task(123)

    obs = env.reset()
    assert isinstance(obs, dict)
    assert list(obs.keys()) == ["env_obs", "task_obs"]
    print(obs)

    # BUG: No idea how to get the max path length, since I'm getting
    # AttributeError: 'MetaWorldMTWrapper' object has no attribute 'max_path_length'
    done = False
    steps = 0
    while not done and steps < env.max_path_length:
        random_action = env.action_space.sample()
        obs, reward, done, info = env.step(random_action)
        # BUG: Can't render when using metaworld through mtenv, since mtenv *contains* a
        # straight-up copy-pasted old version of meta-world, which doesn't support it.
        env.render()
        steps += 1
    env.close()

    env_obs_space = env.observation_space["env_obs"]
    task_obs_space = env.observation_space["task_obs"]

    # TODO: If the task observation space is Discrete(10), then we can't create a
    # setting with more than 10 tasks! We could add a check for this.
    # TODO: Figure out the default number of tasks depending on the chosen dataset.
    setting = IncrementalRLSetting(dataset="MT-MetaWorld-MT10-v0", nb_tasks=3)
    assert setting.observation_space.x == env_obs_space
    assert setting.nb_tasks == 3

    train_env = setting.train_dataloader()
    assert train_env.observation_space.x == env_obs_space
    assert train_env.observation_space.task_labels == spaces.Discrete(3)

    n_episodes = 1
    for episode in range(n_episodes):
        obs = train_env.reset()
        done = False
        steps = 0
        while not done and steps < env.max_path_length:
            random_action = train_env.action_space.sample()
            obs, reward, done, info = train_env.step(random_action)
            # BUG: Can't render meta-world env when using mtenv.
            train_env.render()
            steps += 1
# @pytest.mark.no_xvfb
# @pytest.mark.xfail(reason="TODO: Rethink how we want to integrate MetaWorld envs.")
@pytest.mark.skip(reason="BUG: timeout handler seems to be bugged, test lasts forever")
@metaworld_required
@pytest.mark.timeout(60)
def test_metaworld_support(config: Config):
    """Test using a metaworld benchmark ("MT10") as the dataset of an RL Setting.

    TODO: Need to rethink this, we should instead use one env class per task (where each
    task env goes through a subset of the tasks for training)
    """
    max_steps = 500
    # TODO: Add option of passing a benchmark instance?
    setting = IncrementalRLSetting(
        dataset="MT10",
        config=config,
        max_episode_steps=10,
        train_max_steps=max_steps,
        test_max_steps=max_steps,
    )
    assert setting.nb_tasks == len(setting.train_envs)
    assert setting.nb_tasks == 10

    # The step budgets are split evenly between the 10 tasks.
    assert setting.train_max_steps == 500
    assert setting.test_max_steps == 500
    assert setting.train_steps_per_task == 50
    assert setting.test_steps_per_task == 50

    dummy_method = DummyMethod()
    results = setting.apply(dummy_method, config=config)
    assert results.summary()
@slow
@metaworld_required
@pytest.mark.timeout(180)
@pytest.mark.parametrize("dataset", ["CW10", "CW20"])
def test_continual_world_support(dataset: str, config: Config):
    """Test using CW10 and CW20 benchmarks as the dataset of an RL Setting.

    First checks the default values of the setting, then creates a much shorter version of
    the setting and applies a `DummyMethod` to it.

    TODO: This test is quite long to run, in part because metaworld takes like 20
    seconds to load, and there being 20 tasks in CW20
    """
    # NOTE: Computed up-front so that the assertions below really compare `nb_tasks` with the
    # expected value. (`assert x == 10 if cond else 20` would parse as
    # `assert (x == 10) if cond else 20`, which always passes for CW20.)
    expected_nb_tasks = 10 if dataset == "CW10" else 20

    # TODO: Add option of passing a benchmark instance? That might make it quicker to
    # run tests?
    setting = IncrementalRLSetting(
        dataset=dataset,
        config=config,
    )
    assert setting.nb_tasks == expected_nb_tasks
    assert setting.train_steps_per_task == 1_000_000
    assert setting.train_max_steps == 1_000_000 * setting.nb_tasks
    assert setting.test_steps_per_task == 10_000
    assert setting.test_max_steps == 10_000 * setting.nb_tasks

    # Same benchmark, but with very short episodes and tasks.
    setting = IncrementalRLSetting(
        dataset=dataset,
        config=config,
        max_episode_steps=10,
        train_steps_per_task=50,
        test_steps_per_task=50,
    )
    assert setting.nb_tasks == expected_nb_tasks
    assert setting.train_steps_per_task == 50
    assert setting.test_steps_per_task == 50
    assert setting.train_max_steps == setting.train_steps_per_task * setting.nb_tasks
    assert setting.test_max_steps == setting.test_steps_per_task * setting.nb_tasks

    assert (
        setting.nb_tasks
        == len(setting.train_envs)
        == len(setting.val_envs)
        == len(setting.test_envs)
    )

    method = DummyMethod()
    results = setting.apply(method, config=config)
    # 50 training steps per task with episodes of at most 10 steps: 5 episodes per task.
    assert method.train_episodes_per_task == [5] * setting.nb_tasks
    assert results.summary()
@pytest.mark.xfail(reason="Metaworld integration isn't done yet")
@metaworld_required
@pytest.mark.timeout(120)
@pytest.mark.parametrize("pass_env_id_instead_of_env_instance", [True, False])
def test_metaworld_auto_task_schedule(pass_env_id_instead_of_env_instance: bool):
    """Test that the task schedule is created automatically when only a metaworld env (or
    its id) and a number of tasks are passed.
    """
    import metaworld
    from metaworld import MetaWorldEnv

    # Construct the benchmark, sampling tasks
    benchmark = metaworld.ML10()
    env_name = "reach-v2"
    env_type: Type[MetaWorldEnv] = benchmark.train_classes[env_name]
    env = env_type()
    dataset = env_name if pass_env_id_instead_of_env_instance else env

    # TODO: When not passing a nb_tasks, the number of available tasks for that env
    # is used.
    # setting = TaskIncrementalRLSetting(
    #     dataset=env_name if pass_env_id_instead_of_env_instance else env,
    #     train_steps_per_task=1000,
    # )
    # assert setting.nb_tasks == 50
    # assert setting.steps_per_task == 1000
    # assert sorted(setting.train_task_schedule.keys()) == list(range(0, 50_000, 1000))

    # Test passing a number of tasks:
    with pytest.warns(RuntimeWarning):
        setting = TaskIncrementalRLSetting(
            dataset=dataset,
            nb_tasks=2,
            train_max_steps=2000,
            test_max_steps=2000,
            transforms=[],
        )
    assert setting.nb_tasks == 2
    assert setting.steps_per_task == 1000
    # One task boundary every 1000 steps.
    expected_boundaries = list(range(0, 2000, 1000))
    assert sorted(setting.train_task_schedule.keys()) == expected_boundaries

    from sequoia.common.metrics.rl_metrics import EpisodeMetrics

    method = DummyMethod()
    with pytest.warns(RuntimeWarning):
        results: IncrementalRLSetting.Results[EpisodeMetrics] = setting.apply(method)
    # TODO: Don't know if these values make sense! Rewards are super high, not sure if
    # that's normal in Mujoco/metaworld envs:
    # "Average": {
    #     "Episodes": 66,
    #     "Mean reward per episode": 13622.872306005293,
    #     "Mean reward per step": 90.81914870670195
    # }
    # assert 50 < results.average_final_performance.episodes
    # assert 10_000 < results.average_final_performance.mean_reward_per_episode
    # assert 100 < results.average_final_performance.mean_episode_length <= 150
@pytest.mark.xfail(reason="WIP: Adding dm_control support")
def test_dm_control_support():
    """Load envs from the dm_control suite and run one episode of random actions."""
    import numpy as np
    from dm_control import suite

    # Load one task:
    env = suite.load(domain_name="cartpole", task_name="swingup")

    # Iterate over a task set:
    for domain_name, task_name in suite.BENCHMARKING:
        task_env = suite.load(domain_name, task_name)

    # Step through an episode and print out reward, discount and observation.
    action_spec = env.action_spec()
    time_step = env.reset()
    while True:
        if time_step.last():
            break
        low, high = action_spec.minimum, action_spec.maximum
        action = np.random.uniform(low, high, size=action_spec.shape)
        time_step = env.step(action)
        print(time_step.reward, time_step.discount, time_step.observation)
# TODO: Use the task schedule as a way to specify how long each task lasts in a
# given env? For instance:
class PeriodTypeEnum(enum.Enum):
    """Unit in which the `value` of a `Period` is expressed."""

    STEPS = enum.auto()
    EPISODES = enum.auto()


class Period(NamedTuple):
    """A duration: an integer `value`, expressed in steps (the default) or in episodes."""

    value: int
    type: PeriodTypeEnum = PeriodTypeEnum.STEPS


def steps(v):
    """Create a `Period` of `v` steps."""
    return Period(value=v, type=PeriodTypeEnum.STEPS)


def episodes(v):
    """Create a `Period` of `v` episodes."""
    return Period(value=v, type=PeriodTypeEnum.EPISODES)


# Example task schedule whose keys are periods, and whose values are environment ids.
train_task_schedule = {
    steps(10): "CartPole-v0",
    episodes(1000): "ALE/Breakout-v5",
}
from gym.wrappers import TimeLimit
def make_random_cartpole_env(gravity_scale: float):
    """Create a CartPole-v1 env, limited to 50 steps per episode, with a scaled gravity.

    The `gravity` attribute of the unwrapped env is multiplied by `gravity_scale`.
    """
    time_limited_env = TimeLimit(gym.make("CartPole-v1"), max_episode_steps=50)
    cartpole = time_limited_env.unwrapped
    cartpole.gravity = cartpole.gravity * gravity_scale
    return time_limited_env
class TestPassingEnvsForEachTask:
    """Tests that have to do with the feature of passing the list of environments to
    use for each task.
    """

    # NOTE: This test used to be defined twice in this class (once with CartPole-v0 and once
    # with CartPole-v1), so the second definition silently replaced the first. Both variants
    # are now run through parametrization.
    @pytest.mark.parametrize("cartpole_env_id", ["CartPole-v0", "CartPole-v1"])
    def test_raises_warning_when_envs_have_different_obs_spaces(self, cartpole_env_id: str):
        """A warning should be raised when the envs of the tasks have different obs spaces."""
        task_envs = [cartpole_env_id, "Pendulum-v1"]
        with pytest.warns(RuntimeWarning, match="doesn't have the same observation space"):
            setting = IncrementalRLSetting(train_envs=task_envs)
            setting.train_dataloader()

    def _check_setting_with_gravity_scaled_envs(self, task_envs: list, gravity_scales: list):
        """Create a setting from `task_envs` and check its schedules, envs and spaces.

        `task_envs` holds one item per task (an env or a function that creates one), where
        the env of task `i` was created with `make_random_cartpole_env(gravity_scales[i])`.
        """
        base_env = make_random_cartpole_env(gravity_scale=1.0)
        setting = IncrementalRLSetting(train_envs=task_envs)
        assert setting.nb_tasks == len(task_envs)

        # TODO: Using 'no-op' task schedules, rather than empty ones.
        # This fixes a bug with the creation of the test environment.
        assert not any(setting.train_task_schedule.values())
        assert not any(setting.val_task_schedule.values())
        assert not any(setting.test_task_schedule.values())
        # assert not setting.train_task_schedule
        # assert not setting.val_task_schedule
        # assert not setting.test_task_schedule
        # assert len(setting.train_task_schedule.keys()) == 2

        # The training env of each task should use the gravity of the env for that task.
        setting.current_task_id = 0
        train_env = setting.train_dataloader()
        assert train_env.gravity == base_env.gravity * gravity_scales[0]

        setting.current_task_id = 1
        train_env = setting.train_dataloader()
        assert train_env.gravity == base_env.gravity * gravity_scales[1]

        assert isinstance(train_env.unwrapped, CartPoleEnv)

        # Not sure, do we want to add a 'observation_spaces`, `action_spaces` and
        # `reward_spaces` properties?
        assert setting.observation_space.x == train_env.observation_space.x

        if setting.task_labels_at_train_time:
            # TODO: Either add a `__getattr__` proxy on the Sparse space, or create
            # dedicated `SparseDiscrete`, `SparseBox` etc spaces so that we eventually
            # get to use `space.n` on a Sparse space.
            assert train_env.observation_space.task_labels == spaces.Discrete(setting.nb_tasks)
            sparsity = 0.0 if setting.task_labels_at_test_time else 0.5
            assert setting.observation_space.task_labels == Sparse(
                spaces.Discrete(setting.nb_tasks),
                sparsity=sparsity,
            )

    def test_passing_env_fns_for_each_task(self):
        """Pass a function that creates the env of each task."""
        nb_tasks = 3
        gravity_scales = [0.5 + random.random() for _ in range(nb_tasks)]
        # task_envs = ["CartPole-v0", "CartPole-v1"]
        task_envs = [
            functools.partial(make_random_cartpole_env, gravity_scale)
            for gravity_scale in gravity_scales
        ]
        self._check_setting_with_gravity_scaled_envs(task_envs, gravity_scales)

    def test_passing_env_for_each_task(self):
        """Pass an already-created env for each task."""
        nb_tasks = 3
        gravity_scales = [0.5 + random.random() for _ in range(nb_tasks)]
        # task_envs = ["CartPole-v0", "CartPole-v1"]
        task_envs = [make_random_cartpole_env(gravity_scale) for gravity_scale in gravity_scales]
        self._check_setting_with_gravity_scaled_envs(task_envs, gravity_scales)

    def test_command_line(self):
        """The envs to use for each task can be passed from the command-line."""
        # TODO: If someone passes the same env ids from the command-line, then shouldn't
        # we somehow vary the tasks by changing the level or something?
        setting = IncrementalRLSetting.from_args(argv="--train_envs CartPole-v0 Pendulum-v1")
        assert setting.train_envs == ["CartPole-v0", "Pendulum-v1"]

    def test_random_baseline(self):
        """Apply the random baseline to a setting created from one env per task."""
        nb_tasks = 3
        # task_envs = ["CartPole-v0", "CartPole-v1"]
        # NOTE(review): the task index is used as the gravity *scale* here, so the env of the
        # first task has a gravity of zero — confirm that this is intended.
        task_envs = [make_random_cartpole_env(i) for i in range(nb_tasks)]
        setting = IncrementalRLSetting(
            train_envs=task_envs, train_max_steps=1000, test_max_steps=1000
        )
        assert setting.nb_tasks == nb_tasks
        method = RandomBaselineMethod()

        results = setting.apply(method)
        assert results.objective > 0
@pytest.mark.xfail(reason="Don't yet fully changing the size of the body parts.")
@mujoco_required
def test_incremental_mujoco_like_LPG_FTW():
    """Trying to get the same-ish setup as the "LPG_FTW" experiments

    Each task uses a HalfCheetah env with a randomly scaled gravity and torso size.
    See https://github.com/Lifelong-ML/LPG-FTW/tree/master/experiments
    """
    nb_tasks = 5
    from sequoia.settings.rl.envs.mujoco import ContinualHalfCheetahEnv

    # One random factor in [0.5, 1.5) per task, for the gravity and for the size of the torso.
    task_gravity_factors = [random.random() + 0.5 for _ in range(nb_tasks)]
    task_size_scale_factors = [random.random() + 0.5 for _ in range(nb_tasks)]

    task_envs = []
    for gravity_factor, size_scale_factor in zip(task_gravity_factors, task_size_scale_factors):
        task_env = ContinualHalfCheetahEnv(
            gravity=gravity_factor * -9.81,
            body_name_to_size_scale={"torso": size_scale_factor},
        )
        task_envs.append(RenderEnvWrapper(task_env))

    setting = IncrementalRLSetting(
        train_envs=task_envs,
        train_steps_per_task=10_000,
        train_wrappers=RenderEnvWrapper,
        test_max_steps=10_000,
    )
    assert setting.nb_tasks == nb_tasks

    # NOTE: Same as above: we use a `no-op` task schedule, rather than an empty one.
    for task_schedule_name in ("train_task_schedule", "val_task_schedule", "test_task_schedule"):
        assert not any(getattr(setting, task_schedule_name).values())
    # assert not setting.train_task_schedule
    # assert not setting.val_task_schedule
    # assert not setting.test_task_schedule

    # TODO: Using `render=True` causes a silent crash for some reason!
    random_method = RandomBaselineMethod()
    results = setting.apply(random_method)
    assert results.objective > 0
================================================
FILE: sequoia/settings/rl/incremental/tasks.py
================================================
""" TODO: Add the tasks for IncrementalRLSetting, on top of the existing tasks from
ContinualRL
"""
import operator
import warnings
from functools import partial, singledispatch
from typing import Callable, List
import gym
import numpy as np
from sequoia.settings.rl.envs import (
METAWORLD_INSTALLED,
MTENV_INSTALLED,
MetaWorldEnv,
MetaWorldMujocoEnv,
MTEnv,
SawyerXYZEnv,
)
from ..discrete.tasks import (
DiscreteTask,
_is_supported,
make_discrete_task,
sequoia_registry,
task_sampling_function,
)
# Tasks of the incremental RL setting are the same type as the tasks of the discrete setting.
IncrementalTask = DiscreteTask
@task_sampling_function(env_registry=sequoia_registry, based_on=make_discrete_task)
@singledispatch
def make_incremental_task(
    env: gym.Env,
    *,
    step: int,
    change_steps: List[int],
    seed: int = None,
    **kwargs,
) -> IncrementalTask:
    """Create the "task" to apply to an environment like `env` at the given step.

    This is the generic function used by Sequoia's `IncrementalRLSetting` (and its
    descendants). This fallback implementation always raises a `NotImplementedError`:
    support for a type of environment is added by registering a handler function for it:

    ```
    @make_incremental_task.register(SomeGymEnvClass)
    def make_incremental_task_for_my_env(env: SomeGymEnvClass, step: int, change_steps: List[int], **kwargs,):
        return {"my_attribute": random.random()}
    ```
    """
    error_message = f"Don't know how to create an (incremental) task for env {env}"
    raise NotImplementedError(error_message)
# Same function as `_is_supported` from the discrete tasks module, but with
# `make_incremental_task` pre-bound as the function used to create the tasks.
is_supported = partial(_is_supported, _make_task_function=make_incremental_task)
# def is_supported(
#     env_id: str,
#     env_registry: EnvRegistry = sequoia_registry,
#     _make_task_function: Callable[..., DiscreteTask] = make_incremental_task,
# ) -> bool
#     """ Returns whether Sequoia is able to create (incremental) tasks for the given
#     environment.
#     """
#     return is_supported_by_parent(env_id, env_registry=env_registry, _make_task_function=_make_task_function)
#     return make_incremental_task.is_supported(env_id=env_id, env_registry=env_registry)
if MTENV_INSTALLED:

    @make_incremental_task.register
    def make_task_for_mtenv_env(
        env: MTEnv,
        step: int,
        change_steps: List[int],
        seed: int = None,
        **kwargs,
    ) -> Callable[[MTEnv], None]:
        """Samples a task for an env from MTEnv.

        The Task in this case will be a callable that will call the env's
        `set_task_state` method, passing in an integer (`task`).

        When `seed` is None, then the task will be the same as the task index. Otherwise, the
        task ids are shuffled using a random number generator seeded with `seed`.

        Raises:
            AssertionError: If `change_steps` is empty.
            RuntimeError: If `step` is not one of the task boundaries in `change_steps`.
        """
        assert change_steps, "Need task boundaries to construct the task schedule."
        if step not in change_steps:
            raise RuntimeError(
                f"MTENV has discrete tasks (as far as I'm aware), so step {step} "
                f"should be in {change_steps}!"
            )
        task_index = change_steps.index(step)
        task_states = list(range(len(change_steps)))
        if seed is not None:
            # Perform a deterministic shuffling of the 'task ids'.
            # NOTE: The generator has to be created here: there is no pre-existing `rng` in
            # this function, so reading one would raise an UnboundLocalError.
            rng = np.random.default_rng(seed)
            rng.shuffle(task_states)
        # NOTE: Task state is an integer for now, but I'm not sure if it can also be
        # something else..
        task_state: int = task_states[task_index]
        return operator.methodcaller("set_task_state", task_state)
if METAWORLD_INSTALLED:

    @make_incremental_task.register(SawyerXYZEnv)
    @make_incremental_task.register(MetaWorldMujocoEnv)
    @make_incremental_task.register(MetaWorldEnv)
    def make_task_for_metaworld_env(
        env: MetaWorldEnv,
        step: int,
        change_steps: List[int] = None,
        seed: int = None,
        **kwargs,
    ) -> Callable[[MetaWorldEnv], None]:
        """Samples a task for an environment from MetaWorld.

        The Task in this case will be a callable that will call the env's
        `set_task` method, passing in a task from the `train_tasks` of the benchmark
        that contains this environment.

        When `seed` is None, then the task will be the same as the task index.
        Otherwise the tasks are permuted deterministically from `seed`.

        Parameters
        ----------
        env : MetaWorldEnv
            The environment for which a task is created. Its class is looked up in
            the `train_classes` of the known benchmarks.
        step : int
            The step at which the task starts. Must be one of `change_steps`.
        change_steps : List[int]
            The steps at which the task changes (the task boundaries). Required.
        seed : int, optional
            Seed used to shuffle the tasks. No shuffling when None.

        Raises
        ------
        RuntimeError
            If `step` is not one of the task boundaries in `change_steps`.
        NotImplementedError
            If no benchmark contains the class of `env`.
        """
        # TODO: Which benchmark should we use?
        found = False
        assert change_steps, "Need task boundaries to construct the task schedule."
        if step not in change_steps:
            raise RuntimeError(
                f"MetaWorld has discrete tasks (as far as I'm aware), so step {step} "
                f"should be in {change_steps}!"
            )
        task_index = change_steps.index(step)

        import metaworld

        # TODO: Not sure how exactly we're supposed to use the train_classes vs
        # train_tasks, should it be a MultiTaskEnv within a given env class?
        warnings.warn(RuntimeWarning("This is supposedly not the right way to do it!"))

        env_name = ""
        # Find the benchmark that contains this type of env.
        for benchmark_class in [metaworld.ML10]:
            benchmark = benchmark_class()
            for env_name, env_class in benchmark.train_classes.items():
                if isinstance(env, env_class):
                    # Found the right benchmark that contains this env class, now
                    # create the task schedule using the tasks.
                    found = True
                    break
            if found:
                break
        if not found:
            raise NotImplementedError(f"Can't find a benchmark with env class {type(env)}!")

        # `benchmark` is here the right benchmark to use to create the tasks.
        training_tasks = [task for task in benchmark.train_tasks if task.env_name == env_name]
        tasks = training_tasks.copy()
        if seed is not None:
            # Perform a deterministic shuffling of the tasks.
            # A fresh generator is created from `seed` on every call, so that every
            # step of the schedule sees the exact same permutation. (The previous
            # `rng = rng or ...` read an unassigned local and always raised
            # UnboundLocalError.)
            rng = np.random.default_rng(seed)
            rng.shuffle(tasks)
        task = tasks[task_index]
        return operator.methodcaller("set_task", task)
================================================
FILE: sequoia/settings/rl/multi_task/__init__.py
================================================
from .setting import MultiTaskRLSetting
================================================
FILE: sequoia/settings/rl/multi_task/setting.py
================================================
""" 'Classical' RL setting.
"""
from dataclasses import dataclass
from typing import Callable, List
import gym
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import constant
from ..task_incremental import TaskIncrementalRLSetting
from ..traditional import TraditionalRLSetting
logger = get_logger(__name__)
@dataclass
class MultiTaskRLSetting(TaskIncrementalRLSetting, TraditionalRLSetting):
    """RL setting in which training alternates between uniformly-sampled tasks.

    Built on top of `TaskIncrementalRLSetting` (and `TraditionalRLSetting`), with a
    stationary context and a single training phase.
    """

    # TODO: Move this into a new Assumption about the context non-stationarity.
    stationary_context: bool = constant(True)

    @property
    def phases(self) -> int:
        """Number of training 'phases', i.e. number of calls to `method.fit`.

        Always 1 in this setting: there is a single training phase, regardless of
        the number of tasks.
        """
        return 1

    # TODO: Show how the multi-task wrapper is created here, rather than in the base class.
    def create_train_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Return the wrappers for a training env (delegates to the parent class)."""
        train_wrappers = super().create_train_wrappers()
        return train_wrappers

    def create_test_wrappers(self) -> List[Callable[[gym.Env], gym.Env]]:
        """Return the list of wrappers to add to a single test environment.

        The result of this method must be pickleable when using multiprocessing.

        Returns
        -------
        List[Callable[[gym.Env], gym.Env]]
            The wrappers produced by `self._make_wrappers` for the test env.
        """
        if self.stationary_context:
            logger.warning(
                "The test phase will go through all tasks in sequence, rather than "
                "shuffling them! (This is to make it easier to compile the performance "
                "metrics for each task."
            )

        # TODO: If we're in the 'Multi-Task RL' setting, then should we maybe change
        # the task schedule, so that we give an equal number of steps per task?
        wrapper_options = dict(
            base_env=self.test_dataset,
            task_schedule=self.test_task_schedule,
            # sharp_task_boundaries=self.known_task_boundaries_at_test_time,
            task_labels_available=self.task_labels_at_test_time,
            transforms=self.test_transforms,
            starting_step=0,
            max_steps=self.test_max_steps,
            # Tasks are visited in order at test-time, never re-sampled on reset.
            new_random_task_on_reset=False,
        )
        return self._make_wrappers(**wrapper_options)
================================================
FILE: sequoia/settings/rl/multi_task/setting_test.py
================================================
# TODO: Tests for the multi-task RL setting.
from typing import ClassVar, Type
import pytest
from sequoia.settings.rl.setting_test import DummyMethod
from ..task_incremental.setting_test import (
TestTaskIncrementalRLSetting as TaskIncrementalRLSettingTests,
)
from .setting import MultiTaskRLSetting
class TestMultiTaskRLSetting(TaskIncrementalRLSettingTests):
    """Runs the task-incremental RL setting tests against `MultiTaskRLSetting`."""

    Setting: ClassVar[Type[Setting]] = MultiTaskRLSetting
    dataset: pytest.fixture

    # def test_on_task_switch_is_called(self):
    #     setting = self.Setting(
    #         dataset="CartPole-v0",
    #         nb_tasks=5,
    #         # train_steps_per_task=100,
    #         train_max_steps=500,
    #         test_max_steps=500,
    #     )
    #     method = DummyMethod()
    #     _ = setting.apply(method)
    #     assert setting.task_labels_at_test_time
    #     assert False, method.observation_task_labels

    def validate_results(
        self,
        setting: MultiTaskRLSetting,
        method: DummyMethod,
        results: MultiTaskRLSetting.Results,
    ) -> None:
        """Sanity-check the results of applying the dummy method to the setting.

        Also inspects the bookkeeping attributes recorded by the `DummyMethod`.
        """
        assert results
        assert results.objective
        assert setting.stationary_context
        assert len(results.task_results) == setting.nb_tasks

        overall_metrics = results.average_metrics
        summed_task_metrics = sum(
            task_result.average_metrics for task_result in results.task_results
        )
        assert overall_metrics == summed_task_metrics

        n_tasks = setting.nb_tasks
        n_phases = setting.phases

        # Task boundaries and task labels are available both at train and test time.
        for flag_name in (
            "known_task_boundaries_at_train_time",
            "known_task_boundaries_at_test_time",
            "task_labels_at_train_time",
            "task_labels_at_test_time",
        ):
            assert getattr(setting, flag_name)

        if setting.nb_tasks == 1:
            assert not method.received_task_ids
            assert not method.received_while_training
            return

        # Only received during testing.
        assert method.received_task_ids == list(range(n_tasks))
        assert method.received_while_training == [False] * n_tasks
================================================
FILE: sequoia/settings/rl/objects.py
================================================
from dataclasses import dataclass
from typing import TypeVar
from torch import Tensor
from sequoia.settings.base import Setting
T = TypeVar("T")
@dataclass(frozen=True)
class Observations(Setting.Observations):
    """Observations in a continual RL Setting.

    Frozen dataclass extending `Setting.Observations`; it (re-)declares the `x`
    field as a `Tensor`.
    """

    # Input example
    x: Tensor
@dataclass(frozen=True)
class Actions(Setting.Actions):
    """Actions in an RL Setting.

    Frozen dataclass that adds no field on top of `Setting.Actions`.
    """

    pass
# TODO: Replace this 'Rewards' with a 'SparseRewards'-like object for RL, and a
# 'DenseRewards'-like object in SL, rather than use the same in RL and SL.
@dataclass(frozen=True)
class Rewards(Setting.Rewards[T]):
    """Rewards given back by the environment in RL Settings.

    Frozen dataclass, generic in `T`, that adds no field on top of
    `Setting.Rewards`.
    """
# @dataclass(frozen=True)
# class RLReward(Rewards[T]):
# reward: T
# @dataclass(frozen=True)
# class SLReward(Rewards[T]):
# reward: T
# y: Sequence[T]
ObservationType = TypeVar("ObservationType", bound=Observations)
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)
# from .environment import RLEnvironment as Environment
================================================
FILE: sequoia/settings/rl/setting.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Type
from sequoia.settings.base import Setting
from sequoia.settings.base.environment import ActionType, ObservationType, RewardType
from .environment import RLEnvironment
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
@dataclass
class RLSetting(Setting[RLEnvironment[ObservationType, ActionType, RewardType]]):
    """LightningDataModule for an 'active' setting.

    This is to be the parent of settings like RL or maybe Active Learning.

    The environments of this setting are `RLEnvironment`s, parametrized by the
    observation / action / reward types.
    """

    # Types of the objects exchanged with the environment: bound here to the
    # RL-specific dataclasses imported from `.objects`.
    Observations: ClassVar[Type[ObservationType]] = Observations
    Actions: ClassVar[Type[ActionType]] = Actions
    Rewards: ClassVar[Type[RewardType]] = Rewards
================================================
FILE: sequoia/settings/rl/setting_test.py
================================================
""" Utilities used in tests for the RL Settings. """
from typing import Any, Callable, Dict, List, Optional
import warnings
from sequoia.common.gym_wrappers import IterableWrapper
from sequoia.methods import RandomBaselineMethod
from sequoia.settings.base import Environment
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
class DummyMethod(RandomBaselineMethod):
    """Random baseline method used for debugging the (RL) settings.

    Takes random actions in the training / validation environments and records what
    it observes (task labels, task switches, number of steps and episodes, values of
    the task attributes), so that tests can check the behaviour of a Setting.

    TODO: Remove the other `DummyMethod` variants, replace them with this.
    """

    def __init__(
        self,
        additional_train_wrappers: List[Callable[[Environment], Environment]] = None,
        additional_valid_wrappers: List[Callable[[Environment], Environment]] = None,
    ):
        """Create the method.

        Parameters
        ----------
        additional_train_wrappers : list of callables, optional
            Wrappers applied (in order) to the training env at the start of `fit`.
        additional_valid_wrappers : list of callables, optional
            Wrappers applied (in order) to the validation env at the start of `fit`.
        """
        super().__init__()
        # Wrappers to be added to the train/val environments to debug/test that the
        # setting's environments work correctly.
        self.train_env: Optional[Environment] = None
        self.valid_env: Optional[Environment] = None
        self.additional_train_wrappers = additional_train_wrappers or []
        self.additional_valid_wrappers = additional_valid_wrappers or []

        # One entry per call to `fit`: the attribute values recorded by the
        # `CheckAttributesWrapper` around the train / valid env.
        self.all_train_values = []
        self.all_valid_values = []
        # Task labels seen in the first observation of each training episode.
        self.observation_task_labels: List[Any] = []
        self.n_fit_calls = 0
        self.n_task_switches = 0
        # Arguments of each `on_task_switch` call, and whether it occurred in training.
        self.received_task_ids: List[Optional[int]] = []
        self.received_while_training: List[bool] = []
        # One entry per call to `fit`.
        self.train_steps_per_task: List[int] = []
        self.train_episodes_per_task: List[int] = []
        self._has_been_configured_before = False
        self.changing_attributes: List[str] = []

    def configure(self, setting):
        """Prepare the method for `setting`.

        Raises
        ------
        RuntimeError
            If this method instance was already configured (it can't be reused).
        """
        if self._has_been_configured_before:
            raise RuntimeError("Can't reuse this Method across Settings for now.")
        self._has_been_configured_before = True
        # The attributes to look for changes with: the union of the keys of all the
        # tasks in the training task schedule.
        self.changing_attributes = list(
            set().union(*[task.keys() for task in setting.train_task_schedule.values()])
        )
        self.train_env = None
        self.valid_env = None

    def fit(
        self,
        train_env: Environment,
        valid_env: Environment,
    ):
        """Run random-action episodes in `train_env` until it is closed.

        One validation episode is performed every 10 training episodes, while
        `valid_env` is still open. Training also stops once `max_train_episodes`
        episodes have been performed, when that limit is set.
        """
        # Add wrappers, if necessary.
        for wrapper in self.additional_train_wrappers:
            train_env = wrapper(train_env)
        for wrapper in self.additional_valid_wrappers:
            valid_env = wrapper(valid_env)

        train_env = CheckAttributesWrapper(train_env, attributes=self.changing_attributes)
        valid_env = CheckAttributesWrapper(valid_env, attributes=self.changing_attributes)

        self.train_env = train_env
        self.valid_env = valid_env

        # TODO: Replace the loop below with adding some wrappers around the train/valid envs, and
        # just delegate to super().fit (so we use the RandomBaselineMethod).
        # return super().fit(train_env, valid_env)

        episodes = 0
        val_interval = 10
        total_steps = 0

        self.train_steps_per_task.append(0)
        self.train_episodes_per_task.append(0)

        import tqdm

        # The context manager makes sure that the progress bar gets closed, even if
        # an error is raised while interacting with the environments.
        with tqdm.tqdm(desc="Fake training") as train_pbar:
            while not train_env.is_closed():
                obs = train_env.reset()
                task_labels = obs.task_labels
                # Turn a missing / scalar task label into a list with one item.
                if task_labels is None or isinstance(task_labels, int) or not task_labels.shape:
                    task_labels = [task_labels]
                self.observation_task_labels.extend(task_labels)

                attr_dict = {attr: getattr(train_env, attr) for attr in self.changing_attributes}
                logger.debug(f"Start of episode #{episodes}: {attr_dict}")

                done = False
                while not done and not train_env.is_closed():
                    actions = train_env.action_space.sample()
                    # print(train_env.current_task)
                    obs, rew, done, info = train_env.step(actions)
                    total_steps += 1
                    self.train_steps_per_task[-1] += 1
                    train_pbar.update()
                    train_pbar.set_postfix({"episodes": episodes, "total steps": total_steps})

                episodes += 1
                self.train_episodes_per_task[-1] += 1

                if episodes % val_interval == 0 and not valid_env.is_closed():
                    # Perform one 'validation' episode.
                    obs = valid_env.reset()
                    done = False
                    while not done and not valid_env.is_closed():
                        actions = valid_env.action_space.sample()
                        obs, rew, done, info = valid_env.step(actions)

                # Stop once the episode budget is used up. (This used to compare with
                # `<`, which stopped training right after the very first episode.)
                if self.max_train_episodes is not None and episodes >= self.max_train_episodes:
                    break

        self.all_train_values.append(self.train_env.values)
        self.all_valid_values.append(self.valid_env.values)
        self.n_fit_calls += 1

    def on_task_switch(self, task_id: Optional[int] = None):
        """Record the task switch: its task id, and whether it happened in training."""
        self.n_task_switches += 1
        self.received_task_ids.append(task_id)
        self.received_while_training.append(self.training)
class CheckAttributesWrapper(IterableWrapper):
    """Wrapper that records the values of some env attributes around each step.

    `values` maps a step count to a dict of `{attribute name: value}`.
    """

    def __init__(self, env, attributes: List[str]):
        super().__init__(env)
        self.attributes = attributes
        self.values: Dict[int, Dict[str, Any]] = {}
        self.steps = 0

    def _store_current_attributes(self):
        """Save the current value of every tracked attribute under `self.steps`."""
        snapshot = self.values.setdefault(self.steps, {})
        for name in self.attributes:
            wrapped_value = getattr(self.env, name)
            base_value = getattr(self.env.unwrapped, name)
            # The wrappers and the unwrapped env must agree on the value.
            assert wrapped_value == base_value, (name, wrapped_value, base_value)
            snapshot[name] = wrapped_value

    def step(self, action):
        """Take a step in the env, recording the attributes before and after it."""
        self._store_current_attributes()
        outcome = super().step(action)
        self.steps += 1
        self._store_current_attributes()
        return outcome
================================================
FILE: sequoia/settings/rl/task_incremental/__init__.py
================================================
from .setting import TaskIncrementalRLSetting
================================================
FILE: sequoia/settings/rl/task_incremental/setting.py
================================================
from dataclasses import dataclass
from sequoia.utils.utils import constant
from ..incremental import IncrementalRLSetting
@dataclass
class TaskIncrementalRLSetting(IncrementalRLSetting):
    """Continual RL setting with clear task boundaries and task labels.

    The task labels are given at both train and test time.
    """

    # Both flags are fixed to True through the `constant` helper.
    # NOTE(review): presumably `constant` prevents overriding the value from the
    # command-line / constructor -- confirm in `sequoia.utils.utils`.
    task_labels_at_train_time: bool = constant(True)
    task_labels_at_test_time: bool = constant(True)
================================================
FILE: sequoia/settings/rl/task_incremental/setting_test.py
================================================
from typing import ClassVar, List, Type
import pytest
from sequoia.common.gym_wrappers import MultiTaskEnvironment
from sequoia.settings.rl.incremental.setting_test import (
TestIncrementalRLSetting as IncrementalRLSettingTests,
)
from .setting import TaskIncrementalRLSetting
class TestTaskIncrementalRLSetting(IncrementalRLSettingTests):
    # Re-runs the tests inherited from the incremental RL setting test class, with
    # `TaskIncrementalRLSetting` as the setting under test.
    Setting: ClassVar[Type[Setting]] = TaskIncrementalRLSetting
    # Annotation only, no value is assigned here.
    # NOTE(review): presumably the `dataset` fixture comes from the parent test
    # class -- confirm.
    dataset: pytest.fixture
def test_task_label_space_of_env_has_right_n():
    """The task-label space of the setting and of its envs has `nb_tasks` entries."""
    setting = TaskIncrementalRLSetting(dataset="MountainCarContinuous-v0")
    expected_n = setting.nb_tasks

    assert setting.observation_space.task_labels.n == expected_n
    # Same check on the train, validation and test environments, in that order.
    env_factories = (
        setting.train_dataloader,
        setting.val_dataloader,
        setting.test_dataloader,
    )
    for make_env in env_factories:
        assert make_env().observation_space.task_labels.n == expected_n
def test_task_schedule_is_used():
    """Test that the tasks are switching over time.

    For each of the two tasks, a training env is created and stepped through for
    `setting.steps_per_phase` steps with random actions, while the `length`
    attribute of the env (the length of the pole) is recorded after every step.
    """
    setting = TaskIncrementalRLSetting(
        dataset="CartPole-v0",
        train_max_steps=100,
        nb_tasks=2,
    )
    # Pole length expected during the first task.
    default_length = 0.5

    for task_id in range(2):
        setting.current_task_id = task_id

        env = setting.train_dataloader(batch_size=None)
        env: MultiTaskEnvironment

        # NOTE(review): 3 entries for 2 tasks -- presumably the schedules also hold
        # an entry for the final step; confirm.
        assert len(setting.train_task_schedule) == 3
        assert len(setting.val_task_schedule) == 3
        assert len(setting.test_task_schedule) == 3

        starting_length = env.length

        _ = env.reset()
        lengths: List[float] = []

        for i in range(setting.steps_per_phase):
            obs, reward, done, info = env.step(env.action_space.sample())
            # NOTE: If we're done on the last step, we can't reset, since that would go
            # over the step budget.
            if done and i != setting.steps_per_phase - 1:
                env.reset()
            # Get the length of the pole from the environment.
            length = env.length
            lengths.append(length)

        if task_id == 0:
            # First task: the pole keeps its default length throughout.
            assert starting_length == default_length
            assert all(length == default_length for length in lengths)
        else:
            # The length of the pole is different than the default length
            assert starting_length != default_length
            # The length shouldn't be changing over time.
            assert all(length == starting_length for length in lengths)
================================================
FILE: sequoia/settings/rl/task_incremental/tasks.py
================================================
from ..incremental.tasks import make_incremental_task
# NOTE: For now there aren't any tasks specific to only task-incremental.
# The task factory of the task-incremental setting is therefore a plain alias of
# the one used by the incremental setting.
make_task_incremental_task = make_incremental_task
# NOTE(review): this reads an `is_supported` attribute off the task factory --
# confirm that `make_incremental_task` actually exposes one.
is_supported = make_task_incremental_task.is_supported
================================================
FILE: sequoia/settings/rl/traditional/__init__.py
================================================
from .setting import TraditionalRLSetting
================================================
FILE: sequoia/settings/rl/traditional/setting.py
================================================
""" 'Classical' RL setting.
"""
from dataclasses import dataclass
from typing import ClassVar, Dict
from simple_parsing.helpers import choice
from typing_extensions import Final
from sequoia.utils.utils import constant
# NOTE: We can reuse those results for now, since they describe the same thing.
from ..discrete.results import DiscreteTaskAgnosticRLResults as TraditionalRLResults
from ..incremental import IncrementalRLSetting
@dataclass
class TraditionalRLSetting(IncrementalRLSetting):
    """Your usual "Classical" Reinforcement Learning setting.

    Implemented as an `IncrementalRLSetting` with a stationary context and a single
    training phase (`method.fit` is called once).
    """

    # Class variable that holds the dict of available environments.
    available_datasets: ClassVar[Dict[str, str]] = IncrementalRLSetting.available_datasets.copy()
    # Which dataset/environment to use for training, validation and testing.
    dataset: str = choice(available_datasets, default="CartPole-v0")

    # IDEA: By default, only use one task, although there may actually be more than one.
    # NOTE(review): the default is currently 5 tasks, not 1 -- confirm which one is
    # intended.
    nb_tasks: int = 5

    stationary_context: Final[bool] = constant(True)
    known_task_boundaries_at_train_time: Final[bool] = constant(True)
    task_labels_at_train_time: Final[bool] = constant(True)
    task_labels_at_test_time: bool = False

    # Results: ClassVar[Type[Results]] = TaskSequenceResults

    def __post_init__(self):
        super().__post_init__()
        assert self.stationary_context

    def apply(self, method, config=None) -> TraditionalRLResults:
        """Apply `method` to this setting and return the results.

        The parent class returns results that hold one entry per training phase.
        There is a single phase in this setting, so that only entry is returned.
        """
        results: IncrementalRLSetting.Results = super().apply(method, config=config)
        assert len(results.task_sequence_results) == 1
        return results.task_sequence_results[0]

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Defaults to the number of tasks in the parent, but this setting (like the
        so-called Multi-Task Settings) always uses 1.
        """
        return 1
================================================
FILE: sequoia/settings/rl/traditional/setting_test.py
================================================
# TODO: Tests for the "traditional" RL setting.
from typing import ClassVar, Type
import pytest
import torch
from sequoia.settings.assumptions.incremental_results import TaskSequenceResults
from sequoia.settings.rl.setting_test import DummyMethod
from ..incremental.setting_test import TestIncrementalRLSetting as IncrementalRLSettingTests
from .setting import TraditionalRLSetting
class TestTraditionalRLSetting(IncrementalRLSettingTests):
    """Runs the incremental RL setting tests against `TraditionalRLSetting`."""

    Setting: ClassVar[Type[Setting]] = TraditionalRLSetting
    dataset: pytest.fixture

    def test_on_task_switch_is_called(self):
        """`fit` is called only once, and tasks aren't seen one after the other."""
        setting = self.Setting(
            dataset="CartPole-v0",
            nb_tasks=5,
            # train_steps_per_task=100,
            train_max_steps=500,
            test_max_steps=500,
        )
        assert setting.stationary_context
        method = DummyMethod()
        _ = setting.apply(method)
        # assert setting.task_labels_at_test_time
        # assert False, method.observation_task_labels
        assert method.n_fit_calls == 1
        # The task labels seen during training shouldn't simply be 0, 1, ..., N-1 in
        # order, since the context is stationary in this setting.
        assert torch.unique_consecutive(
            torch.as_tensor(method.observation_task_labels)
        ).tolist() != list(range(setting.nb_tasks))

    # NOTE: This class used to define `validate_results` twice. Only the second
    # definition (kept below) was ever bound to the class and executed: the first
    # one was silently replaced, and has been removed.
    def validate_results(
        self,
        setting: TraditionalRLSetting,
        method: DummyMethod,
        results: TraditionalRLSetting.Results,
    ) -> None:
        """Check that the results make sense.

        The Dummy Method used also keeps useful attributes, which we check here.
        """
        assert results
        assert results.objective
        assert isinstance(results, TaskSequenceResults)
        assert len(results.task_results) == setting.nb_tasks
        assert results.average_metrics == sum(
            task_result.average_metrics for task_result in results.task_results
        )
        assert method.n_fit_calls == 1
        # BUG: Traditional/Multi-Task RL have one too many task labels:
        assert list(set(method.observation_task_labels)) == list(range(setting.nb_tasks))
        train_task_labels = torch.as_tensor(method.observation_task_labels)
        # Collapse the runs of identical consecutive labels.
        new_train_task_labels = torch.unique_consecutive(train_task_labels).tolist()
        if setting.nb_tasks > 1:
            assert new_train_task_labels != list(range(setting.nb_tasks))
        else:
            assert set(method.observation_task_labels) == {0}
================================================
FILE: sequoia/settings/rl/wrappers/__init__.py
================================================
""" Wrappers specific to the RL settings, so not exactly as general as those in
`common/gym_wrappers`.
"""
from .measure_performance import MeasureRLPerformanceWrapper
from .task_labels import HideTaskLabelsWrapper, RemoveTaskLabelsWrapper
from .typed_objects import NoTypedObjectsWrapper, TypedObjectsWrapper
================================================
FILE: sequoia/settings/rl/wrappers/measure_performance.py
================================================
""" TODO: Create a Wrapper that measures performance over the first epoch of training in SL.
Then maybe after we can make something more general that also works for RL.
"""
from typing import Any, Dict, List, Optional, Sequence, Union
import numpy as np
from torch import Tensor
import wandb
from sequoia.common.gym_wrappers.measure_performance import MeasurePerformanceWrapper
from sequoia.common.metrics import Metrics
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.settings.base import Actions, Observations, Rewards
from sequoia.settings.rl import ActiveEnvironment
from sequoia.utils.utils import add_prefix
class MeasureRLPerformanceWrapper(
    MeasurePerformanceWrapper
    # MeasurePerformanceWrapper[ActiveEnvironment] # python 3.7
    # MeasurePerformanceWrapper[ActiveEnvironment, EpisodeMetrics] # python 3.8+
):
    """Wrapper that measures the performance (episode rewards / lengths) in RL envs.

    An `EpisodeMetrics` object is stored in `self._metrics`, keyed by the step count,
    every time at least one episode ends during the evaluation period.
    """

    def __init__(
        self,
        env: ActiveEnvironment,
        eval_episodes: int = None,
        eval_steps: int = None,
        wandb_prefix: str = None,
    ):
        """Wrap `env`.

        Parameters
        ----------
        env : ActiveEnvironment
            The environment to wrap.
        eval_episodes : int, optional
            Number of episodes during which the performance is monitored. Only used
            when `eval_steps` isn't set.
        eval_steps : int, optional
            Number of steps during which the performance is monitored.
        wandb_prefix : str, optional
            Prefix added to the keys of the metrics logged to wandb.
        """
        super().__init__(env)
        self._metrics: Dict[int, EpisodeMetrics] = {}
        self._eval_episodes = eval_episodes or 0
        self._eval_steps = eval_steps or 0
        # Counter for the number of steps.
        self._steps: int = 0
        # Counter for the number of episodes
        self._episodes: int = 0
        self.wandb_prefix = wandb_prefix

        self._batch_size = self.env.num_envs if self.is_vectorized else 1

        # Running totals for the episode in progress in each (sub-)environment.
        self._current_episode_reward = np.zeros([self._batch_size], dtype=float)
        self._current_episode_steps = np.zeros([self._batch_size], dtype=int)

    @property
    def in_evaluation_period(self) -> bool:
        """Returns whether the performance is currently being monitored.

        The limit on the number of steps has priority over the limit on the number
        of episodes. Without any limit, the performance is always monitored.

        Returns
        -------
        bool
            Whether or not the performance on the env is being monitored.
        """
        if self._eval_steps:
            return self._steps <= self._eval_steps
        if self._eval_episodes:
            # Compare the number of episodes performed so far with the limit. (This
            # used to compare the limit with itself, which was always True.)
            return self._episodes <= self._eval_episodes
        return True

    def reset(self) -> Union[Observations, Any]:
        """Reset the wrapped env and return the initial observation."""
        obs = super().reset()
        # assert isinstance(obs, Observations)
        return obs

    def step(self, action: Actions):
        """Take a step in the env, updating the counters and the episode metrics."""
        observation, rewards_, done, info = super().step(action)
        self._steps += 1

        reward = rewards_.y if isinstance(rewards_, Rewards) else rewards_

        # Count the episodes that just ended. The count is converted to a plain int,
        # so that `self._episodes` doesn't turn into a numpy / torch scalar.
        if isinstance(done, bool):
            self._episodes += int(done)
        elif isinstance(done, np.ndarray):
            self._episodes += int(sum(done))
        else:
            self._episodes += int(done.int().sum())

        if self.in_evaluation_period:
            if self.is_vectorized:
                for env_index, (env_is_done, env_reward) in enumerate(zip(done, reward)):
                    self._current_episode_reward[env_index] += env_reward
                    self._current_episode_steps[env_index] += 1
            else:
                self._current_episode_reward[0] += reward
                self._current_episode_steps[0] += 1

            metrics = self.get_metrics(action, reward, done)

            if metrics is not None:
                assert self._steps not in self._metrics, "two metrics at same step?"
                self._metrics[self._steps] = metrics

        return observation, rewards_, done, info

    def get_metrics(
        self,
        action: Union[Actions, Any],
        reward: Union[Rewards, Any],
        done: Union[bool, Sequence[bool]],
    ) -> Optional[EpisodeMetrics]:
        """Create the metrics for the episodes that ended at this step, if any.

        The running totals of the environments whose episode ended are reset to 0.
        The metrics are also logged to wandb when there is an active run.

        Returns
        -------
        Optional[EpisodeMetrics]
            The sum of the metrics of all episodes that just ended, or None when no
            episode ended at this step.
        """
        # TODO: Add some metric about the entropy of the policy's distribution?
        rewards = reward.y if isinstance(reward, Rewards) else reward
        actions = action.y_pred if isinstance(action, Actions) else action
        dones: Sequence[bool]
        if not self.is_vectorized:
            rewards = [rewards]
            actions = [actions]
            assert isinstance(done, bool)
            dones = [done]
        else:
            assert isinstance(done, (np.ndarray, Tensor))
            dones = done

        metrics: List[EpisodeMetrics] = []
        for env_index, (env_is_done, reward) in enumerate(zip(dones, rewards)):
            if env_is_done:
                metrics.append(
                    EpisodeMetrics(
                        n_samples=1,
                        # The average reward per episode.
                        mean_episode_reward=self._current_episode_reward[env_index],
                        # The average length of each episode.
                        mean_episode_length=self._current_episode_steps[env_index],
                    )
                )
                self._current_episode_reward[env_index] = 0
                self._current_episode_steps[env_index] = 0

        if not metrics:
            return None

        metric = sum(metrics, Metrics())
        if wandb.run:
            log_dict = metric.to_log_dict()
            if self.wandb_prefix:
                log_dict = add_prefix(log_dict, prefix=self.wandb_prefix, sep="/")
            log_dict["steps"] = self._steps
            log_dict["episode"] = self._episodes
            wandb.log(log_dict)

        return metric
================================================
FILE: sequoia/settings/rl/wrappers/measure_performance_test.py
================================================
import itertools
from functools import partial
from itertools import accumulate
import numpy as np
import pytest
from gym.vector import SyncVectorEnv
# from sequoia.settings.rl.continual import ContinualRLSetting
from sequoia.common.gym_wrappers import EnvDataset
from sequoia.common.metrics.rl_metrics import EpisodeMetrics
from sequoia.conftest import DummyEnvironment
from .measure_performance import MeasureRLPerformanceWrapper
def test_measure_RL_performance_basics():
    """The wrapper stores one `EpisodeMetrics` at the last step of every episode."""
    env = DummyEnvironment(start=0, target=5, max_value=10)
    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)
    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)

    episode_returns = []
    episode_lengths = []

    for episode in range(5):
        n_steps = 0
        total_reward = 0
        obs = env.reset()
        print(f"Episode {episode}, obs: {obs}")
        done = False
        while not done:
            obs, reward, done, info = env.step(env.action_space.sample())
            total_reward += reward
            n_steps += 1
            # print(obs, reward, done, info)

        episode_lengths.append(n_steps)
        episode_returns.append(total_reward)

    from itertools import accumulate

    # The metrics of an episode are stored at the (cumulative) step where it ended.
    expected_metrics = {
        end_step: EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_return,
            mean_episode_length=length,
        )
        for length, end_step, episode_return in zip(
            episode_lengths, accumulate(episode_lengths), episode_returns
        )
    }

    assert env.get_online_performance() == expected_metrics
def test_measure_RL_performance_iteration():
    """Same check as the 'basics' test, but interacting with the env by iterating
    over it and sending actions with `env.send`, rather than with `reset`/`step`.
    """
    env = DummyEnvironment(start=0, target=5, max_value=10)
    from gym.wrappers import TimeLimit

    max_episode_steps = 50
    env = EnvDataset(env)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)
    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)
    all_episode_rewards = []
    all_episode_steps = []

    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        # NOTE(review): presumably one pass over the env is one episode -- confirm
        # against `EnvDataset`.
        for step, obs in enumerate(env):
            print(f"Episode {episode}, obs: {obs}")
            action = env.action_space.sample()
            reward = env.send(action)
            episode_reward += reward
            episode_steps += 1
            # print(obs, reward, done, info)
            assert step <= max_episode_steps, "shouldn't be able to iterate longer than that."

        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)

    # The metrics of an episode are expected at the cumulative step count at which
    # that episode ended.
    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
        all_episode_steps, accumulate(all_episode_steps), all_episode_rewards
    ):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )

    assert env.get_online_performance() == expected_metrics
@pytest.mark.xfail(
    reason=f"TODO: The wrapper seems to works but the test condition is too complicated"
)
def test_measure_RL_performance_batched_env():
    """Check the metrics produced by the wrapper on a batched (vectorized) env.

    Marked as an expected failure: the expected values below are known to be
    incomplete (see the FIXME notes).
    """
    batch_size = 3
    # Each of the sub-environments starts from a different value.
    start = [i for i in range(batch_size)]
    target = 5
    env = EnvDataset(
        SyncVectorEnv(
            [
                partial(DummyEnvironment, start=start[i], target=target, max_value=target * 2)
                for i in range(batch_size)
            ]
        )
    )
    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)
    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)
    all_episode_rewards = []
    all_episode_steps = []

    # Perform exactly 100 steps in the batched env.
    for step, obs in enumerate(itertools.islice(env, 100)):
        print(f"step {step} obs: {obs}")
        action = np.ones(batch_size)  # always increment the counter
        reward = env.send(action)
        print(env.done_)
        # print(obs, reward, done, info)
    assert step == 99
    from collections import defaultdict

    from sequoia.common.metrics import Metrics

    # Expected metrics: one episode per sub-environment at every non-zero multiple of
    # `target` steps.
    expected_metrics = defaultdict(Metrics)
    for i in range(101):
        for env_index in range(batch_size):
            if i and i % target == 0:
                expected_metrics[i] += EpisodeMetrics(
                    n_samples=1,
                    mean_episode_reward=10.0,  # ? FIXME: Actually understand this condition
                    mean_episode_length=target,
                )

            # FIXME: This test is a bit too complicated, hard to follow. I'll keep the
            # batches synced-up for now.
            # if i > 0 and (i + env_index) % target == 0:
            #     expected_metrics[i] += EpisodeMetrics(
            #         n_samples=1,
            #         mean_episode_reward=sum(target - (i + env_index % target) for j in range(start[env_index], target)),
            #         mean_episode_length=target - start[env_index] - 1
            #     )

    assert env.get_online_performance() == expected_metrics
================================================
FILE: sequoia/settings/rl/wrappers/no_typed_objects.py
================================================
================================================
FILE: sequoia/settings/rl/wrappers/task_labels.py
================================================
from collections.abc import Mapping
from dataclasses import is_dataclass, replace
from functools import singledispatch
from typing import Any, Dict, Optional, Tuple, TypeVar, Union
import gym
from gym import Space, spaces
from sequoia.common import Batch
from sequoia.common.gym_wrappers import IterableWrapper, TransformObservation
from sequoia.common.gym_wrappers.multi_task_environment import add_task_labels
from sequoia.common.gym_wrappers.utils import IterableWrapper
from sequoia.common.spaces import Sparse, TypedDictSpace
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.settings.base.objects import ObservationType
T = TypeVar("T")


@singledispatch
def hide_task_labels(observation: Tuple[T, int]) -> Tuple[T, Optional[int]]:
    """Hide the task label of an `(observation, task_label)` pair.

    This is the generic fallback of a `singledispatch` family: more specific
    handlers (dicts, `Batch` objects, gym spaces) are registered on it below.

    Returns a 2-tuple holding the first item of `observation` and `None` in place
    of the task label.
    """
    assert len(observation) == 2
    first_item = observation[0]
    return first_item, None
@hide_task_labels.register(dict)
def _hide_task_labels_in_dict(observation: Dict) -> Dict:
    """Return a shallow copy of `observation` whose "task_labels" entry is `None`.

    The input dictionary is left untouched; it must already contain the
    "task_labels" key.
    """
    hidden = observation.copy()
    assert "task_labels" in observation
    hidden.update(task_labels=None)
    return hidden
@hide_task_labels.register
def _hide_task_labels_on_batch(observation: Batch) -> Batch:
    """Return a copy of the `Batch` with its `task_labels` field set to `None`.

    NOTE: The handler is registered without an explicit type, so `singledispatch`
    uses the annotation of `observation` to decide which type it handles.
    """
    return replace(observation, task_labels=None)
@hide_task_labels.register(Space)
def hide_task_labels_in_space(observation: Space) -> Space:
    """Fallback for gym spaces without a dedicated handler: always raises.

    Raises:
        NotImplementedError: for any space type that has no more specific handler
            registered on `hide_task_labels`.
    """
    message = f"TODO: Don't know how to remove task labels from space {observation}."
    raise NotImplementedError(message)
@hide_task_labels.register
def _hide_task_labels_in_namedtuple_space(
    observation: NamedTupleSpace,
) -> NamedTupleSpace:
    """Make the "task_labels" subspace of a `NamedTupleSpace` fully sparse.

    The task label subspace is kept (not removed) and wrapped in a `Sparse` space
    with `sparsity=1.0`. If it already is such a space, the input space is returned
    unchanged; otherwise a new space of the same type is built.
    """
    subspaces = observation._spaces.copy()
    label_space = subspaces["task_labels"]
    was_sparse = isinstance(label_space, Sparse)
    if was_sparse and label_space.sparsity == 1.0:
        # Task labels are already always hidden: nothing to change.
        return observation
    if was_sparse:
        # Keep the same base space; it gets re-wrapped with sparsity = 1.0 below.
        label_space = label_space.base
    assert not isinstance(label_space, Sparse)
    subspaces["task_labels"] = Sparse(label_space, sparsity=1.0)
    return type(observation)(**subspaces)
@hide_task_labels.register
def _hide_task_labels_in_tuple_space(observation: spaces.Tuple) -> spaces.Tuple:
    """Make the task label subspace (second item) of a 2-item `Tuple` space fully sparse.

    The task label space is kept in the tuple rather than removed: it is replaced
    with a `Sparse` space of sparsity 1.0 over the same base space.
    """
    assert len(observation.spaces) == 2, "ambiguous"
    label_space = observation.spaces[1]
    if isinstance(label_space, Sparse):
        # Unwrap the existing Sparse space, so that only its base is re-wrapped.
        label_space = label_space.base
    assert not isinstance(label_space, Sparse)
    x_space = observation[0]
    hidden_label_space = Sparse(label_space, sparsity=1.0)
    return spaces.Tuple([x_space, hidden_label_space])
@hide_task_labels.register
def hide_task_labels_in_dict_space(observation: spaces.Dict) -> spaces.Dict:
    """Make the "task_labels" subspace of a `Dict` space fully sparse.

    Returns a new space of the same type as `observation`, in which every subspace
    is reused as-is except "task_labels", which becomes a `Sparse` space with
    sparsity 1.0 over the same base space.
    """
    label_space = observation.spaces["task_labels"]
    if isinstance(label_space, Sparse):
        # Unwrap the existing Sparse space, so that only its base is re-wrapped.
        label_space = label_space.base
    assert not isinstance(label_space, Sparse)

    new_subspaces = {}
    for key, subspace in observation.spaces.items():
        if key == "task_labels":
            new_subspaces[key] = Sparse(label_space, 1.0)
        else:
            new_subspaces[key] = subspace
    return type(observation)(new_subspaces)
@hide_task_labels.register(TypedDictSpace)
def hide_task_labels_in_typed_dict_space(
    observation: TypedDictSpace[T],
) -> TypedDictSpace[T]:
    """Make the "task_labels" subspace of a `TypedDictSpace` fully sparse.

    Same as the handler for plain `Dict` spaces, except that the `dtype` of the
    input space is carried over to the new space.
    """
    label_space = observation.spaces["task_labels"]
    if isinstance(label_space, Sparse):
        # Unwrap the existing Sparse space, so that only its base is re-wrapped.
        label_space = label_space.base
    assert not isinstance(label_space, Sparse)

    new_subspaces = {}
    for key, subspace in observation.spaces.items():
        if key == "task_labels":
            new_subspaces[key] = Sparse(label_space, 1.0)
        else:
            new_subspaces[key] = subspace
    return type(observation)(new_subspaces, dtype=observation.dtype)
class HideTaskLabelsWrapper(TransformObservation):
    """Observation wrapper that sets the task labels to None instead of removing them.

    Keeping the (now empty) task label entry can be useful in order not to break the
    inheritance 'contract' when going from contexts where the task labels aren't
    available to contexts where they are.
    """

    def __init__(self, env: gym.Env, f=hide_task_labels):
        """Wrap `env`, applying `f` to the observations and hiding the task labels
        in the observation space as well.
        """
        super().__init__(env, f=f)
        wrapped_space = self.env.observation_space
        self.observation_space = hide_task_labels(wrapped_space)
@singledispatch
def remove_task_labels(observation: Any) -> Any:
    """Removes the task labels from an observation / observation space.

    Generic fallback of the `singledispatch` family: dataclasses get a copy with
    `task_labels=None`, and anything else without a registered handler is rejected.

    Raises:
        NotImplementedError: if `observation` isn't a dataclass and no handler is
            registered for its type.
    """
    if not is_dataclass(observation):
        raise NotImplementedError(
            f"No handler registered for value {observation} of type {type(observation)}"
        )
    return replace(observation, task_labels=None)
@remove_task_labels.register(spaces.Tuple)
@remove_task_labels.register(tuple)
def _(observation: Tuple[T, Any]) -> Tuple[T]:
    """Remove the task label from an `(observation, task_label)` tuple or Tuple space.

    The observation is the first item and the task label the second, which is the
    layout used by `hide_task_labels` and `RemoveTaskLabelsWrapper.space_change`.
    A 1-item input is treated as already being without task labels.

    Returns:
        The first item of `observation`.

    Raises:
        NotImplementedError: if `observation` doesn't have one or two items.
    """
    # Fix: a 2-item input used to return index 1, i.e. the task label itself,
    # instead of the observation.
    if len(observation) in (1, 2):
        return observation[0]
    raise NotImplementedError(observation)
@remove_task_labels.register
def _remove_task_labels_in_namedtuple_space(
    observation: NamedTupleSpace,
) -> NamedTupleSpace:
    """Return a new space of the same type, without the "task_labels" subspace.

    Raises a `KeyError` if the space has no "task_labels" entry.
    """
    remaining = observation._spaces.copy()
    del remaining["task_labels"]
    return type(observation)(**remaining)
@remove_task_labels.register(spaces.Dict)
@remove_task_labels.register(Mapping)
def _(observation: Dict) -> Dict:
    """Return a new mapping / Dict space of the same type, minus the "task_labels" key.

    The remaining entries are passed as keyword arguments to the constructor of
    `type(observation)`.
    """
    assert "task_labels" in observation.keys()
    kept_entries = {}
    for key, value in observation.items():
        if key != "task_labels":
            kept_entries[key] = value
    return type(observation)(**kept_entries)
class RemoveTaskLabelsWrapper(TransformObservation):
    """Removes the task labels from the observations and the observation space."""

    def __init__(self, env: gym.Env, f=remove_task_labels):
        """Wrap `env`, applying `f` to its observations and `remove_task_labels`
        to its observation space.
        """
        super().__init__(env, f=f)
        wrapped_space = self.env.observation_space
        self.observation_space = remove_task_labels(wrapped_space)

    @classmethod
    def space_change(cls, input_space: gym.Space) -> gym.Space:
        """Return the space that results from applying this wrapper to `input_space`.

        Only `Tuple` spaces are supported: the first subspace is returned.
        """
        assert isinstance(input_space, spaces.Tuple), input_space
        # assert len(input_space) == 2, input_space
        first_subspace = input_space[0]
        return first_subspace
class FixedTaskLabelWrapper(IterableWrapper):
    """Wrapper that always adds the same, given task id to the observations.

    Used when the list of envs for each task is passed, so that each env also has the
    task id as part of its observation space and in its observations.
    """

    def __init__(self, env: gym.Env, task_label: Optional[int], task_label_space: gym.Space):
        """Store the fixed task label and extend the observation space with
        `task_label_space`.
        """
        super().__init__(env=env)
        self.task_label = task_label
        self.task_label_space = task_label_space
        base_space = self.env.observation_space
        self.observation_space = add_task_labels(base_space, task_labels=task_label_space)

    def observation(self, observation: Union[ObservationType, Any]) -> ObservationType:
        """Attach the fixed task label to a single observation."""
        labelled = add_task_labels(observation, self.task_label)
        return labelled

    def reset(self):
        """Reset the wrapped env and add the task label to the first observation."""
        initial_observation = super().reset()
        return self.observation(initial_observation)

    def step(self, action):
        """Step the wrapped env and add the task label to the resulting observation."""
        step_result = super().step(action)
        obs, reward, done, info = step_result
        return self.observation(obs), reward, done, info
================================================
FILE: sequoia/settings/rl/wrappers/typed_objects.py
================================================
from dataclasses import fields
import dataclasses
from functools import singledispatch
from typing import Any, Dict, Sequence, Tuple, TypeVar, Union
import gym
import numpy as np
from gym import Space, spaces
from torch import Tensor
from sequoia.common.gym_wrappers import IterableWrapper
from sequoia.common.gym_wrappers.convert_tensors import supports_tensors
from sequoia.common.spaces import TypedDictSpace
from sequoia.common.spaces.named_tuple import NamedTupleSpace
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import (
Actions,
ActionType,
Observations,
ObservationType,
Rewards,
RewardType,
)
T = TypeVar("T")
class TypedObjectsWrapper(IterableWrapper, Environment[ObservationType, ActionType, RewardType]):
    """Wrapper that converts the observations and rewards coming from the env
    to `Batch` objects.

    Observations produced by `reset`, `step` and iteration are converted to the given
    `Observations` class and rewards to the given `Rewards` class. Actions received
    by `step` / `send` are converted back (see `action`) before being passed to the
    wrapped environment.

    NOTE: Not super necessary atm, but this would perhaps be useful if methods
    are built and expect to have a given 'type' of observations to work with,
    then any new setting that inherits from their target setting should have
    observations that subclass/inherit from the observations of their parent, so
    as not to break compatibility.

    For example, if a Method targets the ClassIncrementalSetting, then it
    expects to receive "observations" of the type described by
    ClassIncrementalSetting.Observations, and if it were to be applied on a
    TaskIncrementalSLSetting (which inherits from ClassIncrementalSetting), then
    the observations from that setting should be isinstances (or subclasses of)
    the Observations class that this method was designed to receive!
    """

    def __init__(
        self,
        env: gym.Env,
        observations_type: ObservationType,
        rewards_type: RewardType,
        actions_type: ActionType,
        observation_space: TypedDictSpace = None,
        action_space: TypedDictSpace = None,
        reward_space: TypedDictSpace = None,
    ):
        """Wrap `env` and work out the observation / action / reward spaces.

        Parameters
        ----------
        env : gym.Env
            The environment to wrap.
        observations_type, rewards_type, actions_type :
            Dataclasses (they are inspected with `dataclasses.fields`) used for the
            observations, rewards and actions. Each needs at least one field.
        observation_space, action_space, reward_space : optional
            Spaces to use instead of inferring them from the wrapped env.

        Raises
        ------
        RuntimeError
            If one of the three classes has no fields.
        NotImplementedError
            If a space isn't given and can't be inferred from the wrapped env.

        NOTE: The action and reward spaces built here are overwritten at the end of
        this constructor with those of the wrapped env; only the observation space
        computed here is kept.
        """
        self.Observations = observations_type
        self.Rewards = rewards_type
        self.Actions = actions_type
        super().__init__(env=env)
        observation_fields = fields(self.Observations)
        action_fields = fields(self.Actions)
        reward_fields = fields(self.Rewards)
        # `fields` returns an empty (falsy) tuple for a dataclass without fields.
        if not all([observation_fields, action_fields, reward_fields]):
            raise RuntimeError(
                f"The Observations/Actions/Rewards classes passed to the TypedObjectsWrapper all need to have at least one field!"
            )
        # Spaces that can be mapped directly onto a single dataclass field.
        simple_spaces = (spaces.Box, spaces.Discrete, spaces.MultiDiscrete, spaces.MultiBinary)
        # Only set when the wrapped env exposes a `num_envs` attribute.
        num_envs = getattr(self.env, "num_envs", None)
        # Set the observation space.
        # NOTE(review): this is a truthiness check rather than `is not None`.
        if observation_space:
            self.observation_space = observation_space
        elif isinstance(self.env.observation_space, spaces.Dict):
            # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`.
            self.observation_space = TypedDictSpace(
                spaces=self.env.observation_space.spaces,
                dtype=self.Observations,
            )
        elif isinstance(self.env.observation_space, simple_spaces):
            # we can get away with this since the class has only one field and the space is simple.
            field_name = observation_fields[0].name
            if len(observation_fields) > 1:
                # all the other fields need to have a default value, since the space doesn't have any.
                # TODO: Create a `ConstantSpace`, `NoneSpace`. If a field has `None` default value,
                # put a
                # Fields that must be passed to the constructor (no default of any kind).
                required_fields = [
                    f
                    for f in observation_fields
                    if f.default is dataclasses.MISSING
                    and f.default_factory is dataclasses.MISSING
                    and f.init
                ]
                required_field_names = [f.name for f in required_fields]
                # Only the first field may be required, since it is the only one the
                # wrapped env's space can provide a value for.
                if any(f.name != field_name for f in required_fields):
                    raise NotImplementedError(
                        f"Can't infer the observaiton space is given class {self.Observations}, "
                        f"since has required fields {required_field_names} "
                        f"that aren't present in the observation space. "
                    )
            self.observation_space = TypedDictSpace(
                spaces={field_name: self.env.observation_space}, dtype=self.Observations
            )
        else:
            raise NotImplementedError(
                f"Need to pass the observation space to the TypedObjectsWrapper constructor when "
                f"the wrapped env's observation space isn't already a Dict or TypedDictSpace and "
                f"`Observations` has more than one field. (Observations: {self.Observations}, "
                f"observation_fields: {[f.name for f in observation_fields]})"
            )
        # Set/construct the action space.
        if action_space:
            self.action_space = action_space
        elif isinstance(self.env.action_space, spaces.Dict):
            # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`.
            self.action_space = TypedDictSpace(
                spaces=self.env.action_space.spaces,
                dtype=self.Actions,
            )
        elif (isinstance(self.env.action_space, simple_spaces) and len(action_fields) == 1) or (
            isinstance(self.env.action_space, spaces.Tuple) and num_envs
        ):
            # Either a simple space with a single-field Actions class, or a Tuple action
            # space on an env that has a (truthy) `num_envs` attribute.
            field_name = action_fields[0].name
            self.action_space = TypedDictSpace(
                spaces={field_name: self.env.action_space}, dtype=self.Actions
            )
        else:
            raise NotImplementedError(
                "Need to pass the action space to the TypedObjectsWrapper constructor when "
                "the wrapped env's action space isn't already a Dict or TypedDictSpace and "
                "the Actions class doesn't have just one field."
                f"(wrapped action space: {self.env.action_space}, Actions: {self.Actions})"
            )
        # Set / construct the reward space.
        # Get the default reward space in case the wrapped env doesn't have a `reward_space` attr.
        default_reward_space = spaces.Box(
            low=self.env.reward_range[0],
            high=self.env.reward_range[1],
            shape=((num_envs,) if num_envs is not None else ()),
            dtype=np.float64,
        )
        if reward_space:
            self.reward_space = reward_space
        elif not hasattr(self.env, "reward_space"):
            if len(reward_fields) != 1:
                raise NotImplementedError(
                    "Need to pass the reward space to the TypedObjectsWrapper constructor when "
                    "the wrapped env doesn't have a `reward_space` attribute and the Rewards "
                    "class has more than one field."
                )
            field_name = reward_fields[0].name
            self.reward_space = TypedDictSpace(
                spaces={field_name: default_reward_space},
                dtype=self.Rewards,
            )
        elif isinstance(self.env.reward_space, spaces.Dict):
            # Convert the spaces.Dict into a TypedDictSpace, or replace a TypedDictSpace's `dtype`.
            self.reward_space = TypedDictSpace(
                spaces=self.env.reward_space.spaces,
                dtype=self.Rewards,
            )
        elif isinstance(self.env.reward_space, simple_spaces) and len(reward_fields) == 1:
            field_name = reward_fields[0].name
            self.reward_space = TypedDictSpace(
                spaces={field_name: self.env.reward_space},
                dtype=self.Rewards,
            )
        else:
            raise NotImplementedError(
                "Need to pass the reward space to the TypedObjectsWrapper constructor when "
                "the wrapped env's reward space isn't already a Dict or TypedDictSpace and "
                "the Rewards class doesn't have just one field."
            )
        # TODO: Using a TypedDictSpace for the action/reward spaces is a small change in code, but
        # will most likely have a large impact on all the methods and tests!
        # THis here can be used to 'turn off' the changes to those spaces done above:
        # (These two assignments discard the action / reward spaces computed above, even
        # when they were passed explicitly to the constructor.)
        self.action_space = self.env.action_space
        self.reward_space = getattr(self.env, "reward_space", default_reward_space)
        # if isinstance(self.env.observation_space, NamedTupleSpace):
        #     self.observation_space = self.env.observation_space
        #     self.observation_space.dtype = self.Observations

    def step(
        self, action: ActionType
    ) -> Tuple[
        ObservationType, RewardType, Union[bool, Sequence[bool]], Union[Dict, Sequence[Dict]]
    ]:
        """Step the wrapped env, returning a typed observation and a typed reward.

        `done` and `info` are returned exactly as given by the wrapped env.
        """
        # "unwrap" the actions before passing it to the wrapped environment.
        action = self.action(action)
        observation, reward, done, info = self.env.step(action)
        # TODO: Make the observation space a Dict
        observation = self.observation(observation)
        reward = self.reward(reward)
        return observation, reward, done, info

    def observation(self, observation: Any) -> ObservationType:
        """Convert an observation from the wrapped env into an `Observations` object.

        Instances of `Observations` are returned as-is, tuples are passed as positional
        arguments, dicts as keyword arguments, and tensors / arrays as the single
        positional argument of the `Observations` constructor.
        """
        if isinstance(observation, self.Observations):
            return observation
        if isinstance(observation, tuple):
            # TODO: Dissallow this: shouldn't handle tuples since they can be quite ambiguous.
            # assert False, observation
            return self.Observations(*observation)
        if isinstance(observation, dict):
            try:
                return self.Observations(**observation)
            except TypeError:
                # Surface both the class and the offending dict in the assertion error.
                assert False, (self.Observations, observation)
        assert isinstance(observation, (Tensor, np.ndarray))
        return self.Observations(observation)

    def action(self, action: ActionType) -> Any:
        """Convert an action into something the wrapped env's action space accepts.

        `Actions` objects are replaced with their `y_pred` attribute, tensors are
        converted to numpy arrays when the wrapped action space doesn't support
        tensors, and a value that isn't in a `Tuple` action space is converted to a
        tuple.
        """
        # TODO: Assert this eventually
        # assert isinstance(action, Actions), action
        if isinstance(action, Actions):
            action = action.y_pred
        if isinstance(action, Tensor) and not supports_tensors(self.env.action_space):
            action = action.detach().cpu().numpy()
        if action not in self.env.action_space:
            if isinstance(self.env.action_space, spaces.Tuple):
                action = tuple(action)
        return action

    def reward(self, reward: Any) -> RewardType:
        """Wrap a raw reward in a `Rewards` object (as its first positional argument)."""
        return self.Rewards(reward)

    def reset(self, **kwargs) -> ObservationType:
        """Reset the wrapped env and return the first observation as an `Observations`."""
        observation = self.env.reset(**kwargs)
        return self.observation(observation)

    def __iter__(self):
        """Iterate over the wrapped env, converting each item to typed objects.

        2-tuples are treated as (observation, reward) pairs, 1-tuples as a lone
        observation, and anything else as an observation.
        """
        for batch in self.env:
            if isinstance(batch, tuple) and len(batch) == 2:
                yield self.observation(batch[0]), self.reward(batch[1])
            elif isinstance(batch, tuple) and len(batch) == 1:
                yield self.observation(batch[0])
            else:
                yield self.observation(batch)

    def send(self, action: ActionType) -> RewardType:
        """Send an action to the wrapped env and return its reward as a `Rewards`."""
        action = self.action(action)
        reward = self.env.send(action)
        return self.reward(reward)
# TODO: turn unwrap into a single-dispatch callable.
# TODO: Atm 'unwrap' basically means "get rid of everything apart from the first
# item", which is a bit ugly.
# Unwrap should probably be a method on the corresponding `Batch` class, which could
# maybe accept a Space to fit into?
@singledispatch
def unwrap(obj: Any) -> Any:
    """Strip the 'typed object' layer from `obj`.

    Generic fallback of the `singledispatch` family: objects without a registered
    handler are returned unchanged.
    """
    return obj
    # raise NotImplementedError(obj)
@unwrap.register(int)
@unwrap.register(float)
@unwrap.register(np.ndarray)
@unwrap.register(list)
def _unwrap_scalar(v):
    """Return ints, floats, numpy arrays and lists unchanged."""
    return v
@unwrap.register(Actions)
def _unwrap_actions(obj: Actions) -> Union[Tensor, np.ndarray]:
    """Return the `y_pred` attribute of an `Actions` object."""
    return obj.y_pred
@unwrap.register(Rewards)
def _unwrap_rewards(obj: Rewards) -> Union[Tensor, np.ndarray]:
    """Return the `y` attribute of a `Rewards` object."""
    return obj.y
@unwrap.register(Observations)
def _unwrap_observations(obj: Observations) -> Union[Tensor, np.ndarray]:
    """Return the `x` attribute of an `Observations` object, dropping everything else."""
    # This gets rid of everything except just the image.
    # TODO: Keep the task labels? or no? For now, no.
    return obj.x
@unwrap.register(NamedTupleSpace)
def _unwrap_space(obj: NamedTupleSpace) -> Space:
    """Return the first subspace of a `NamedTupleSpace`."""
    # This gets rid of everything except just the first item in the space.
    # TODO: Keep the task labels? or no? For now, no.
    return obj[0]
@unwrap.register(TypedDictSpace)
def _unwrap_space(obj: TypedDictSpace) -> spaces.Dict:
    """Convert a `TypedDictSpace` into a plain `spaces.Dict` with the same subspaces.

    NOTE: This reuses the name `_unwrap_space` of the `NamedTupleSpace` handler
    defined just before it; both stay registered on `unwrap`, since registration
    happens when each decorator runs.
    """
    # Unlike the NamedTupleSpace handler, this keeps every subspace (task labels
    # included) and only drops the `TypedDictSpace` type.
    # TODO: Keep the task labels? or no?
    return spaces.Dict(obj.spaces)
class NoTypedObjectsWrapper(IterableWrapper):
    """Does the opposite of the 'TypedObjects' wrapper.

    Can be added on top of that wrapper to strip off the typed objects it
    returns and just return tensors / np.ndarrays instead.

    This is used for example when applying a method from stable-baselines3, as
    they only want to get np.ndarrays as inputs.
    """

    def __init__(self, env: gym.Env):
        """Wrap `env` and unwrap its observation space."""
        super().__init__(env)
        typed_space = self.env.observation_space
        self.observation_space = unwrap(typed_space)

    def step(self, action):
        """Step the wrapped env with a raw action; return a raw observation and reward."""
        if isinstance(action, Actions):
            action = unwrap(action)
        if hasattr(action, "detach"):
            action = action.detach()
        assert action in self.action_space, (action, type(action), self.action_space)
        typed_observation, typed_reward, done, info = self.env.step(action)
        return unwrap(typed_observation), unwrap(typed_reward), done, info

    def reset(self, **kwargs):
        """Reset the wrapped env and return the unwrapped first observation."""
        return unwrap(self.env.reset(**kwargs))
================================================
FILE: sequoia/settings/settings.puml
================================================
@startuml settings
' Class diagram of the `sequoia.settings` package: the assumption classes included
' from assumptions/assumptions.puml, plus the RL and SL branches of the settings tree.
' skinparam linetype polyline
' skinparam linetype ortho
' skinparam classFontSize 20
' fieldFontSize 20
' !include gym.puml
' !include assumptions/assumptions.puml
hide empty members
' hide fields
' hide methods
' ' Use this to turn on / off the display of assumptions
' remove Assumptions
' ' Use this to turn on / off groups of assumptions
' remove supervision_assumptions
' remove action_space_assumption
' remove Settings
' Comment/uncomment this to show/hide the descriptions for each node.
' hide fields
package settings as sequoia.settings {
' !include base/base.puml
' package settings.base {
' }
package settings.assumptions {
!include assumptions/assumptions.puml
remove assumptions
remove <>
remove <>
remove <>
remove <>
' remove supervision_assumptions
' remove context_assumption_family
' remove <>
}
' !include settings/rl/rl.puml
' RL branch: each setting is declared as implementing an assumption class and the
' RL setting it builds upon.
package rl {
' ContinualRLSetting -.- rl.continuous.ContinuousTaskAgnosticRLSetting
abstract class RLSetting <> extends SparseFeedback, ActiveEnvironment {}
package continuous as rl.continuous {
class ContinuousTaskAgnosticRLSetting <> implements RLSetting, ContinuousTaskAgnosticSetting {}
}
package discrete as rl.discrete {
class DiscreteTaskAgnosticRLSetting <> implements DiscreteTaskAgnosticSetting, ContinuousTaskAgnosticRLSetting {}
}
package incremental as rl.incremental {
class IncrementalRLSetting <> implements IncrementalSetting, DiscreteTaskAgnosticRLSetting {}
}
package class_incremental as rl.class_incremental {
class ClassIncrementalRLSetting <> implements ClassIncrementalSetting, IncrementalRLSetting {}
}
package domain_incremental as rl.domain_incremental {
class DomainIncrementalRLSetting <> implements DomainIncrementalSetting, IncrementalRLSetting {}
}
package traditional as rl.traditional {
class TraditionalRLSetting <> implements TraditionalSetting, IncrementalRLSetting {}
}
package task_incremental as rl.task_incremental {
class TaskIncrementalRLSetting <> implements TaskIncrementalSetting, IncrementalRLSetting {}
}
package multi_task as rl.multi_task {
class MultiTaskRLSetting <> implements MultiTaskSetting, TaskIncrementalRLSetting, TraditionalRLSetting {}
}
' NOTE(review): presumably these two packages are declared above but left out of
' the rendered diagram - confirm against the PlantUML `remove` directive.
remove rl.class_incremental
remove rl.domain_incremental
}
' !include settings/rl/sl.puml
' SL branch: mirrors the structure of the RL branch above.
package sl {
abstract class SLSetting <> extends DenseFeedback, PassiveEnvironment {}
package continuous as sl.continuous {
class ContinuousTaskAgnosticSLSetting <> implements SLSetting, ContinuousTaskAgnosticSetting {}
}
package discrete as sl.discrete {
class DiscreteTaskAgnosticSLSetting <> implements DiscreteTaskAgnosticSetting, ContinuousTaskAgnosticSLSetting {}
}
package incremental as sl.incremental {
class IncrementalSLSetting <> implements IncrementalSetting, DiscreteTaskAgnosticSLSetting {}
}
package class_incremental as sl.class_incremental {
class ClassIncrementalSLSetting <> implements ClassIncrementalSetting, IncrementalSLSetting {}
}
package domain_incremental as sl.domain_incremental {
class DomainIncrementalSLSetting <> implements DomainIncrementalSetting, IncrementalSLSetting {}
}
package traditional as sl.traditional {
class TraditionalSLSetting <> implements TraditionalSetting, IncrementalSLSetting {}
}
package task_incremental as sl.task_incremental {
class TaskIncrementalSLSetting <> implements TaskIncrementalSetting, IncrementalSLSetting {}
}
package multi_task as sl.multi_task {
class MultiTaskSLSetting <> implements MultiTaskSetting, TaskIncrementalSLSetting, TraditionalSLSetting {}
}
remove sl.class_incremental
remove sl.domain_incremental
}
}
@enduml
================================================
FILE: sequoia/settings/sl/README.md
================================================
# SL Tree
This is the tree of Settings on the SL (Supervised Learning) side.
================================================
FILE: sequoia/settings/sl/__init__.py
================================================
from .. import Results
from .environment import PassiveEnvironment
# TODO: Replace all uses of 'PassiveEnvironment' with 'SLEnvironment'
SLEnvironment = PassiveEnvironment
from .continual import ContinualSLSetting
from .discrete import DiscreteTaskAgnosticSLSetting
from .incremental import IncrementalSLSetting
from .setting import SLSetting
# NOTE: Class-Incremental is now the same as IncrementalSLSetting.
# from .class_incremental import ClassIncrementalSetting
ClassIncrementalSetting = IncrementalSLSetting
from .domain_incremental import DomainIncrementalSLSetting
from .multi_task import MultiTaskSLSetting
from .task_incremental import TaskIncrementalSLSetting
from .traditional import TraditionalSLSetting
# TODO: Import variants without the 'SL' in their name above, and then don't include them
# in the __all__ below, to improve backward compatibility a bit.
# __all__ = [
# "PassiveEnvironment",
# "SLSetting", ...
# ]
================================================
FILE: sequoia/settings/sl/continual/__init__.py
================================================
from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment
from .objects import Actions, Observations, ObservationSpace, Rewards
from .setting import ContinualSLSetting
Environment = ContinualSLEnvironment
TestEnvironment = ContinualSLTestEnvironment
================================================
FILE: sequoia/settings/sl/continual/environment.py
================================================
""" Continual SL environment. (smooth task boundaries, etc)
"""
import warnings
from functools import partial
from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Type, Union
import gym
import numpy as np
from continuum.datasets import (
CIFAR10,
CIFAR100,
EMNIST,
KMNIST,
MNIST,
QMNIST,
CIFARFellowship,
Core50,
Core50v2_79,
Core50v2_196,
Core50v2_391,
FashionMNIST,
ImageNet100,
ImageNet1000,
MNISTFellowship,
Synbols,
)
from gym import Space, spaces
from torch import Tensor
from torch.nn import functional as F
from torch.utils.data import Dataset, IterableDataset
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support as tensor_space
from sequoia.common.gym_wrappers.utils import tile_images
from sequoia.common.spaces import Image, TypedDictSpace
from sequoia.common.transforms import Transforms
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.utils.logging_utils import get_logger
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
logger = get_logger(__name__)
# Observation (image) space of each supported dataset, keyed by the lowercased name of
# the dataset class (e.g. "mnist", "cifar10"). Every entry is an `Image` space passed
# through `add_tensor_support` (imported in this module as `tensor_space`).
# NOTE(review): the ImageNet and Core50 entries use a channels-last shape (224, 224, 3),
# while all the others are channels-first - confirm that this is intended.
base_observation_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        FashionMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        KMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        EMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        QMNIST: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        MNISTFellowship: tensor_space(Image(0, 1, shape=(1, 28, 28))),
        # TODO: Determine the true bounds on the image values in cifar10.
        # Appears to be ~= [-2.5, 2.5]
        CIFAR10: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        CIFAR100: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        CIFARFellowship: tensor_space(Image(-np.inf, np.inf, shape=(3, 32, 32))),
        ImageNet100: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        ImageNet1000: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_79: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_196: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Core50v2_391: tensor_space(Image(0, 1, shape=(224, 224, 3))),
        Synbols: tensor_space(Image(0, 1, shape=(3, 32, 32))),
    }.items()
}
# Action space of each supported dataset, keyed by the lowercased name of the dataset
# class (same keys as `base_observation_spaces`). All entries are `Discrete` spaces;
# presumably their size is the number of classes in the dataset - TODO confirm.
base_action_spaces: Dict[str, Space] = {
    dataset_class.__name__.lower(): space
    for dataset_class, space in {
        MNIST: spaces.Discrete(10),
        FashionMNIST: spaces.Discrete(10),
        KMNIST: spaces.Discrete(10),
        EMNIST: spaces.Discrete(10),
        QMNIST: spaces.Discrete(10),
        MNISTFellowship: spaces.Discrete(30),
        CIFAR10: spaces.Discrete(10),
        CIFAR100: spaces.Discrete(100),
        CIFARFellowship: spaces.Discrete(110),
        ImageNet100: spaces.Discrete(100),
        ImageNet1000: spaces.Discrete(1000),
        Core50: spaces.Discrete(50),
        Core50v2_79: spaces.Discrete(50),
        Core50v2_196: spaces.Discrete(50),
        Core50v2_391: spaces.Discrete(50),
        Synbols: spaces.Discrete(48),
    }.items()
}
# NOTE: Since the current SL datasets are image classification, the reward spaces are
# the same as the action space. But that won't be the case when we add other types of
# datasets!
# Only datasets with a `Discrete` action space get an entry here, and the reward space
# is the very same space object as the action space (it isn't copied).
base_reward_spaces: Dict[str, Space] = {
    dataset_name: action_space
    for dataset_name, action_space in base_action_spaces.items()
    if isinstance(action_space, spaces.Discrete)
}
def split_batch(
    batch: Tuple[Tensor, ...],
    hide_task_labels: bool,
    Observations=Observations,
    Rewards=Rewards,
) -> Tuple[Observations, Rewards]:
    """Splits the batch into a tuple of Observations and Rewards.

    Parameters
    ----------
    batch : Tuple[Tensor, ...]
        A batch of data coming from the dataset: either a pair of tensors `(x, y)`,
        or a triple `(x, y, t)` where `t` holds the task labels.
    hide_task_labels : bool
        When True, the task labels are replaced with None in the observations.
    Observations, Rewards :
        The classes used to build the two returned objects.

    Returns
    -------
    Tuple[Observations, Rewards]
        A tuple of Observations and Rewards.
    """
    # In this context (class_incremental), we will always have 3 items per
    # batch, because we use the ClassIncremental scenario from Continuum.
    is_pair_of_tensors = len(batch) == 2 and all(isinstance(item, Tensor) for item in batch)
    if is_pair_of_tensors:
        x, y = batch
        task_labels = None
    else:
        assert len(batch) == 3
        x, y, task_labels = batch
    # Remove the task labels if we're not currently allowed to have them.
    # TODO: Using None might cause some issues. Maybe set -1 instead?
    visible_task_labels = None if hide_task_labels else task_labels
    return Observations(x=x, task_labels=visible_task_labels), Rewards(y=y)
# IDEA: Have this env be the 'wrapper' / base env type for the continual SL envs, and
# register them in gym!
def default_split_batch_function(
    hide_task_labels: bool,
    Observations: Type[ObservationType] = Observations,
    Rewards: Type[RewardType] = Rewards,
) -> Callable[[Tuple[Tensor, ...]], Tuple[ObservationType, RewardType]]:
    """Returns a callable that is used to split a batch into observations and rewards.

    The result is `split_batch` with everything but the batch already bound through
    `functools.partial`.
    """
    bound_arguments = dict(
        hide_task_labels=hide_task_labels,
        Observations=Observations,
        Rewards=Rewards,
    )
    return partial(split_batch, **bound_arguments)
class ContinualSLEnvironment(PassiveEnvironment[ObservationType, ActionType, RewardType]):
    """Continual Supervised Learning Environment.

    Thin layer over `PassiveEnvironment` that splits each batch into `Observations`
    and `Rewards` objects and, when `one_epoch_only` is set, closes itself once a
    pass over the data has ended.

    TODO: Here we actually inform the environment of its observation / action / reward
    spaces, which isn't ideal, but is arguably better than giving the env the
    responsibility (and arguments needed) to create the datasets of each task, to
    apply the transforms, and to use the right train/val/test split.
    """

    def __init__(
        self,
        dataset: Union[Dataset, IterableDataset],
        hide_task_labels: bool = True,
        observation_space: TypedDictSpace = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        Observations: Type[ObservationType] = Observations,
        Actions: Type[ActionType] = Actions,
        Rewards: Type[RewardType] = Rewards,
        split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        one_epoch_only: bool = True,
        drop_last: bool = False,
        **kwargs,
    ):
        """Create the environment on top of `dataset`.

        Parameters
        ----------
        dataset :
            The data to iterate over. Must be an instance of `Dataset`.
        hide_task_labels : bool
            Whether the task labels are set to None in the observations.
        observation_space, action_space, reward_space :
            Spaces of the environment, passed as-is to `PassiveEnvironment`.
        Observations, Rewards :
            Classes used to build the observations and rewards of each batch.
        Actions :
            Not used by this constructor.
        split_batch_fn :
            NOTE(review): this argument is currently ignored: it is always
            overwritten with the result of `default_split_batch_function`.
        one_epoch_only : bool
            When True, the environment is closed at the end of a pass over the data
            (see `step` and `__iter__`).
        pretend_to_be_active, strict, drop_last, **kwargs :
            Passed as-is to `PassiveEnvironment`.
        """
        assert isinstance(dataset, Dataset)
        self._hide_task_labels = hide_task_labels
        # Whatever was passed as `split_batch_fn` is discarded here.
        split_batch_fn = default_split_batch_function(
            hide_task_labels=hide_task_labels,
            Observations=Observations,
            Rewards=Rewards,  # TODO: Fix this 'Rewards' being of the 'wrong' type.
        )
        self._one_epoch_only = one_epoch_only
        super().__init__(
            dataset=dataset,
            split_batch_fn=split_batch_fn,
            observation_space=observation_space,
            action_space=action_space,
            reward_space=reward_space,
            pretend_to_be_active=pretend_to_be_active,
            strict=strict,
            drop_last=drop_last,
            **kwargs,
        )

    # TODO: Clean up the batching of a Sparse(Discrete) space so its less ugly.
    def step(
        self, action: ActionType
    ) -> Tuple[ObservationType, Optional[RewardType], bool, Sequence[Dict]]:
        """Step the parent environment, closing this env when the step reports `done`
        and `one_epoch_only` is set.
        """
        obs, reward, done, info = super().step(action)
        if done and self._one_epoch_only:
            self.close()
        return obs, reward, done, info

    def __iter__(self):
        """Yield everything the parent's iterator yields, then close the env if
        `one_epoch_only` is set.
        """
        yield from super().__iter__()
        # Only reached once the parent's iterator is exhausted.
        if self._one_epoch_only:
            self.close()
# TODO: Remove / fix this 'split batch function'. The problem is that we need to
# tell the environment how to take the three items from continuum and convert them
# into
from pathlib import Path
from typing import Optional
import torch
from sequoia.common.config import Config
from sequoia.common.gym_wrappers import has_wrapper
from sequoia.common.metrics import ClassificationMetrics
from sequoia.settings.assumptions.continual import TestEnvironment
from sequoia.utils.logging_utils import get_logger
from .results import ContinualSLResults
class ContinualSLTestEnvironment(TestEnvironment[ContinualSLEnvironment]):
    """Test environment for Continual SL that accumulates classification metrics.

    Every (action, reward) pair that reaches `_after_step` is turned into a
    `ClassificationMetrics` object and appended to `self.results.metrics`.
    The environment can be driven through the dataloader-style `__iter__` / `send`
    pair defined here; the `_before_step` / `_after_step` / `_after_reset` hooks are
    also expected to be invoked by the parent class (not visible from this class).
    """

    def __init__(
        self,
        env: ContinualSLEnvironment,
        directory: Path,
        hide_task_labels: bool = True,
        step_limit: Optional[int] = None,
        no_rewards: bool = False,
        config: Config = None,
        **kwargs,
    ):
        """Wrap `env` for testing.

        Args:
            env: Environment to evaluate on. It is wrapped with a
                `ShowLabelDistributionWrapper` if it doesn't already have one.
            directory: Forwarded to the parent constructor.
            hide_task_labels: Accepted for interface compatibility; this constructor
                does not read it.
            step_limit: Forwarded to the parent constructor. `send` also uses
                `self.step_limit` to compute its `done` flag.
            no_rewards: Forwarded to the parent constructor. When `self.no_rewards`
                is truthy, `__iter__` and `send` hand back `None` instead of rewards.
            config: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        from .wrappers import ShowLabelDistributionWrapper

        if not has_wrapper(env, ShowLabelDistributionWrapper):
            env = ShowLabelDistributionWrapper(env, env_name="test")
        super().__init__(
            env,
            directory=directory,
            step_limit=step_limit,
            no_rewards=no_rewards,
            config=config,
            **kwargs,
        )
        # IDEA: Make the env give us the task ids, and then hide them again after, just
        # so we can get proper 'per-task' metrics.
        # NOTE: This wouldn't be ideal however, as would assume that there is a 'discrete'
        # set of values for the task id, which is only true in Classification datasets.
        assert isinstance(self.env.unwrapped, ContinualSLEnvironment)
        self.env.unwrapped.hide_task_labels = False
        self._steps = 0
        self.results = ContinualSLResults()
        self._reset = False
        # Last action received by `_before_step`, consumed by `_after_step`.
        self.action_: Optional[ActionType] = None
        from collections import deque

        # Observations yielded by `__iter__` that haven't been answered by `send` yet.
        self.observation_queue = deque(maxlen=3)

    def get_results(self) -> ContinualSLResults:
        """Return the accumulated results, adding the label distribution plot if available."""
        from .wrappers import ShowLabelDistributionWrapper

        if has_wrapper(self, ShowLabelDistributionWrapper):
            self.results.plots_dict["Label distribution"] = self.env.make_figure()
        return self.results

    def __iter__(self):
        """Iterate over the wrapped env dataloader-style, yielding `(obs, rewards)`.

        BUG (note from the original author, may be stale): The iter/send type of test
        loop doesn't produce any results!

        Each yielded observation is queued until a matching `send` call is made.

        Raises:
            RuntimeError: if the queue of unanswered observations is already full
                when another batch is produced.
        """
        assert self.unwrapped.pretend_to_be_active
        # obs = self.reset()
        # self.observations = obs
        # yield obs, None
        self._before_reset()
        for i, (obs, rewards) in enumerate(self.env.__iter__()):
            if i == 0:
                # The first batch plays the role of the observation returned by `reset`.
                self._after_reset(obs)
            if len(self.observation_queue) == self.observation_queue.maxlen:
                raise RuntimeError(
                    f"Can't consume more than {self.observation_queue.maxlen} batches "
                    f"in a row without sending an action!"
                )
            self.observation_queue.append(obs)
            if self.no_rewards:
                rewards = None
            yield obs, rewards
        self.close()

    def send(self, actions: ActionType) -> Optional[RewardType]:
        """Send `actions` for the oldest unanswered observation and record metrics.

        Returns:
            The rewards obtained from `self.env.send(actions)`, or `None` when
            `self.no_rewards` is truthy.
        """
        self._before_step(actions)
        rewards = self.env.send(actions)
        obs = self.observation_queue.popleft()
        info = getattr(obs, "info", {})
        # `step_limit` defaults to None in the constructor: treat that as "no limit"
        # rather than failing on the `int >= None` comparison.
        done = self.step_limit is not None and self.get_total_steps() >= self.step_limit
        self._after_step(obs, rewards, done, info)
        if self.no_rewards:
            rewards = None
        return rewards

    def reset(self):
        """Reset the environment by delegating to the parent class."""
        return super().reset()
        # if not self._reset:
        #     logger.debug("Initial reset.")
        #     self._reset = True
        #     return super().reset()
        # else:
        #     # TODO: Why is this a good thing again? Why not just let an 'EpisodeLimit'
        #     # wrapper handle this?
        #     logger.debug("Resetting the env closes it. (only one episode in SL)")
        #     self.close()
        #     return None

    def _before_step(self, action):
        """Remember `action` so `_after_step` can compare it against the reward."""
        self.action_ = action
        return super()._before_step(action)

    def _after_step(self, observation, reward, done, info):
        """Record the classification metrics for the last action and update the stats.

        Returns:
            The `done` flag that was passed in.

        Raises:
            NotImplementedError: if the action space is neither `MultiDiscrete` nor
                `MultiBinary`.
        """
        # TODO: Fix this once we actually use a ClassificationAction!
        if not isinstance(reward, Rewards):
            reward = Rewards(y=torch.as_tensor(reward))
        batch_size = reward.batch_size
        action = self.action_
        assert action is not None
        if isinstance(self.action_space, (spaces.MultiDiscrete, spaces.MultiBinary)):
            # NOTE(review): only `MultiDiscrete` is known to expose `nvec`; confirm that
            # the `MultiBinary` case is actually reachable / supported here.
            n_classes = self.action_space.nvec[0]
            from sequoia.settings.assumptions.task_type import ClassificationActions

            if not isinstance(action, ClassificationActions):
                if isinstance(action, Actions):
                    y_pred = action.y_pred
                else:
                    y_pred = torch.as_tensor(action)
                # 'upgrade', creating some fake logits.
                fake_logits = F.one_hot(y_pred, n_classes)
                action = ClassificationActions(y_pred=y_pred, logits=fake_logits)
        else:
            raise NotImplementedError(
                f"TODO: Remove the assumption here that the env is a classification env "
                f"({self.action_space}, {self.reward_space})"
            )
        if action.batch_size != batch_size:
            warnings.warn(
                RuntimeWarning(
                    f"Truncating the action since its batch size {action.batch_size} "
                    f"is larger than the rewards': ({reward.batch_size})"
                )
            )
            action = action[:, :batch_size]
        # TODO: Use some kind of generic `get_metrics(actions: Actions, rewards: Rewards)`
        # function instead.
        y = reward.y
        logits = action.logits
        y_pred = action.y_pred
        metric = ClassificationMetrics(y=y, logits=logits, y_pred=y_pred)
        self.results.metrics.append(metric)
        self._steps += 1
        # Debugging issue with Monitor class:
        # return super()._after_step(observation, reward, done, info)
        if not self.enabled:
            return done
        # `config` defaults to None in the constructor, so guard before reading
        # `.render` (same check as in `_after_reset`).
        should_render = self.config and self.config.render
        if done and self.env_semantics_autoreset:
            # For envs with BlockingReset wrapping VNCEnv, this observation will be the
            # first one of the new episode
            if should_render:
                self.reset_video_recorder()
            self.episode_id += 1
            self._flush()
        # Record stats: (TODO: accuracy serves as the 'reward'!)
        reward_for_stats = metric.accuracy
        self.stats_recorder.after_step(observation, reward_for_stats, done, info)
        # Record video
        if should_render:
            self.video_recorder.capture_frame()
        return done

    def _after_reset(self, observation: ObservationType):
        """Validate the first observation's images and reset the recorded statistics."""
        image_batch = observation.numpy().x
        # Need to create a single image with the right dtype for the Monitor
        # from gym to create gifs / videos with it.
        if self.batch_size:
            # Need to tile the image batch so it can be seen as a single image
            # by the Monitor.
            image_batch = tile_images(image_batch)
        image_batch = Transforms.channels_last_if_needed(image_batch)
        if image_batch.dtype == np.float32:
            assert (0 <= image_batch).all() and (image_batch <= 1).all()
            # Scale by 255 (not 256): a pixel value of exactly 1.0 would otherwise
            # become 256, which does not fit in a uint8.
            image_batch = (255 * image_batch).astype(np.uint8)
        assert image_batch.dtype == np.uint8
        # Debugging this issue here:
        # super()._after_reset(image_batch)
        # -- Code from Monitor
        if not self.enabled:
            return
        # Reset the stat count
        self.stats_recorder.after_reset(observation)
        if self.config and self.config.render:
            self.reset_video_recorder()
        # Bump *after* all reset activity has finished
        self.episode_id += 1
        self._flush()
        # --

    def render(self, mode="human", **kwargs):
        """Render through the parent class, tiling batched frames in "rgb_array" mode."""
        # NOTE: This doesn't get called, because the video recorder uses
        # self.env.render(), rather than self.render()
        # TODO: Render when the 'render' argument in config is set to True.
        image_batch = super().render(mode=mode, **kwargs)
        if mode == "rgb_array" and self.batch_size:
            image_batch = tile_images(image_batch)
        return image_batch
================================================
FILE: sequoia/settings/sl/continual/environment_test.py
================================================
""" TODO: Tests for the TestEnvironment of the ContinualSLSetting. """
from pathlib import Path
from typing import ClassVar, Type
import gym
import numpy as np
import pytest
from torch.utils.data import Subset
from torchvision.datasets import MNIST
from sequoia.common.config import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from sequoia.settings.sl.environment import PassiveEnvironment
from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment
from .results import ContinualSLResults
class TestContinualSLTestEnvironment:
    """Checks that the Continual SL test environment produces results.

    Two interaction styles are exercised: iterating over the env like a dataloader
    (sending actions with `send`), and the usual gym `reset` / `step` loop.
    """

    Environment: ClassVar[Type[Environment]] = ContinualSLEnvironment
    TestEnvironment: ClassVar[Type[TestEnvironment]] = ContinualSLTestEnvironment

    @pytest.fixture()
    def base_env(self):
        """Create an 'active' environment over the first 100 samples of MNIST."""
        batch_size = 5
        transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
        mnist = MNIST(
            "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
        )
        max_samples = 100
        subset = Subset(mnist, list(range(max_samples)))
        # The same transforms are applied to the space describing the raw images.
        raw_image_space = Image(0, 255, (1, 28, 28), np.uint8)
        obs_space = transforms(raw_image_space)
        env = self.Environment(
            subset,
            n_classes=10,
            batch_size=batch_size,
            observation_space=obs_space,
            pretend_to_be_active=True,
            drop_last=False,
        )
        assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
        assert env.action_space.shape == (batch_size,)
        assert env.reward_space == env.action_space
        return env

    @pytest.mark.parametrize("no_rewards", [True, False])
    def test_iteration_produces_results(
        self,
        no_rewards: bool,
        base_env: ContinualSLEnvironment,
        tmp_path: Path,
        config: Config,
    ):
        """TODO: Test that when iterating through the env as a dataloader and sending
        actions produces results.
        """
        n_steps = 100 // base_env.batch_size
        env = self.TestEnvironment(
            base_env,
            directory=tmp_path,
            step_limit=n_steps,
            no_rewards=no_rewards,
        )
        env.config = config
        for obs, rewards in env:
            # Rewards are only obtained once an action has been sent.
            assert rewards is None
            rewards = env.send(env.action_space.sample())
            assert (rewards is None) == env.no_rewards
        assert env.is_closed()
        self.validate_results(env.get_results())

    def validate_results(self, results: ContinualSLResults):
        """Check the type of the results and that they cover the expected samples."""
        assert isinstance(results, ContinualSLResults)
        average_metrics = results.average_metrics
        assert isinstance(average_metrics, ClassificationMetrics)
        assert results.objective > 0
        # TODO: Fix this problem:
        assert results.average_metrics.n_samples in [95, 100]

    @pytest.mark.parametrize("no_rewards", [True, False])
    def test_gym_interaction_produces_results(
        self, no_rewards: bool, base_env: PassiveEnvironment, tmp_path: Path, config: Config
    ):
        """TODO: Test that when iterating through the env as a dataloader and sending
        actions produces results.
        """
        n_steps = 100 // base_env.batch_size
        env = self.TestEnvironment(
            base_env,
            directory=tmp_path,
            step_limit=n_steps,
            no_rewards=no_rewards,
        )
        env.config = config
        done = False
        obs = env.reset()
        steps = 0
        while not done:
            obs, rewards, done, info = env.step(env.action_space.sample())
            steps += 1
            assert (rewards is None) == env.no_rewards
            if steps > 20:
                pytest.fail("Shouldn't have gone longer than 20 steps!")
        # BUG: There's currently a weird off-by-1 error with the total number of steps,
        # which makes these checks for `is_closed()` fail. However, in practice we don't
        # try to iterate twice on the env, so it's not a big deal.
        # FIXME: Fix this check:
        assert env.is_closed()
        # FIXME: Fix this check:
        with pytest.raises((gym.error.ClosedEnvironmentError, gym.error.Error)):
            env.reset()
        # FIXME: Fix this check:
        with pytest.raises(gym.error.ClosedEnvironmentError):
            _ = env.step(env.action_space.sample())
        self.validate_results(env.get_results())
================================================
FILE: sequoia/settings/sl/continual/envs.py
================================================
""" Utility functions for determining the observation space for a given SL dataset.
"""
from typing import Any, Dict, List, Optional, Sequence
import gym
import numpy as np
import torch
from continuum.datasets import (
CIFAR10,
CIFAR100,
EMNIST,
KMNIST,
MNIST,
QMNIST,
CIFARFellowship,
Core50,
Core50v2_79,
Core50v2_196,
Core50v2_391,
FashionMNIST,
ImageNet100,
ImageNet1000,
MNISTFellowship,
Synbols,
)
from continuum.tasks import TaskSet
from gym import Space, spaces
from torch.utils.data import Subset, TensorDataset
from sequoia.common.spaces import ImageTensorSpace, TensorBox, TensorDiscrete
from sequoia.common.spaces.image import could_become_image
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)

# 'Base' observation space (a single un-batched image) of each supported dataset,
# keyed by the lowercased name of the continuum dataset class.
base_observation_spaces: Dict[str, Space] = {
    cls.__name__.lower(): image_space
    for cls, image_space in (
        (MNIST, ImageTensorSpace(0, 1, shape=(1, 28, 28))),
        (FashionMNIST, ImageTensorSpace(0, 1, shape=(1, 28, 28))),
        (KMNIST, ImageTensorSpace(0, 1, shape=(1, 28, 28))),
        (EMNIST, ImageTensorSpace(0, 1, shape=(1, 28, 28))),
        (QMNIST, ImageTensorSpace(0, 1, shape=(1, 28, 28))),
        (MNISTFellowship, ImageTensorSpace(0, 1, shape=(1, 28, 28))),
        # TODO: Determine the true bounds on the image values in cifar10.
        # Appears to be ~= [-2.5, 2.5]
        (CIFAR10, ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32))),
        (CIFAR100, ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32))),
        (CIFARFellowship, ImageTensorSpace(-np.inf, np.inf, shape=(3, 32, 32))),
        (ImageNet100, ImageTensorSpace(0, 1, shape=(224, 224, 3))),
        (ImageNet1000, ImageTensorSpace(0, 1, shape=(224, 224, 3))),
        (Core50, ImageTensorSpace(0, 1, shape=(224, 224, 3))),
        (Core50v2_79, ImageTensorSpace(0, 1, shape=(224, 224, 3))),
        (Core50v2_196, ImageTensorSpace(0, 1, shape=(224, 224, 3))),
        (Core50v2_391, ImageTensorSpace(0, 1, shape=(224, 224, 3))),
        (Synbols, ImageTensorSpace(0, 1, shape=(3, 32, 32))),
    )
}
# 'Base' action space (one prediction among the dataset's classes) of each supported
# dataset, keyed by the lowercased name of the continuum dataset class.
base_action_spaces: Dict[str, Space] = {
    cls.__name__.lower(): label_space
    for cls, label_space in (
        (MNIST, spaces.Discrete(10)),
        (FashionMNIST, spaces.Discrete(10)),
        (KMNIST, spaces.Discrete(10)),
        (EMNIST, spaces.Discrete(10)),
        (QMNIST, spaces.Discrete(10)),
        (MNISTFellowship, spaces.Discrete(30)),
        (CIFAR10, spaces.Discrete(10)),
        (CIFAR100, spaces.Discrete(100)),
        (CIFARFellowship, spaces.Discrete(110)),
        (ImageNet100, spaces.Discrete(100)),
        (ImageNet1000, spaces.Discrete(1000)),
        (Core50, spaces.Discrete(50)),
        (Core50v2_79, spaces.Discrete(50)),
        (Core50v2_196, spaces.Discrete(50)),
        (Core50v2_391, spaces.Discrete(50)),
        (Synbols, spaces.Discrete(48)),
    )
}
# NOTE: All the SL datasets above are image classification datasets, so the reward
# (the true label) lives in the same space as the action (the predicted label).
# This will no longer hold once other kinds of datasets are added!
# Only the datasets whose action space is Discrete get an entry here.
base_reward_spaces: Dict[str, Space] = {
    name: label_space
    for name, label_space in base_action_spaces.items()
    if isinstance(label_space, spaces.Discrete)
}
# Metadata about the optional `ctrl-bench` dependency. The values below describe the
# "not installed" case; they are overwritten in the `else` branch when the import of
# `ctrl` succeeds.
CTRL_INSTALLED: bool = False
CTRL_STREAMS: List[str] = []
CTRL_NB_TASKS: Dict[str, Optional[int]] = {}
try:
    from ctrl.tasks.task import Task
    from ctrl.tasks.task_generator import TaskGenerator
except ImportError as exc:
    logger.debug(f"ctrl-bench isn't installed: {exc}")
    # Creating those just for type hinting.

    class Task:
        pass

    class TaskGenerator:
        pass

else:
    CTRL_INSTALLED = True
    CTRL_STREAMS = ["s_plus", "s_minus", "s_in", "s_out", "s_pl", "s_long"]
    # Number of tasks of each stream, in the same order as CTRL_STREAMS.
    # The entry for "s_long" is None (no fixed number of tasks is recorded here).
    n_tasks = [5, 5, 5, 5, 4, None]
    CTRL_NB_TASKS = dict(zip(CTRL_STREAMS, n_tasks))
    # Image shape and number of classes per task of each stream (same order again).
    x_dims = [(3, 32, 32)] * len(CTRL_STREAMS)
    n_classes = [10, 10, 10, 10, 10, 5]
    for i, stream_name in enumerate(CTRL_STREAMS):
        # Create the 'base observation space' for this stream.
        obs_space = ImageTensorSpace(0, 1, shape=x_dims[i], dtype=torch.float32)
        # TODO: Not sure if the classes should be considered 'shared' or 'distinct'.
        # For now assume they are shared, so the setting's action space is always [0, 5]
        # but the action changes.
        # total_n_classes = n_tasks[i] * n_classes[i]
        # action_space = TensorDiscrete(n=total_n_classes)
        n_classes_per_task = n_classes[i]
        action_space = TensorDiscrete(n=n_classes_per_task)
        # Register the stream in the module-level tables, under its own name.
        # NOTE(review): no entry is added to `base_reward_spaces` for the streams;
        # confirm whether that is intentional.
        base_observation_spaces[stream_name] = obs_space
        base_action_spaces[stream_name] = action_space
from functools import singledispatch
@singledispatch
def get_observation_space(dataset: Any) -> gym.Space:
    """Return the observation space of `dataset`.

    This is the fallback of a single-dispatch function: it is only reached for
    dataset types without a registered handler.

    Raises:
        NotImplementedError: always, since no handler exists for this type.
    """
    message = (
        f"Don't yet have a registered handler to get the observation space of dataset "
        f"{dataset}."
    )
    raise NotImplementedError(message)
@get_observation_space.register(Subset)
def _get_observation_space_for_subset(dataset: Subset) -> gym.Space:
    """Return the observation space of the dataset that `dataset` is a subset of."""
    # Taking a subset of the samples doesn't change the space they belong to.
    wrapped_dataset = dataset.dataset
    return get_observation_space(wrapped_dataset)
@get_observation_space.register(str)
def _get_observation_space_for_dataset_name(dataset: str) -> gym.Space:
    """Look up the 'base' observation space of the dataset named `dataset`.

    Raises:
        NotImplementedError: if the name has no entry in `base_observation_spaces`.
    """
    if dataset in base_observation_spaces:
        return base_observation_spaces[dataset]
    raise NotImplementedError(
        f"Can't yet tell what the 'base' observation space is for dataset "
        f"{dataset} because it doesn't have an entry in the "
        f"`base_observation_spaces` dict."
    )
@get_observation_space.register(TaskSet)
def _get_observation_space_for_taskset(dataset: TaskSet) -> gym.Space:
    """Handler for continuum `TaskSet` objects: not supported yet.

    Always fails with an `AssertionError` whose argument is the offending dataset.
    """
    # NOTE(review): being an `assert`, this check is stripped when Python runs with
    # `-O`, in which case the function would return None. Consider raising explicitly.
    assert False, dataset
    # return get_observation_space(type(dataset).__name__.lower())
@get_observation_space.register(TensorDataset)
def _get_observation_space_for_tensor_dataset(dataset: TensorDataset) -> gym.Space:
    """Infer the observation space of a `TensorDataset` from its first tensor (x).

    The bounds of the space are the min and max values found in `x`, and its shape
    is the shape of a single sample (`x.shape[1:]`). The space is wrapped into an
    `ImageTensorSpace` when `could_become_image` says it can be.

    Raises:
        NotImplementedError: if the dataset doesn't have 1 or 2 tensors, or if `x`
            has fewer than 2 dimensions.
    """
    # Check the number of tensors *before* indexing, so that a dataset without any
    # tensors gets the intended error rather than an IndexError.
    n_tensors = len(dataset.tensors)
    if not (1 <= n_tensors <= 2):
        raise NotImplementedError(
            f"For now, can only handle TensorDatasets with 1 or 2 tensors. (x and y) "
            f"but dataset {dataset} has {n_tensors}!"
        )
    x = dataset.tensors[0]
    if x.dim() < 2:
        # Report the actual problem instead of the (unrelated) tensor count.
        raise NotImplementedError(
            f"For now, can only handle TensorDatasets whose first tensor (x) has at "
            f"least 2 dimensions, but dataset {dataset} has x of shape {tuple(x.shape)}!"
        )
    low = x.min().cpu().item()
    high = x.max().cpu().item()
    obs_space = TensorBox(low=low, high=high, shape=x.shape[1:], dtype=x.dtype)
    if could_become_image(obs_space):
        obs_space = ImageTensorSpace.wrap(obs_space)
    return obs_space
@singledispatch
def get_action_space(dataset: Any) -> gym.Space:
    """Return the action space of `dataset`.

    This is the fallback of a single-dispatch function: it is only reached for
    dataset types without a registered handler.

    Raises:
        NotImplementedError: always, since no handler exists for this type.
    """
    message = (
        f"Don't yet have a registered handler to get the action space of dataset " f"{dataset}."
    )
    raise NotImplementedError(message)
@get_action_space.register(Subset)
def _get_action_space_for_subset(dataset: Subset) -> gym.Space:
    """Return the action space of the dataset that `dataset` is a subset of."""
    # Taking a subset of the samples doesn't change the action space.
    wrapped_dataset = dataset.dataset
    return get_action_space(wrapped_dataset)
@get_action_space.register(str)
def _get_action_space_for_dataset_name(dataset: str) -> gym.Space:
    """Look up the 'base' action space of the dataset named `dataset`.

    Raises:
        NotImplementedError: if the name has no entry in `base_action_spaces`.
    """
    if dataset in base_action_spaces:
        return base_action_spaces[dataset]
    raise NotImplementedError(
        f"Can't yet tell what the 'base' action space is for dataset "
        f"{dataset} because it doesn't have an entry in the "
        f"`base_action_spaces` dict."
    )
@singledispatch
def get_reward_space(dataset: Any) -> gym.Space:
    """Return the reward space of `dataset`.

    This is the fallback of a single-dispatch function: it is only reached for
    dataset types without a registered handler.

    Raises:
        NotImplementedError: always, since no handler exists for this type.
    """
    message = (
        f"Don't yet have a registered handler to get the reward space of dataset " f"{dataset}."
    )
    raise NotImplementedError(message)
@get_reward_space.register(Subset)
def _get_reward_space_for_subset(dataset: Subset) -> gym.Space:
    """Return the reward space of the dataset that `dataset` is a subset of."""
    # The reward space of a Subset is *usually* that of the original dataset.
    # TODO: Need to check this though? Maybe we're taking only the indices with a given class
    wrapped_dataset = dataset.dataset
    return get_reward_space(wrapped_dataset)
@get_reward_space.register(str)
def _get_reward_space_for_dataset_name(dataset: str) -> gym.Space:
    """Look up the 'base' reward space of the dataset named `dataset`.

    Raises:
        NotImplementedError: if the name has no entry in `base_reward_spaces`.
    """
    if dataset in base_reward_spaces:
        return base_reward_spaces[dataset]
    raise NotImplementedError(
        f"Can't yet tell what the 'base' reward space is for dataset "
        f"{dataset} because it doesn't have an entry in the "
        f"`base_reward_spaces` dict."
    )
@get_reward_space.register(TensorDataset)
@get_action_space.register(TensorDataset)
def get_y_space_for_tensor_dataset(dataset: TensorDataset) -> gym.Space:
    """Infer the space of the targets (y) of a two-tensor `TensorDataset`.

    Registered as the handler for both the action space and the reward space.
    Integer targets whose smallest value is 0 give a `TensorDiscrete` with
    `max + 1` classes; anything else gives a `TensorBox` bounded by the min and
    max values found in `y`.

    Raises:
        NotImplementedError: if the dataset doesn't have exactly two tensors.
    """
    if len(dataset.tensors) != 2:
        raise NotImplementedError(
            f"Only able to detect the action space of TensorDatasets if they have two "
            f"tensors for now (x and y), but dataset {dataset} has {len(dataset.tensors)}!"
        )
    y = dataset.tensors[-1]
    low = y.min().item()
    high = y.max().item()
    y_sample_shape = y.shape[1:]
    integer_targets = not y.dtype.is_floating_point
    if integer_targets and low == 0:
        return TensorDiscrete(high + 1)
    # Floating point targets, or integer targets that don't start at 0.
    # TODO: Add a space like DiscreteWithOffset ?
    return TensorBox(low, high, shape=y_sample_shape, dtype=y.dtype)
@get_action_space.register(list)
@get_action_space.register(tuple)
def _get_action_space_for_list_of_datasets(datasets: Sequence[TaskSet]) -> gym.Space:
    """Return the 'union' of the action spaces of a list / tuple of datasets.

    Only handled when the first dataset has a Discrete action space and the lowest
    action over all datasets is 0: the result is then a `TensorDiscrete` large enough
    to contain the actions of every dataset.

    Raises:
        NotImplementedError: when the union can't be determined (including when
            `datasets` is empty).
    """
    # TODO: IDEA: If given a list of datasets, try to find the 'union' of their spaces.
    # This is meant to be one potential solution to the case where custom datasets are
    # passed for each task, like [0, 2), [3, 4], etc.
    action_spaces = [get_action_space(dataset) for dataset in datasets]
    # An empty sequence falls through to the NotImplementedError below rather than
    # failing with an IndexError.
    if action_spaces and isinstance(action_spaces[0], spaces.Discrete):
        lows = [0 if isinstance(space, spaces.Discrete) else space.low for space in action_spaces]
        highs = [
            space.n - 1 if isinstance(space, spaces.Discrete) else space.high
            for space in action_spaces
        ]
        # NOTE: The original check here referenced `reward_spaces`, a name that doesn't
        # exist in this function (NameError). The first action space is already known
        # to be Discrete at this point.
        if min(lows) == 0:
            return TensorDiscrete(max(highs) + 1)
    raise NotImplementedError(
        f"Don't yet know how to get the 'union' of the action spaces ({action_spaces}) "
        f" of datasets {datasets}"
    )
@get_reward_space.register(list)
@get_reward_space.register(tuple)
def _get_reward_space_for_list_of_datasets(datasets: Sequence[TaskSet]) -> gym.Space:
    """Return the 'union' of the reward spaces of a list / tuple of datasets.

    Only handled when the first dataset has a Discrete reward space and the lowest
    reward over all datasets is 0: the result is then a `TensorDiscrete` large enough
    to contain the rewards of every dataset.

    Raises:
        NotImplementedError: when the union can't be determined (including when
            `datasets` is empty).
    """
    # TODO: IDEA: If given a list of datasets, try to find the 'union' of their spaces.
    # This is meant to be one potential solution to the case where custom datasets are
    # passed for each task, like [0, 2), [3, 4], etc.
    reward_spaces = [get_reward_space(dataset) for dataset in datasets]
    # An empty sequence falls through to the NotImplementedError below rather than
    # failing with an IndexError.
    if reward_spaces and isinstance(reward_spaces[0], spaces.Discrete):
        lows = [0 if isinstance(space, spaces.Discrete) else space.low for space in reward_spaces]
        highs = [
            space.n - 1 if isinstance(space, spaces.Discrete) else space.high
            for space in reward_spaces
        ]
        # The first reward space is already known to be Discrete here, so only the
        # lower bound remains to be checked.
        if min(lows) == 0:
            return TensorDiscrete(max(highs) + 1)
    raise NotImplementedError(
        f"Don't yet know how to get the 'union' of the reward spaces ({reward_spaces}) "
        f" of datasets {datasets}"
    )
================================================
FILE: sequoia/settings/sl/continual/objects.py
================================================
from dataclasses import dataclass
from typing import Optional, TypeVar
from gym import spaces
from torch import Tensor
from sequoia.common.spaces import ImageTensorSpace, Sparse, TypedDictSpace
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.sl.setting import SLSetting
@dataclass(frozen=True)
class Observations(SLSetting.Observations, ContinualAssumption.Observations):
    """Observations from a Continual Supervised Learning environment.

    Frozen dataclass that combines the `Observations` types of `SLSetting` and
    `ContinualAssumption`.
    """

    # The input samples.
    x: Tensor
    # The task labels that go with the samples. Defaults to None.
    # NOTE(review): presumably None when task labels are hidden / unavailable — confirm.
    task_labels: Optional[Tensor] = None


# Type variable for `Observations` or any of its subclasses.
ObservationType = TypeVar("ObservationType", bound=Observations)
import torch
class ObservationSpace(TypedDictSpace[ObservationType]):
    """Observation space of a Continual SL Setting.

    Typed dict space with one entry per field of `Observations` (`x` and
    `task_labels`).
    """

    # The sample space: this is a gym.spaces.Box subclass with added properties for
    # images, such as `channels`, `h`, `w`, `is_channels_first`, etc.
    # This space will return Tensors.
    x: ImageTensorSpace
    # The task label space: This is a gym.spaces.MultiDiscrete of Tensors.
    # NOTE(review): the annotation is `Sparse[...]`, presumably because the task labels
    # can be missing — confirm against the `Sparse` space.
    task_labels: Sparse[torch.LongTensor]
# TODO: Eventually also use some kind of structured action and reward space!
# TODO: Figure out how/where to switch the actions type to be specific to classification
# from sequoia.settings.assumptions.task_type import ClassificationActions
@dataclass(frozen=True)
class Actions(SLSetting.Actions):
    """Actions to be sent to a Continual Supervised Learning environment.

    Frozen dataclass holding the predictions made for a batch of observations.
    """

    # The predictions (named `y_pred` to mirror the `y` field of `Rewards`).
    y_pred: Tensor
class ActionSpace(TypedDictSpace):
    """Action space of a Continual SL Setting.

    Typed dict space with a single `y_pred` entry, matching the field of `Actions`.
    """

    # Space of the predictions.
    y_pred: spaces.Space
@dataclass(frozen=True)
class Rewards(SLSetting.Rewards):
    """Rewards obtained from a Continual Supervised Learning environment.

    Frozen dataclass holding the targets associated with a batch of observations.
    """

    # The targets (e.g. the true labels in a classification dataset).
    y: Tensor
class RewardSpace(TypedDictSpace):
    """Reward space of a Continual SL Setting.

    Typed dict space with a single `y` entry, matching the field of `Rewards`.
    """

    # Space of the targets.
    y: spaces.Space


# Type variables for `Actions` / `Rewards` or any of their subclasses.
ActionType = TypeVar("ActionType", bound=Actions)
RewardType = TypeVar("RewardType", bound=Rewards)
================================================
FILE: sequoia/settings/sl/continual/results.py
================================================
from sequoia.common.metrics import MetricsType
from sequoia.settings.assumptions.continual import ContinualResults
class ContinualSLResults(ContinualResults[MetricsType]):
    """Results of a Continual SL Setting.

    Adds nothing on top of `ContinualResults`: it only gives the SL results their
    own type.
    """

    pass
================================================
FILE: sequoia/settings/sl/continual/setting.py
================================================
import itertools
from dataclasses import dataclass
from pathlib import Path
from typing import ClassVar, Dict, List, Optional, Type, TypeVar, Union
import gym
import numpy as np
import torch
from continuum.datasets import (
CIFAR10,
CIFAR100,
EMNIST,
KMNIST,
MNIST,
QMNIST,
CIFARFellowship,
FashionMNIST,
ImageNet100,
ImageNet1000,
MNISTFellowship,
Synbols,
_ContinuumDataset,
)
from continuum.scenarios import ClassIncremental, _BaseScenario
from continuum.tasks import TaskSet, concat, split_train_val
from gym import spaces
from simple_parsing import choice, field, list_field
from torch import Tensor
from torch.utils.data import ConcatDataset, Dataset, Subset
import wandb
from sequoia.common.config import Config
from sequoia.common.gym_wrappers import RenderEnvWrapper, TransformObservation
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.spaces import Sparse
from sequoia.common.transforms import Compose, Transforms
from sequoia.settings.assumptions.continual import ContinualAssumption
from sequoia.settings.base import Method
from sequoia.settings.sl.setting import SLSetting
from sequoia.settings.sl.wrappers import MeasureSLPerformanceWrapper
from sequoia.utils.generic_functions import concatenate
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.utils import flag
from .environment import ContinualSLEnvironment, ContinualSLTestEnvironment
from .envs import (
CTRL_INSTALLED,
CTRL_STREAMS,
base_action_spaces,
base_observation_spaces,
base_reward_spaces,
get_action_space,
get_observation_space,
get_reward_space,
)
from .objects import Actions, ActionSpace, Observations, ObservationSpace, Rewards, RewardSpace
from .results import ContinualSLResults
from .wrappers import relabel
logger = get_logger(__name__)
EnvironmentType = TypeVar("EnvironmentType", bound=ContinualSLEnvironment)

# Maps the lowercased class name of each supported continuum dataset to its class.
available_datasets = {
    dataset_class.__name__.lower(): dataset_class
    for dataset_class in (
        CIFARFellowship,
        MNISTFellowship,
        ImageNet100,
        ImageNet1000,
        CIFAR10,
        CIFAR100,
        EMNIST,
        KMNIST,
        MNIST,
        QMNIST,
        FashionMNIST,
        Synbols,
    )
    # "synbols": Synbols,
    # "synbols_font": partial(Synbols, task="fonts"),
}
if CTRL_INSTALLED:
    # The ctrl-bench streams are referred to by name: each name maps to itself.
    available_datasets.update({stream_name: stream_name for stream_name in CTRL_STREAMS})
@dataclass
class ContinualSLSetting(SLSetting, ContinualAssumption):
"""Continuous, Task-Agnostic, Continual Supervised Learning.
This is *currently* the most "general" Supervised Continual Learning setting in
Sequoia.
- Data distribution changes smoothly over time.
- Smooth transitions between "tasks"
- No information about task boundaries or task identity (no task IDs)
- Maximum of one 'epoch' through the environment.
"""
# Class variables that hold the 'base' observation/action/reward spaces for the
# available datasets.
base_observation_spaces: ClassVar[Dict[str, gym.Space]] = base_observation_spaces
base_action_spaces: ClassVar[Dict[str, gym.Space]] = base_action_spaces
base_reward_spaces: ClassVar[Dict[str, gym.Space]] = base_reward_spaces
# (NOTE: commenting out SLSetting.Observations as it is the same class
# as Setting.Observations, and we want a consistent method resolution order.
Observations: ClassVar[Type[Observations]] = Observations
Actions: ClassVar[Type[Actions]] = Actions
Rewards: ClassVar[Type[Rewards]] = Rewards
ObservationSpace: ClassVar[Type[ObservationSpace]] = ObservationSpace
Environment: ClassVar[Type[SLSetting.Environment]] = ContinualSLEnvironment[
Observations, Actions, Rewards
]
Results: ClassVar[Type[ContinualSLResults]] = ContinualSLResults
# Class variable holding a dict of the names and types of all available
# datasets.
# TODO: Issue #43: Support other datasets than just classification
available_datasets: ClassVar[Dict[str, Type[_ContinuumDataset]]] = available_datasets
# A continual dataset to use. (Should be taken from the continuum package).
dataset: str = choice(available_datasets.keys(), default="mnist")
# Transformations to use. See the Transforms enum for the available values.
transforms: List[Transforms] = list_field(
Transforms.to_tensor,
# BUG: The input_shape given to the Model doesn't have the right number
# of channels, even if we 'fixed' them here. However the images are fine
# after.
Transforms.three_channels,
Transforms.channels_first_if_needed,
)
# Either number of classes per task, or a list specifying for
# every task the amount of new classes.
increment: Union[int, List[int]] = list_field(
2, type=int, nargs="*", alias="n_classes_per_task"
)
# The scenario number of tasks.
# If zero, defaults to the number of classes divided by the increment.
nb_tasks: int = 0
# A different task size applied only for the first task.
# Deactivated if `increment` is a list.
initial_increment: int = 0
# An optional custom class order, used for NC.
class_order: Optional[List[int]] = None
# Either number of classes per task, or a list specifying for
# every task the amount of new classes (defaults to the value of
# `increment`).
test_increment: Optional[Union[List[int], int]] = None
# A different task size applied only for the first test task.
# Deactivated if `test_increment` is a list. Defaults to the
# value of `initial_increment`.
test_initial_increment: Optional[int] = None
# An optional custom class order for testing, used for NC.
# Defaults to the value of `class_order`.
test_class_order: Optional[List[int]] = None
# Whether task boundaries are smooth or not.
smooth_task_boundaries: bool = flag(True)
# Whether the context (task) variable is stationary or not.
stationary_context: bool = flag(False)
# Whether tasks share the same action space or not.
# TODO: This will probably be moved into a different assumption.
shared_action_space: Optional[bool] = None
# TODO: Need to put num_workers in only one place.
batch_size: int = field(default=32, cmd=False)
num_workers: int = field(default=4, cmd=False)
# When True, a Monitor-like wrapper will be applied to the training environment
# and monitor the 'online' performance during training. Note that in SL, this will
# also cause the Rewards (y) to be withheld until actions are passed to the `send`
# method of the Environment.
monitor_training_performance: bool = flag(False)
train_datasets: List[Dataset] = field(
default_factory=list, cmd=False, repr=False, to_dict=False
)
val_datasets: List[Dataset] = field(default_factory=list, cmd=False, repr=False, to_dict=False)
test_datasets: List[Dataset] = field(default_factory=list, cmd=False, repr=False, to_dict=False)
def __post_init__(self):
    """Finish initializing the setting after the dataclass fields are set.

    In order, this:
    - defaults the `test_*` options to their train counterparts;
    - unwraps single-element `increment` / `test_increment` lists into ints;
    - initializes the continuum loaders/datasets, the environments and the
      "has prepared / has setup" flags to their empty values;
    - when the `ctrl` package is installed and `self.dataset` names a CTRL
      stream, pulls the train/val/test datasets of each task from that stream;
    - validates user-provided `train_datasets`/`val_datasets`/`test_datasets`
      (all three are required, with the same number of tasks);
    - derives `class_order`, `increment` and `nb_tasks` from the base action
      space of the chosen dataset.

    Raises:
        RuntimeError: if only some of the train/val/test dataset lists are
            given, or if they don't all have the same length.
    """
    super().__post_init__()
    # assert not self.has_setup_fit
    # Test values default to the same as train.
    self.test_increment = self.test_increment or self.increment
    self.test_initial_increment = self.test_initial_increment or self.initial_increment
    self.test_class_order = self.test_class_order or self.class_order
    # TODO: For now we assume a fixed, equal number of classes per task, for
    # sake of simplicity. We could take out this assumption, but it might
    # make things a bit more complicated.
    if isinstance(self.increment, list) and len(self.increment) == 1:
        self.increment = self.increment[0]
    if isinstance(self.test_increment, list) and len(self.test_increment) == 1:
        self.test_increment = self.test_increment[0]
    assert isinstance(self.increment, int)
    assert isinstance(self.test_increment, int)
    # The 'scenarios' for train and test from continuum. (ClassIncremental for now).
    self.train_cl_loader: Optional[_BaseScenario] = None
    self.test_cl_loader: Optional[_BaseScenario] = None
    self.train_cl_dataset: Optional[_ContinuumDataset] = None
    self.test_cl_dataset: Optional[_ContinuumDataset] = None
    # This will be set by the Experiment, or passed to the `apply` method.
    # NOTE: this is a bare annotation; it does not assign anything.
    # TODO: This could be a bit cleaner.
    self.config: Config
    # Default path to which the datasets will be downloaded.
    self.data_dir: Optional[Path] = None
    self.train_env: ContinualSLEnvironment = None  # type: ignore
    self.val_env: ContinualSLEnvironment = None  # type: ignore
    self.test_env: ContinualSLEnvironment = None  # type: ignore
    # BUG: These `has_setup_fit`, `has_setup_test`, `has_prepared_data` properties
    # aren't working correctly: they get set before the call to the function has
    # been executed, making it impossible to check those values from inside those
    # functions.
    self._has_prepared_data = False
    self._has_setup_fit = False
    self._has_setup_test = False
    if CTRL_INSTALLED and self.dataset in CTRL_STREAMS:
        # Imported lazily, since `ctrl` is an optional dependency.
        import ctrl
        from ctrl.tasks.task_generator import TaskGenerator
        from .envs import CTRL_NB_TASKS
        self.nb_tasks = self.nb_tasks or CTRL_NB_TASKS[self.dataset]
        # NOTE(review): this only triggers when the lookup above left `nb_tasks`
        # falsy (e.g. if CTRL_NB_TASKS["s_long"] is None/0) -- confirm.
        if self.dataset == "s_long" and not self.nb_tasks:
            warnings.warn(
                RuntimeWarning(
                    f"Limiting the scenario to 100 tasks for now when using 's_long' stream."
                )
            )
            self.nb_tasks = 100
        # The stream is always created with a fixed seed of 42.
        task_generator: TaskGenerator = ctrl.get_stream(self.dataset, seed=42)
        # Get the train/val/test splits from the tasks.
        for task_dataset in itertools.islice(task_generator, self.nb_tasks):
            train_dataset = task_dataset.datasets[task_dataset.split_names.index("Train")]
            val_dataset = task_dataset.datasets[task_dataset.split_names.index("Val")]
            test_dataset = task_dataset.datasets[task_dataset.split_names.index("Test")]
            self.train_datasets.append(train_dataset)
            self.val_datasets.append(val_dataset)
            self.test_datasets.append(test_dataset)
    ## NOTE: Not sure this is a good idea, because we might easily mix the train/val
    ## and test splits between different runs! Actually, now that I think about it,
    ## I need to make sure that this isn't happening already with Avalanche!
    # if self.datasets:
    #     if any(self.train_datasets, self.val_datasets, self.test_datasets):
    #         raise RuntimeError(
    #             f"When passing your own datasets to the setting, you have to pass "
    #             f"either `datasets` or all three of `train_datasets`, "
    #             f"`val_datasets` and `test_datasets`."
    #         )
    #     self.train_datasets = []
    #     self.val_datasets = []
    #     self.test_datasets = []
    #     rng = np.random.default_rng(self.config.seed if self.config else 123)
    #     for dataset in datasets:
    #         n = len(dataset)
    #         n_train_val = int(n * 0.8)
    #         n_test = n - n_train_val
    #         n_train = int(n_train_val * 0.8)
    #         n_valid = n_train_val - n_train
    #         train_val_dataset, test_dataset = random_split(
    #             dataset, [n_train_val, n_test], generator=rng,
    #         )
    #         train_dataset, val_dataset = random_split(
    #             train_val_dataset, [n_train, n_valid], generator=rng,
    #         )
    #         self.train_datasets.append(train_dataset)
    #         self.val_datasets.append(val_dataset)
    #         self.test_datasets.append(test_dataset)
    # Custom datasets (either passed by the user or filled in from a CTRL stream
    # above): all three lists are required and must describe the same tasks.
    if any([self.train_datasets, self.val_datasets, self.test_datasets]):
        if not all([self.train_datasets, self.val_datasets, self.test_datasets]):
            raise RuntimeError(
                f"When passing your own datasets to the setting, you have to pass "
                f"`train_datasets`, `val_datasets` and `test_datasets`."
            )
        # The number of tasks is dictated by the number of train datasets.
        self.nb_tasks = len(self.train_datasets)
        if not (len(self.val_datasets) == len(self.test_datasets) == self.nb_tasks):
            raise RuntimeError(
                f"When passing your own datasets to the setting, you need to pass "
                f"The same number of train/valid and test datasets for now."
            )
        # FIXME: For now, setting `self.dataset` to None, because it has a default
        # of 'mnist'. Should probably make it a required argument instead.
        self.dataset = None
        # x_shape = self.train_datasets[0][0][0].shape
        # self.observation_space.x.shape = x_shape
        # assert False, (x_shape, self.observation_space)
    # Note: Using the same name as in the RL Setting for now, since that's where
    # this feature of passing the "envs" for each task was first added.
    self._using_custom_envs_foreach_task: bool = bool(self.train_datasets)
    # TODO: Remove this
    if self.dataset in self.base_action_spaces:
        if isinstance(self.action_space, spaces.Discrete):
            base_action_space = self.base_action_spaces[self.dataset]
            n_classes = base_action_space.n
            self.class_order = self.class_order or list(range(n_classes))
            if self.nb_tasks:
                # Split the classes evenly across tasks (integer division).
                self.increment = n_classes // self.nb_tasks
    if not self.nb_tasks:
        # No number of tasks given: infer it from the number of classes per task.
        base_action_space = self.base_action_spaces[self.dataset]
        if isinstance(base_action_space, spaces.Discrete):
            self.nb_tasks = base_action_space.n // self.increment
    assert self.nb_tasks != 0, self.nb_tasks
def apply(
    self, method: Method["ContinualSLSetting"], config: Config = None
) -> ContinualSLResults:
    """Run `method` on this setting and return the results it obtained.

    Args:
        method: The method to evaluate. It gets configured with this setting
            before the main loop and receives the results afterwards.
        config: Optional configuration. When it is missing (or falsy), one is
            created through `self._setup_config(method)`.

    Returns:
        The results produced by the main loop of the parent class.
    """
    # TODO: It still isn't super clear what should be in charge of creating
    # the config, and how to create it, when it isn't passed explicitly.
    if config:
        self.config = config
    else:
        self.config = self._setup_config(method)
    assert self.config is not None

    method.configure(setting=self)

    # The main loop (defined in ContinualAssumption) basically does this:
    #   1. Call method.fit(train_env, valid_env)
    #   2. Test the method on test_env.
    # and returns the results, as reported by the test environment.
    results: ContinualSLResults = super().main_loop(method)

    method.receive_results(self, results=results)
    return results
def train_dataloader(
    self, batch_size: int = 32, num_workers: Optional[int] = 4
) -> EnvironmentType:
    """Create (and store in `self.train_env`) the training environment.

    Prepares the data and runs the "fit" setup if needed, closes any previous
    training environment, then wraps a fresh `self.Environment` with, in order:
    rendering (if `config.render`), observation transforms, tensor/device
    conversion (if `config.device`) and online performance measurement (if
    `monitor_training_performance`).

    Args:
        batch_size: Batch size; `self.batch_size` is only used when `None` is
            passed explicitly, since the default here is 32.
        num_workers: Number of workers; `self.num_workers` is only used when
            `None` is passed explicitly, since the default here is 4.

    Returns:
        The new training environment.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    if not self.has_setup_fit:
        self.setup("fit")
    # Only one training environment is kept alive at a time.
    if self.train_env:
        self.train_env.close()
    batch_size = batch_size if batch_size is not None else self.batch_size
    num_workers = num_workers if num_workers is not None else self.num_workers
    # NOTE: ATM the dataset here doesn't have any transforms. We add the transforms after the
    # dataloader below using the TransformObservations wrapper. This isn't ideal.
    dataset = self._make_train_dataset()
    # TODO: Add some kind of Wrapper around the dataset to make it
    # semi-supervised?
    env = self.Environment(
        dataset,
        hide_task_labels=(not self.task_labels_at_train_time),
        observation_space=self.observation_space,
        action_space=self.action_space,
        reward_space=self.reward_space,
        Observations=self.Observations,
        Actions=self.Actions,
        Rewards=self.Rewards,
        pin_memory=True,
        batch_size=batch_size,
        num_workers=num_workers,
        drop_last=self.drop_last,
        shuffle=False,
        # Without known task boundaries, the env can only be iterated on once.
        one_epoch_only=(not self.known_task_boundaries_at_train_time),
    )
    if self.config.render:
        # Add a wrapper that calls 'env.render' at each step?
        env = RenderEnvWrapper(env)
    # Base transforms followed by the train-specific ones.
    train_transforms = Compose(self.transforms + self.train_transforms)
    if train_transforms:
        env = TransformObservation(env, f=train_transforms)
    if self.config.device:
        # TODO: Put this before or after the image transforms?
        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors
        env = ConvertToFromTensors(env, device=self.config.device)
        # env = TransformObservation(env, f=partial(move, device=self.config.device))
        # env = TransformReward(env, f=partial(move, device=self.config.device))
    if self.monitor_training_performance:
        env = MeasureSLPerformanceWrapper(
            env,
            first_epoch_only=True,
            wandb_prefix=f"Train/",
        )
    # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
    # when transforms don't propagate the 'dtype' field.
    env.observation_space.dtype = self.Observations
    self.train_env = env
    return self.train_env
def val_dataloader(
    self, batch_size: int = 32, num_workers: Optional[int] = 4
) -> EnvironmentType:
    """Create (and store in `self.val_env`) the validation environment.

    Prepares the data and runs the "validate" setup if needed, closes any
    previous validation environment, then wraps a fresh `self.Environment`
    with, in order: rendering (if `config.render`), observation transforms and
    tensor/device conversion (if `config.device`). Unlike the training
    environment, no online performance is measured here.

    Args:
        batch_size: Batch size; `self.batch_size` is only used when `None` is
            passed explicitly, since the default here is 32.
        num_workers: Number of workers; `self.num_workers` is only used when
            `None` is passed explicitly, since the default here is 4.

    Returns:
        The new validation environment.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    if not self.has_setup_validate:
        self.setup("validate")
    # Only one validation environment is kept alive at a time.
    if self.val_env:
        self.val_env.close()
    batch_size = batch_size if batch_size is not None else self.batch_size
    num_workers = num_workers if num_workers is not None else self.num_workers
    dataset = self._make_val_dataset()
    # TODO: Add some kind of Wrapper around the dataset to make it
    # semi-supervised?
    # TODO: Change the reward and action spaces to also use objects.
    env = self.Environment(
        dataset,
        # The validation env follows the *train-time* task label visibility.
        hide_task_labels=(not self.task_labels_at_train_time),
        observation_space=self.observation_space,
        action_space=self.action_space,
        reward_space=self.reward_space,
        Observations=self.Observations,
        Actions=self.Actions,
        Rewards=self.Rewards,
        pin_memory=True,
        drop_last=self.drop_last,
        batch_size=batch_size,
        num_workers=num_workers,
        one_epoch_only=(not self.known_task_boundaries_at_train_time),
    )
    # TODO: If wandb is enabled, then add customized Monitor wrapper (with
    # IterableWrapper as an additional subclass). There would then be a lot of
    # overlap between such a Monitor and the current TestEnvironment.
    if self.config.render:
        # Add a wrapper that calls 'env.render' at each step?
        env = RenderEnvWrapper(env)
    # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
    # already added when creating the datasets and the CL scenario.
    # NOTE(review): the note above looks stale: `setup` creates the scenarios
    # with `transformations=[]`, and the base transforms are included in the
    # sum below. Also, unlike in `train_dataloader`, the result isn't wrapped
    # in `Compose` here -- confirm that this is intended.
    val_transforms = self.transforms + self.val_transforms
    if val_transforms:
        env = TransformObservation(env, f=val_transforms)
    if self.config.device:
        # TODO: Put this before or after the image transforms?
        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors
        env = ConvertToFromTensors(env, device=self.config.device)
        # env = TransformObservation(env, f=partial(move, device=self.config.device))
        # env = TransformReward(env, f=partial(move, device=self.config.device))
    # NOTE: We don't measure online performance on the validation set.
    # if self.monitor_training_performance:
    #     env = MeasureSLPerformanceWrapper(
    #         env,
    #         first_epoch_only=True,
    #         wandb_prefix=f"Train/Task {self.current_task_id}",
    #     )
    # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
    # when transforms don't propagate the 'dtype' field.
    env.observation_space.dtype = self.Observations
    self.val_env = env
    return self.val_env
def test_dataloader(
    self, batch_size: int = None, num_workers: int = None
) -> ContinualSLEnvironment[Observations, Actions, Rewards]:
    """Returns a Continual SL Test environment.

    Prepares the data and runs the "test" setup if needed, builds a
    single-epoch `self.Environment` over the test dataset, adds the observation
    transforms and the tensor/device conversion (if `config.device`), and wraps
    the result in a `ContinualSLTestEnvironment` limited to one pass over the
    dataset. The previous test environment (if any) is closed and replaced.

    Args:
        batch_size: Batch size. Defaults to `self.batch_size` when `None`.
        num_workers: Number of workers. Defaults to `self.num_workers` when
            `None`.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    if not self.has_setup_test:
        self.setup("test")
    batch_size = batch_size if batch_size is not None else self.batch_size
    num_workers = num_workers if num_workers is not None else self.num_workers
    dataset = self._make_test_dataset()
    env = self.Environment(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        hide_task_labels=(not self.task_labels_at_test_time),
        observation_space=self.observation_space,
        action_space=self.action_space,
        reward_space=self.reward_space,
        Observations=self.Observations,
        Actions=self.Actions,
        Rewards=self.Rewards,
        pretend_to_be_active=True,
        drop_last=self.drop_last,
        shuffle=False,
        one_epoch_only=True,
    )
    # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
    # already added when creating the datasets and the CL scenario.
    # NOTE(review): the note above looks stale: `setup` creates the scenarios
    # with `transformations=[]`, and the base transforms are included in the
    # sum below -- confirm.
    test_transforms = self.transforms + self.test_transforms
    if test_transforms:
        env = TransformObservation(env, f=test_transforms)
    if self.config.device:
        # TODO: Put this before or after the image transforms?
        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors
        env = ConvertToFromTensors(env, device=self.config.device)
        # env = TransformObservation(env, f=partial(move, device=self.config.device))
        # env = TransformReward(env, f=partial(move, device=self.config.device))
    # FIXME: Instead of trying to create a 'fake' task schedule for the test
    # environment, instead let the test environment see the task ids, (and then hide
    # them if necessary) so that it can compile the stats for each task based on the
    # task IDs of the observations.
    # TODO: Configure the 'monitoring' dir properly.
    # Write the monitoring output in the wandb run dir when a run is active.
    if wandb.run:
        test_dir = wandb.run.dir
    else:
        test_dir = self.config.log_dir
    # Number of full batches in the test dataset (integer division).
    test_loop_max_steps = len(dataset) // (env.batch_size or 1)
    test_env = ContinualSLTestEnvironment(
        env,
        directory=test_dir,
        step_limit=test_loop_max_steps,
        force=True,
        config=self.config,
        video_callable=None if (wandb.run or self.config.render) else False,
    )
    # NOTE: Quickfix for the 'dtype' of the TypedDictSpace perhaps getting lost
    # when transforms don't propagate the 'dtype' field.
    # NOTE(review): this is set on the inner `env`, not on `test_env` -- confirm
    # that the test environment exposes the same space object.
    env.observation_space.dtype = self.Observations
    # The previous test env is only closed once the new one has been created.
    if self.test_env:
        self.test_env.close()
    self.test_env = test_env
    return self.test_env
def prepare_data(self, data_dir: Path = None) -> None:
    """Download the train and test datasets, unless custom datasets were given.

    Args:
        data_dir: Directory in which to download the datasets. When `None`,
            `self.config.data_dir` is used if a config is set, and the relative
            "data" directory otherwise.
    """
    # TODO: Pass the transformations to the CL scenario, or to the dataset?
    if data_dir is None:
        data_dir = self.config.data_dir if self.config else Path("data")
    logger.info(f"Downloading datasets to directory {data_dir}")

    self._using_custom_envs_foreach_task = bool(self.train_datasets)
    if self._using_custom_envs_foreach_task:
        # Nothing to download: the datasets of each task were provided.
        return super().prepare_data()

    self.train_cl_dataset = self.make_dataset(data_dir, download=True, train=True)
    self.test_cl_dataset = self.make_dataset(data_dir, download=True, train=False)
    return super().prepare_data()
def setup(self, stage: str = None):
    """Create the per-task train/val and/or test datasets for the given stage.

    Everything is created lazily: continuum datasets, scenarios and the lists of
    per-task datasets are only built when they don't exist yet, so calling this
    again doesn't overwrite existing datasets.

    Args:
        stage: One of "fit", "validate", "test" or `None`. "fit"/"validate"
            create the train and validation datasets, "test" the test datasets,
            and `None` creates both.

    Raises:
        RuntimeError: if `stage` is not one of the values above. Note that this
            check runs after `super().setup(stage=stage)` was called.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    super().setup(stage=stage)
    if stage not in (None, "fit", "test", "validate"):
        raise RuntimeError(f"`stage` should be 'fit', 'test', 'validate' or None.")
    if stage in (None, "fit", "validate"):
        if not self._using_custom_envs_foreach_task:
            self.train_cl_dataset = self.train_cl_dataset or self.make_dataset(
                self.config.data_dir, download=False, train=True
            )
        # Pass either `nb_tasks` or `increment` to the scenario, but not both.
        nb_tasks_kwarg = {}
        if self.nb_tasks is not None:
            nb_tasks_kwarg.update(nb_tasks=self.nb_tasks)
        else:
            nb_tasks_kwarg.update(increment=self.increment)
        if not self._using_custom_envs_foreach_task:
            self.train_cl_loader = self.train_cl_loader or ClassIncremental(
                cl_dataset=self.train_cl_dataset,
                **nb_tasks_kwarg,
                initial_increment=self.initial_increment,
                transformations=[],  # NOTE: Changing this: The transforms will get added after.
                class_order=self.class_order,
            )
        if not self.train_datasets and not self.val_datasets:
            # Hold out 10% of the training data of each task for validation.
            for task_id, train_taskset in enumerate(self.train_cl_loader):
                train_taskset, valid_taskset = split_train_val(train_taskset, val_split=0.1)
                self.train_datasets.append(train_taskset)
                self.val_datasets.append(valid_taskset)
            # IDEA: We could do the remapping here instead of adding a wrapper later.
            if self.shared_action_space and isinstance(self.action_space, spaces.Discrete):
                # If we have a shared output space, then they are all mapped to [0, n_per_task]
                self.train_datasets = list(map(relabel, self.train_datasets))
                self.val_datasets = list(map(relabel, self.val_datasets))
    if stage in (None, "test"):
        if not self._using_custom_envs_foreach_task:
            self.test_cl_dataset = self.test_cl_dataset or self.make_dataset(
                self.config.data_dir, download=False, train=False
            )
            self.test_class_order = self.test_class_order or self.class_order
            # NOTE(review): unlike the train scenario above, this one receives
            # both `nb_tasks` and `increment` -- confirm that this is intended.
            self.test_cl_loader = self.test_cl_loader or ClassIncremental(
                cl_dataset=self.test_cl_dataset,
                nb_tasks=self.nb_tasks,
                increment=self.test_increment,
                initial_increment=self.test_initial_increment,
                transformations=[],  # note: not passing transforms here, they get added later
                class_order=self.test_class_order,
            )
        if not self.test_datasets:
            # TODO: If we decide to 'shuffle' the test tasks, then store the sequence of
            # task ids in a new property, probably here.
            # self.test_task_order = list(range(len(self.test_datasets)))
            self.test_datasets = list(self.test_cl_loader)
            # IDEA: We could do the remapping here instead of adding a wrapper later.
            if self.shared_action_space and isinstance(self.action_space, spaces.Discrete):
                # If we have a shared output space, then they are all mapped to [0, n_per_task]
                self.test_datasets = list(map(relabel, self.test_datasets))
def _make_train_dataset(self) -> Union[TaskSet, Dataset]:
    """Create the dataset used by the next training environment.

    Depending on the properties of the setting, this is either:
    - all the training datasets joined with smoothed task boundaries;
    - all the training datasets joined and fully shuffled (stationary context);
    - the training dataset of the current task (known task boundaries);
    - the plain concatenation of all the training datasets.

    The shuffling seed comes from `self.config.seed`. When no config is set, no
    seed is used, in both shuffling branches. Previously only the smooth
    branch tolerated a missing config, while the stationary branch would fail
    with an AttributeError.
    """
    # NOTE: Passing the same seed to `train`/`valid`/`test` is fine, because it's
    # only used for the shuffling used to make the task boundaries smooth.
    if self.smooth_task_boundaries:
        seed = self.config.seed if self.config else None
        return smooth_task_boundaries_concat(self.train_datasets, seed=seed)
    if self.stationary_context:
        joined_dataset = concat(self.train_datasets)
        seed = self.config.seed if self.config else None
        return shuffle(joined_dataset, seed=seed)
    if self.known_task_boundaries_at_train_time:
        return self.train_datasets[self.current_task_id]
    return concatenate(self.train_datasets)
def _make_val_dataset(self) -> Dataset:
    """Create the dataset used by the next validation environment.

    Mirrors the way the training dataset is selected: smoothed task boundaries,
    fully shuffled (stationary context), the validation dataset of the current
    task (known task boundaries), or the concatenation of all of them.

    The shuffling seed comes from `self.config.seed`. When no config is set, no
    seed is used, where this previously failed with an AttributeError.
    """
    if self.smooth_task_boundaries:
        seed = self.config.seed if self.config else None
        return smooth_task_boundaries_concat(self.val_datasets, seed=seed)
    if self.stationary_context:
        joined_dataset = concat(self.val_datasets)
        seed = self.config.seed if self.config else None
        return shuffle(joined_dataset, seed=seed)
    if self.known_task_boundaries_at_train_time:
        return self.val_datasets[self.current_task_id]
    return concatenate(self.val_datasets)
def _make_test_dataset(self) -> Dataset:
    """Create the dataset used by the test environment.

    The test datasets of all tasks are always joined: with smoothed task
    boundaries when `smooth_task_boundaries` is set, and by plain concatenation
    otherwise.

    The shuffling seed comes from `self.config.seed`. When no config is set, no
    seed is used, where this previously failed with an AttributeError.
    """
    if self.smooth_task_boundaries:
        seed = self.config.seed if self.config else None
        return smooth_task_boundaries_concat(self.test_datasets, seed=seed)
    return concatenate(self.test_datasets)
def make_dataset(
    self, data_dir: Path, download: bool = True, train: bool = True, **kwargs
) -> _ContinuumDataset:
    """Create the continuum dataset that corresponds to `self.dataset`.

    `self.dataset` can be a key of `available_datasets`, one of its values (a
    dataset class), or an already-created `Dataset`, which is returned as-is.

    Args:
        data_dir: Directory where the data is stored. Created if missing.
        download: Whether to download the data if needed.
        train: Whether to create the training or the testing dataset.
        **kwargs: Additional arguments for the dataset constructor.

    Raises:
        NotImplementedError: if `self.dataset` is none of the above.
    """
    # TODO: #7 Use this method here to fix the errors that happen when
    # trying to create every single dataset from continuum.
    data_dir = Path(data_dir)
    if not data_dir.exists():
        data_dir.mkdir(parents=True, exist_ok=True)

    if self.dataset in self.available_datasets:
        # Registered name -> look up the class.
        dataset_class = self.available_datasets[self.dataset]
    elif self.dataset in self.available_datasets.values():
        # Already one of the registered classes.
        dataset_class = self.dataset
    elif isinstance(self.dataset, Dataset):
        logger.info(f"Using a custom dataset {self.dataset}")
        return self.dataset
    else:
        raise NotImplementedError(self.dataset)

    return dataset_class(data_path=data_dir, download=download, train=train, **kwargs)
@property
def observation_space(self) -> ObservationSpace[Observations]:
    """The un-batched observation space of the train/valid/test environments.

    It is derived from the choice of dataset (or from the first custom training
    dataset) and from the transforms in `self.transforms`.

    The returned space is a TypedDictSpace, with the following properties:
    - `x`: observation space (e.g. `Image` space)
    - `task_labels`: Union[Discrete, Sparse[Discrete]]
        The task labels for each sample. When task labels are not available,
        the task labels space is Sparse, and entries will be `None`.

    The space is re-created (and stored in `self._observation_space`) on every
    access.
    """
    # TODO: Need to clean this up a bit:
    source = self.train_datasets[0] if self._using_custom_envs_foreach_task else self.dataset
    x_space = get_observation_space(source)

    if not self.transforms:
        # NOTE: When we don't pass any transforms, continuum scenarios still
        # at least use 'to_tensor'.
        x_space = Transforms.to_tensor(x_space)
    # Transforms can also be applied to spaces: run the space through each one.
    for transform in self.transforms:
        x_space = transform(x_space)
    x_space = add_tensor_support(x_space)

    task_ids = spaces.Discrete(self.nb_tasks)
    if self.task_labels_at_train_time:
        task_label_space = add_tensor_support(task_ids)
    else:
        # Task labels are hidden during training: always-`None` entries.
        task_label_space = add_tensor_support(Sparse(task_ids, 1.0))

    self._observation_space = self.ObservationSpace(
        x=x_space,
        task_labels=task_label_space,
        dtype=self.Observations,
    )
    return self._observation_space
# TODO: Add a `train_observation_space`, `train_action_space`, `train_reward_space`?
@property
def action_space(self) -> spaces.Discrete:
    """Action space for this setting (cached after the first computation).

    Based on the dataset (or on the first custom training dataset). For the
    datasets listed in `base_action_spaces`, a shared action space only has
    `self.increment` classes.
    """
    cached = self._action_space
    if cached:
        return cached

    # Determine the action space using the right dataset.
    # (NOTE: same across train/val/test for now.)
    if self._using_custom_envs_foreach_task:
        dataset = self.train_datasets[0]
    else:
        dataset = self.dataset
    action_space = get_action_space(dataset)

    # TODO: Remove this
    if (
        isinstance(action_space, spaces.Discrete)
        and self.dataset in self.base_action_spaces
        and self.shared_action_space
    ):
        assert isinstance(self.increment, int), (
            "Need to have same number of classes in each task when "
            "`shared_action_space` is true."
        )
        action_space = spaces.Discrete(self.increment)

    self._action_space = action_space
    return self._action_space
# TODO: IDEA: Have the action space only reflect the number of 'current' classes
# in order to create a "true" class-incremental learning setting.
# n_classes_seen_so_far = 0
# for task_id in range(self.current_task_id):
# n_classes_seen_so_far += self.num_classes_in_task(task_id)
# return spaces.Discrete(n_classes_seen_so_far)
@property
def reward_space(self) -> spaces.Discrete:
    """Reward space for this setting (cached after the first computation).

    Based on the dataset (or on the custom training datasets). For the datasets
    listed in `base_reward_spaces`, a shared action space only has
    `self.increment` classes.
    """
    cached = self._reward_space
    if cached:
        return cached

    # Determine the reward space using the right dataset.
    # (NOTE: same across train/val/test for now.)
    if self._using_custom_envs_foreach_task:
        # NOTE(review): this passes the whole *list* of training datasets, while
        # `action_space` passes `self.train_datasets[0]` -- confirm which one
        # `get_reward_space` expects.
        dataset = self.train_datasets
    else:
        dataset = self.dataset
    reward_space = get_reward_space(dataset)

    # TODO: Remove this
    if (
        isinstance(reward_space, spaces.Discrete)
        and self.dataset in self.base_reward_spaces
        and self.shared_action_space
    ):
        assert isinstance(self.increment, int), (
            "Need to have same number of classes in each task when "
            "`shared_action_space` is true."
        )
        reward_space = spaces.Discrete(self.increment)

    self._reward_space = reward_space
    return self._reward_space
def smooth_task_boundaries_concat(
    datasets: List[Dataset], seed: int = None, window_length: float = 0.03
) -> Union[TaskSet, Subset]:
    """Join `datasets` and shuffle the result locally to blur the task boundaries.

    The samples of the concatenated dataset are shuffled inside a sliding
    window (moved by half a window at each step), once going forward and once
    going backward. Samples therefore only move to nearby positions: the
    middle of each task stays mostly pure, while neighbouring tasks get mixed
    around their boundary.

    TODO: Use a smarter way of mixing from one to the other? (e.g. sampling each
    item based on how close we are to the 'center' of each task.)

    Args:
        datasets: The dataset of each task, in order.
        seed: Seed for the random number generator used for the shuffling.
        window_length: Size of the shuffling window. An `int` is a number of
            samples; anything else is a fraction of the total length.

    Returns:
        A `TaskSet` when all the datasets are `TaskSet`s, otherwise a `Subset`
        of the `ConcatDataset` of all the datasets.

    Raises:
        AssertionError: if the window contains fewer than two samples.
    """
    total_length = sum(len(dataset) for dataset in datasets)
    if not isinstance(window_length, int):
        window_length = int(total_length * window_length)
    assert (
        window_length > 1
    ), f"Window length should be positive or a fraction of the dataset length. ({window_length})"

    rng = np.random.default_rng(seed)
    shuffled_indices = np.arange(total_length)
    # Consecutive windows overlap by half, so that samples can travel a bit
    # further than a single window.
    window_starts = range(0, total_length - window_length + 1, window_length // 2)
    # The slices are views: shuffling them shuffles `shuffled_indices` in-place.
    for start_index in window_starts:
        rng.shuffle(shuffled_indices[start_index : start_index + window_length])
    # Same thing backwards, so samples can also move towards the start.
    for start_index in reversed(window_starts):
        rng.shuffle(shuffled_indices[start_index : start_index + window_length])

    if all(isinstance(dataset, TaskSet) for dataset in datasets):
        # Use the 'concat' from continuum, just to preserve the field/methods of a
        # TaskSet.
        return subset(concat(datasets), shuffled_indices)
    return Subset(ConcatDataset(datasets), shuffled_indices)
from functools import singledispatch
from typing import Sequence, overload
from .wrappers import replace_taskset_attributes
DatasetType = TypeVar("DatasetType", bound=Dataset)
@overload
def subset(dataset: TaskSet, indices: Sequence[int]) -> TaskSet:
    ...


@singledispatch
def subset(dataset: DatasetType, indices: Sequence[int]) -> Union[Subset, DatasetType]:
    """Return the subset of `dataset` made of the samples at `indices`.

    This is the fallback of a single-dispatch function: implementations are
    registered for specific dataset types, and any other type is rejected.

    Raises:
        NotImplementedError: if no implementation is registered for the type of
            `dataset`.
    """
    # NOTE: A generic `Subset(dataset, indices)` fallback used to follow this
    # statement, but it could never be reached. Unsupported dataset types have
    # always raised, and still do.
    raise NotImplementedError(f"Don't know how to take a subset of dataset {dataset}")
@subset.register
def taskset_subset(taskset: TaskSet, indices: np.ndarray) -> TaskSet:
    """Return a copy of `taskset` that only contains the samples at `indices`.

    The raw samples, labels and task ids at those indices are extracted and
    swapped into a new TaskSet, along with the matching bounding boxes (if
    any).
    """
    samples, labels, task_ids = taskset.get_raw_samples(indices)
    # TODO: Not sure if/how to handle the `bounding_boxes` attribute here.
    boxes = taskset.bounding_boxes
    selected_boxes = None if boxes is None else boxes[indices]
    return replace_taskset_attributes(
        taskset, x=samples, y=labels, t=task_ids, bounding_boxes=selected_boxes
    )
def random_subset(
    taskset: TaskSet, n_samples: int, seed: int = None, ordered: bool = True
) -> TaskSet:
    """Pick `n_samples` random samples (without replacement) from `taskset`.

    Args:
        taskset: The dataset to take the samples from.
        n_samples: Number of samples to keep.
        seed: Seed for the random number generator.
        ordered: Whether the chosen samples keep their original relative order.

    Raises:
        RuntimeError: if `taskset` has fewer than `n_samples` samples.
    """
    rng = np.random.default_rng(seed)
    dataset_length = len(taskset)
    if dataset_length < n_samples:
        raise RuntimeError(f"Dataset has {dataset_length}, asked for {n_samples} samples.")

    # The first `n_samples` entries of a random permutation of all the indices.
    chosen = rng.permutation(range(dataset_length))[:n_samples]
    indices = sorted(chosen) if ordered else chosen
    assert len(indices) == n_samples
    return subset(taskset, indices)
DatasetType = TypeVar("DatasetType", bound=Dataset)
def shuffle(dataset: DatasetType, seed: int = None) -> DatasetType:
    """Return `dataset` with all of its samples in a random order.

    Args:
        dataset: The dataset to shuffle. Must be supported by `subset`.
        seed: Seed for the random number generator.
    """
    n_samples = len(dataset)
    new_order = np.random.default_rng(seed).permutation(range(n_samples))
    return subset(dataset, new_order)
import torch
from torch import Tensor
def smart_class_prediction(
    logits: Tensor, task_labels: Tensor, setting: SLSetting, train: bool
) -> Tensor:
    """Predicts classes which are available, given the task labels.

    The classes of each task present in `task_labels` are queried from
    `setting.task_classes`, and each prediction is then restricted to the
    classes of the task of the corresponding sample.
    """
    present_task_ids = set(task_labels.unique().cpu().tolist())
    classes_in_each_task = {}
    for task_id in present_task_ids:
        classes_in_each_task[task_id] = setting.task_classes(task_id, train=train)
    return limit_to_available_classes(logits, task_labels, classes_in_each_task)
def limit_to_available_classes(
    logits: Tensor, task_labels: Tensor, classes_in_each_present_task: Dict[int, List[int]]
) -> Tensor:
    """Predict, for each sample, the best class among those of the sample's task.

    Args:
        logits: Scores for each class, one row per sample (`[B, C]`).
        task_labels: Task id of each sample (`[B]`).
        classes_in_each_present_task: Classes of each task that appears in
            `task_labels`. The classes can be given in any order.

    Returns:
        Tensor of `B` class predictions (int64). For each sample, this is the
        class of its task with the highest logit. When none of the classes of
        the task has an output in `logits`, a random class of that task is
        predicted instead.

    Raises:
        KeyError: if a task label isn't in `classes_in_each_present_task`.
    """
    n_outputs = logits.shape[-1]
    assert logits.shape[0] == task_labels.shape[0]

    output_indices = torch.arange(n_outputs, dtype=torch.long, device=logits.device)
    # Computed once per task rather than once per sample.
    eligible_classes_per_task: Dict[int, Tensor] = {}
    candidate_outputs_per_task: Dict[int, Tensor] = {}
    for task_id, labels in classes_in_each_present_task.items():
        eligible_classes = torch.as_tensor(labels, dtype=torch.long, device=logits.device)
        is_eligible = (output_indices.unsqueeze(-1) == eligible_classes).any(-1)
        eligible_classes_per_task[task_id] = eligible_classes
        # Outputs of the network that are classes of this task, in ascending
        # order, i.e. in the same order as the entries of a masked logit.
        candidate_outputs_per_task[task_id] = output_indices[is_eligible]

    y_preds = []
    # TODO: Also return the logits, so we can get a loss for the selected indices?
    for logit, task_label in zip(logits, task_labels):
        task_id = task_label.item()
        candidate_outputs = candidate_outputs_per_task[task_id]
        if candidate_outputs.numel() == 0:
            # Return a random prediction from the set of possible classes, since
            # the network has fewer outputs than there are classes.
            # NOTE: This can occur for instance when testing on future tasks
            # when using a MultiTask module.
            eligible_classes = eligible_classes_per_task[task_id]
            y_pred = eligible_classes[torch.randint(len(eligible_classes), (1,))]
        else:
            # Map the argmax back through the candidate *outputs*, not through
            # the list of classes given by the caller: the latter is only
            # correct when that list is sorted, without duplicates, and entirely
            # within the outputs of the network.
            best_candidate = logit[candidate_outputs].argmax(-1)
            y_pred = candidate_outputs[best_candidate]
        y_preds.append(y_pred.reshape(()))  # Just to make sure they all have the same shape.

    if not y_preds:
        # Empty batch: `torch.stack` doesn't accept an empty list.
        return torch.empty(0, dtype=torch.long, device=logits.device)
    return torch.stack(y_preds)
from sequoia.common.transforms.channels import has_channels_last, has_channels_first
@has_channels_last.register(ContinualSLSetting.Observations)
def _has_channels_last(obs: ContinualSLSetting.Observations) -> bool:
    """Check whether the samples (`obs.x`) of the observations are channels-last."""
    # Registered for the `Observations` type: delegates to the check on `x`.
    return has_channels_last(obs.x)
================================================
FILE: sequoia/settings/sl/continual/setting_test.py
================================================
import functools
from collections import Counter
from pathlib import Path
from typing import Any, ClassVar, Dict, Tuple, Type
import gym
import pytest
import torch
from sklearn.datasets import make_classification
from torch.utils.data import TensorDataset, random_split
from sequoia.common.config import Config
from sequoia.methods import RandomBaselineMethod
from sequoia.settings.base.setting_test import SettingTests
from sequoia.settings.sl.continual.setting import shuffle
from .setting import ContinualSLSetting, random_subset, smooth_task_boundaries_concat
from .wrappers import ShowLabelDistributionWrapper
def test_continuum_shuffle(config: Config):
    """Shuffling a class-incremental MNIST moves most of its labels and task ids."""
    from continuum.datasets import MNIST
    from continuum.scenarios import ClassIncremental
    from continuum.tasks import concat

    mnist = MNIST(data_path=config.data_dir, train=True)
    scenario = ClassIncremental(mnist, increment=2)
    cl_dataset = concat(scenario)

    shuffled_dataset = shuffle(cl_dataset)

    # More than half of the entries should differ from the original ordering.
    half_length = len(cl_dataset) / 2
    assert (shuffled_dataset._y != cl_dataset._y).sum() > half_length
    assert (shuffled_dataset._t != cl_dataset._t).sum() > half_length
class TestContinualSLSetting(SettingTests):
Setting: ClassVar[Type[Setting]] = ContinualSLSetting
# The kwargs to be passed to the Setting when we want to create a 'short' setting.
# TODO: Transform this into a fixture instead.
fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
dataset="mnist",
batch_size=64,
)
@pytest.fixture(scope="session")
def short_setting(self, session_config):
    """Session-wide setting with only 100 samples in each train/val/test dataset."""
    kwargs = {**self.fast_dev_run_kwargs, "config": session_config}
    setting = self.Setting(**kwargs)
    setting.config = session_config
    setting.prepare_data()
    setting.setup()

    def shorten(task_datasets):
        return [random_subset(task_dataset, 100) for task_dataset in task_datasets]

    def check_datasets():
        # There should be 5 tasks, each with 100 samples.
        assert len(setting.train_datasets) == 5
        assert len(setting.val_datasets) == 5
        assert len(setting.test_datasets) == 5
        assert all(len(dataset) == 100 for dataset in setting.train_datasets)
        assert all(len(dataset) == 100 for dataset in setting.val_datasets)
        assert all(len(dataset) == 100 for dataset in setting.test_datasets)

    # Testing this out: Shortening the train datasets:
    setting.train_datasets = shorten(setting.train_datasets)
    setting.val_datasets = shorten(setting.val_datasets)
    setting.test_datasets = shorten(setting.test_datasets)
    check_datasets()

    # Assert that calling setup doesn't overwrite the datasets.
    setting.setup()
    check_datasets()
    return setting
def test_shared_action_space(self, config: Config):
    """With a shared action space, the test labels are all mapped to {0, 1}."""
    kwargs = self.fast_dev_run_kwargs.copy()
    kwargs["config"] = config
    if (
        isinstance(self.Setting, functools.partial)
        and not self.Setting.args[0].shared_action_space
    ):
        # NOTE: This `self.Setting` being a partial instead of a Setting class only
        # happens in the tests for the SettingProxy.
        kwargs["shared_action_space"] = True
    elif not self.Setting.shared_action_space:
        kwargs["shared_action_space"] = True
    setting = self.Setting(**kwargs)

    y_counter = Counter()
    t_counter = Counter()
    test_env = setting.test_dataloader()
    for obs, rewards in test_env:
        if rewards is None:
            # NOTE: On the last batch, the rewards might have a smaller batch size
            # than the action space.
            # TODO: Add tests to check that the envs can explicitly handle this, so
            # that we don't give the burden to the Method.
            rewards = test_env.send(test_env.action_space.sample())

        if obs.task_labels is not None:
            batch_task_labels = obs.task_labels.tolist()
        else:
            batch_task_labels = [None] * obs.x.shape[0]
        y_counter.update(rewards.y.tolist())
        t_counter.update(batch_task_labels)

    # This is what you get with mnist, with the default class ordering:
    # if setting.known_task_boundaries_at_train_time:
    #     # Only the first task of mnist, in this case.
    #     assert y_counter == {1: 6065, 0: 5534}
    assert y_counter == {0: 4926, 1: 5074}
    if setting.task_labels_at_test_time:
        assert t_counter == {0: 2115, 1: 2042, 3: 1986, 4: 1983, 2: 1874}
    else:
        assert t_counter == {None: 10_000}
    # Full Train envs:
    # assert y_counter == {1: 27456, 0: 26546}
def test_only_one_epoch(self, short_setting):
    """Without known task boundaries, the train env closes after a single epoch."""
    train_env = short_setting.train_dataloader()
    # Go through a full epoch.
    for _ in train_env:
        pass

    if short_setting.known_task_boundaries_at_train_time:
        # The environment can be iterated on more than once.
        assert not train_env.is_closed()
        return

    assert train_env.is_closed()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        for _ in train_env:
            pass
@pytest.mark.no_xvfb
@pytest.mark.timeout(20)
@pytest.mark.skipif(
    not Path("temp").exists(),
    reason="Need temp dir for saving the figure this test creates.",
)
def test_show_distributions(self, config: Config):
    """Save a figure of the label distribution for the train, valid and test envs.

    Each env is wrapped with a `ShowLabelDistributionWrapper`, iterated over once
    (sending a random action whenever the rewards are withheld), and the
    resulting figure is written to the `temp` directory.
    """
    setting = self.Setting(dataset="mnist", config=config)
    output_dir = Path("temp")
    env_factories = {
        "train": setting.train_dataloader,
        "valid": setting.val_dataloader,
        "test": setting.test_dataloader,
    }
    # TODO: Maybe add these plots as part of the results for ContinualSL? How much
    # memory would actually be needed to store these here?
    for split_name, make_env in env_factories.items():
        wrapped_env = ShowLabelDistributionWrapper(
            make_env(batch_size=100, num_workers=4), env_name=split_name
        )
        # Go through the whole env so the wrapper sees every batch.
        for _observations, rewards in wrapped_env:
            if rewards is None:
                wrapped_env.send(wrapped_env.action_space.sample())

        figure = wrapped_env.make_figure()
        figure.set_size_inches((6, 4), forward=False)
        figure_path = Path(f"{output_dir}/{setting.get_name()}_{split_name}.png")
        figure_path.parent.mkdir(exist_ok=True)
        figure.savefig(figure_path)
def test_passing_datasets_to_setting(self, config: Config):
    """Create a Setting from custom per-task train/valid/test datasets.

    Five synthetic two-class datasets (with label offsets 0, 2, 4, 6, 8) are each
    split into train / valid / test subsets and passed to the Setting. The test
    then checks that the lists are stored as-is and that the number of tasks and
    the observation / reward spaces match the data.
    """
    image_shape = (16, 16, 3)
    n_classes = 10
    task_datasets = [
        create_image_classification_dataset(
            image_shape=image_shape, n_classes=2, y_offset=2 * task_index
        )
        for task_index in range(5)
    ]

    train_datasets = []
    val_datasets = []
    test_datasets = []
    for task_dataset in task_datasets:
        total = len(task_dataset)
        # 80% for train+valid, the rest for test.
        train_val_size = int(total * 0.8)
        # 80% of the train+valid portion for train, the rest for valid.
        train_size = int(train_val_size * 0.8)

        train_val_split, test_split = random_split(
            task_dataset, [train_val_size, total - train_val_size]
        )
        train_split, val_split = random_split(
            train_val_split, [train_size, train_val_size - train_size]
        )
        train_datasets.append(train_split)
        val_datasets.append(val_split)
        test_datasets.append(test_split)

    setting = self.Setting(
        train_datasets=train_datasets,
        val_datasets=val_datasets,
        test_datasets=test_datasets,
        transforms=[],
    )
    # The lists should be used as they are (same objects).
    assert setting.train_datasets is train_datasets
    assert setting.val_datasets is val_datasets
    assert setting.test_datasets is test_datasets
    assert setting.nb_tasks == len(setting.train_datasets)
    assert setting.observation_space.x.shape == image_shape
    assert setting.reward_space.n == n_classes
from sequoia.conftest import skip_param
from .envs import CTRL_INSTALLED, CTRL_STREAMS
@pytest.mark.skipif(not CTRL_INSTALLED, reason="Need ctrl-benchmark for this test.")
@pytest.mark.parametrize(
    "stream",
    [
        "s_plus",
        "s_minus",
        "s_in",
        "s_out",
        "s_pl",
        skip_param("s_long", reason="Very long"),
    ],
)
def test_ctrl_stream_support(self, stream: str, config: Config):
    """Apply a random baseline to the given CTrL stream and expect chance-level results."""
    # Same kwargs as for a 'short' setting, but using the stream as the dataset.
    setting = self.Setting(**{**self.fast_dev_run_kwargs, "dataset": stream})
    results = setting.apply(RandomBaselineMethod(), config=config)
    self.assert_chance_level(setting, results=results)
def create_image_classification_dataset(
    image_shape: Tuple[int, ...],
    n_classes: int,
    n_samples_per_class: int = 100,
    y_offset: int = 0,
):
    """Create a synthetic image classification dataset as a `TensorDataset`.

    Copied and Adapted from
    https://github.com/ContinualAI/avalanche/blob/master/tests/unit_tests_utils.py

    The samples come from `make_classification`, with one feature per image
    element, all of them informative. The features are reshaped to `image_shape`
    and cast to float; the labels are cast to long and shifted by `y_offset`.
    """
    flat_size = np.prod(image_shape)
    features, labels = make_classification(
        n_samples=n_classes * n_samples_per_class,
        n_classes=n_classes,
        n_features=flat_size,
        n_informative=flat_size,
        n_redundant=0,
    )
    images = torch.from_numpy(features).reshape([-1, *image_shape]).float()
    targets = torch.from_numpy(labels).long()
    # y_offset can be used to get [2,3] rather than [0,1] for instance.
    if y_offset:
        targets += y_offset
    return TensorDataset(images, targets)
# train_X, test_X, train_y, test_y = train_test_split(
# X, y, train_size=0.6, shuffle=True, stratify=y)
# train_dataset = TensorDataset(train_X, train_y)
# test_dataset = TensorDataset(test_X, test_y)
# return my_nc_benchmark
from typing import List, Tuple
import numpy as np
import pytest
from torch.utils.data import DataLoader
@pytest.mark.timeout(30)
@pytest.mark.no_xvfb
def test_concat_smooth_boundaries(config: Config):
    """Plot the per-batch class and task-id counts of a smoothly-concatenated MNIST.

    MNIST is split into class-incremental tasks of two classes each. The training
    portions are merged with `smooth_task_boundaries_concat`, and the number of
    samples of each class / task in every batch of 100 is plotted.
    """
    from continuum.datasets import MNIST
    from continuum.scenarios import ClassIncremental
    from continuum.tasks import split_train_val

    dataset = MNIST(config.data_dir, download=True, train=True)
    scenario = ClassIncremental(
        dataset,
        increment=2,
    )

    print(f"Number of classes: {scenario.nb_classes}.")
    print(f"Number of tasks: {scenario.nb_tasks}.")

    train_datasets = []
    valid_datasets = []
    for task_id, train_taskset in enumerate(scenario):
        train_taskset, val_taskset = split_train_val(train_taskset, val_split=0.1)
        train_datasets.append(train_taskset)
        valid_datasets.append(val_taskset)

    train_dataset = smooth_task_boundaries_concat(train_datasets, seed=123)
    xs = np.arange(len(train_dataset))

    # One counter of labels and one counter of task ids per batch.
    y_counters: List[Counter] = []
    t_counters: List[Counter] = []
    dataloader = DataLoader(train_dataset, batch_size=100, shuffle=False)
    for _images, batch_y, batch_t in dataloader:
        y_counters.append(Counter(batch_y.tolist()))
        t_counters.append(Counter(batch_t.tolist()))

    classes = list(set().union(*y_counters))
    nb_classes = len(classes)
    batch_indices = np.arange(len(dataloader))

    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(2)

    def plot_counts(ax, counters, values, label_prefix, title):
        # One curve per value: how often it appears in each batch.
        for value in values:
            counts = [counter.get(value) for counter in counters]
            ax.plot(batch_indices, counts, label=f"{label_prefix} {value}")
        ax.legend()
        ax.set_title(title)
        ax.set_xlabel("Batch index")
        ax.set_ylabel("Count in batch")

    plot_counts(axes[0], y_counters, range(nb_classes), "class", "y")
    plot_counts(axes[1], t_counters, range(scenario.nb_tasks), "Task id", "task_id")

    plt.legend()
================================================
FILE: sequoia/settings/sl/continual/wrappers.py
================================================
from functools import partial, singledispatch
from itertools import accumulate
from typing import Any, Dict, List
import gym
import matplotlib.pyplot as plt
import numpy as np
import torch
from continuum import TaskSet
from torch import Tensor
from sequoia.common.gym_wrappers import IterableWrapper
@singledispatch
def relabel(data: Any, mapping: Dict[int, int] = None) -> Any:
    """Relabels the given data (from a task) so they all share the same action space.

    Generic function: implementations are registered per type of `data`.

    Parameters
    ----------
    data : Any
        The labels (or object holding labels) to relabel.
    mapping : Dict[int, int], optional
        Maps each old label to its new label. When not given (or empty), the
        registered implementations map the i-th unique label to `i`.

    Raises
    ------
    NotImplementedError
        If no implementation is registered for the type of `data`.
    """
    raise NotImplementedError(f"Don't know how to relabel {data} of type {type(data)}")


@relabel.register
def relabel_ndarray(y: np.ndarray, mapping: Dict[int, int] = None) -> np.ndarray:
    """Return a relabeled copy of the label array `y` (`y` is left untouched)."""
    new_y = y.copy()
    mapping = mapping or {c: i for i, c in enumerate(np.unique(y))}
    for old_label, new_label in mapping.items():
        # The mask is computed on the original labels, so a new label that equals
        # another old label doesn't get remapped a second time.
        new_y[y == old_label] = new_label
    return new_y


@relabel.register
def relabel_tensor(y: Tensor, mapping: Dict[int, int] = None) -> Tensor:
    """Return a relabeled copy of the label tensor `y` (`y` is left untouched)."""
    # Tensors have no `copy()` method: `clone()` is the way to get a copy.
    new_y = y.clone()
    # `tolist()` gives plain Python numbers to use as the keys of the mapping.
    mapping = mapping or {c: i for i, c in enumerate(torch.unique(y).tolist())}
    for old_label, new_label in mapping.items():
        # Same as for arrays: mask on the original labels.
        new_y[y == old_label] = new_label
    return new_y
@relabel.register
def relabel_taskset(task_set: TaskSet, mapping: Dict[int, int] = None) -> TaskSet:
    """Return a new TaskSet whose `y` has been relabeled.

    When no mapping is given, the i-th class returned by `task_set.get_classes()`
    is mapped to `i`. The task set must not have a `target_trsf`.
    """
    if not mapping:
        mapping = {old: new for new, old in enumerate(task_set.get_classes())}
    relabeled_y = relabel(task_set._y, mapping=mapping)

    assert not task_set.target_trsf
    # TODO: Two options here: Either create a new 'y' array, OR add a target_trsf that
    # does the remapping. Not sure if there's a benefit in doing one vs the other atm.
    # NOTE: Choosing to replace the `y` to make sure that the concatenated datasets keep
    # the transformed y.
    return replace_taskset_attributes(task_set, y=relabeled_y)
from sequoia.utils.generic_functions.replace import replace
@replace.register
def replace_taskset_attributes(task_set: TaskSet, **kwargs) -> TaskSet:
    """Create a new task set of the same type, with some constructor arguments replaced.

    The constructor arguments default to the values held by `task_set`; any
    keyword argument passed here takes precedence over them.
    """
    current_values = {
        "x": task_set._x,
        "y": task_set._y,
        "t": task_set._t,
        "trsf": task_set.trsf,
        "target_trsf": task_set.target_trsf,
        "data_type": task_set.data_type,
        "bounding_boxes": task_set.bounding_boxes,
    }
    return type(task_set)(**{**current_values, **kwargs})
class SharedActionSpaceWrapper(IterableWrapper):
    """Wrapper that gets applied to a ContinualSLEnvironment, relabeling with `relabel`.

    The i-th entry of `task_classes` is mapped to the label `i`.
    """

    def __init__(self, env: gym.Env, task_classes: List[int]):
        self.task_classes = task_classes
        # `relabel` and its registered implementations take a `mapping` from old to
        # new labels. They have no `task_classes` argument, so binding that keyword
        # would make every call raise a TypeError.
        mapping = {old_label: new_label for new_label, old_label in enumerate(task_classes)}
        # NOTE(review): assumes `IterableWrapper.__init__` accepts the `f` keyword,
        # as the original call did - confirm against the base class.
        super().__init__(env=env, f=partial(relabel, mapping=mapping))
from collections import Counter
from .environment import ContinualSLEnvironment
from .objects import ObservationType, RewardType
class ShowLabelDistributionWrapper(IterableWrapper[ContinualSLEnvironment]):
    """Wrapper around a SL environment that shows the distribution of the labels.

    Keeps one `Counter` per batch for the labels ("y") and for the task labels
    ("t"), and can turn them into a figure with `make_figure`.
    """

    def __init__(self, env: ContinualSLEnvironment, env_name: str):
        super().__init__(env=env)
        self.env_name = env_name
        # IDEA: Could use bins for continuous values ?
        # IDEA: Also use a counter for the actions?
        self.counters: Dict[str, List[Counter]] = {"y": [], "t": []}

    def observation(self, observation: ObservationType) -> ObservationType:
        """Record the task labels of this batch, then return it unchanged."""
        task_labels = observation.task_labels
        if task_labels is None:
            # No task labels: count one `None` per sample in the batch.
            task_labels = [None] * observation.batch_size
        if isinstance(task_labels, Tensor):
            task_labels = task_labels.cpu().numpy()
        self.counters["t"].append(Counter(task_labels))
        return observation

    def reward(self, reward: RewardType) -> RewardType:
        """Record the labels of this batch, then return the reward unchanged."""
        self.counters["y"].append(Counter(reward.y.cpu().numpy()))
        return reward

    def make_figure(self) -> plt.Figure:
        """Create a figure with one subplot per counter type ("y" and "t").

        Each subplot has one curve per distinct value, giving its count in each
        batch, against the number of samples seen so far.
        """
        fig, axes = plt.subplots(len(self.counters))
        for axis, (name, batch_counters) in zip(axes, self.counters.items()):
            # Values for the x axis are the number of samples seen so far for each
            # batch.
            batch_sizes = (sum(counter.values()) for counter in batch_counters)
            samples_seen = list(accumulate(batch_sizes))

            for value in sorted(set().union(*batch_counters)):
                counts = [counter.get(value) for counter in batch_counters]
                axis.plot(samples_seen, counts, label=f"{name}={value}")
            axis.legend()
            axis.set_title(f"{self.env_name} {name}")
            axis.set_xlabel("Batch index")
            axis.set_ylabel("Count in batch")

        fig.set_size_inches((6, 4), forward=False)
        fig.legend()
        return fig
================================================
FILE: sequoia/settings/sl/discrete/__init__.py
================================================
from .setting import DiscreteTaskAgnosticSLSetting
================================================
FILE: sequoia/settings/sl/discrete/setting.py
================================================
from dataclasses import dataclass
from sequoia.settings.assumptions.context_discreteness import DiscreteContextAssumption
from sequoia.settings.sl.continual import ContinualSLSetting
@dataclass
class DiscreteTaskAgnosticSLSetting(DiscreteContextAssumption, ContinualSLSetting):
    """Continual Supervised Learning Setting where there are clear task boundaries, but
    where the task information isn't available.

    This class defines no fields or methods of its own: it only combines
    `DiscreteContextAssumption` with `ContinualSLSetting`.
    """
================================================
FILE: sequoia/settings/sl/discrete/setting_test.py
================================================
from typing import Any, ClassVar, Dict, Type
from sequoia.settings.sl.continual.setting_test import (
TestContinualSLSetting as ContinualSLSettingTests,
)
from .setting import DiscreteTaskAgnosticSLSetting
class TestDiscreteTaskAgnosticSLSetting(ContinualSLSettingTests):
    """Runs the tests of `ContinualSLSettingTests` on `DiscreteTaskAgnosticSLSetting`."""

    # The type of Setting under test.
    # NOTE(review): `Setting` (in the annotation) isn't imported in the visible
    # imports of this module - presumably it resolves to this very class attribute,
    # which is assigned before the annotation is evaluated. Confirm.
    Setting: ClassVar[Type[Setting]] = DiscreteTaskAgnosticSLSetting
    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )
================================================
FILE: sequoia/settings/sl/domain_incremental/__init__.py
================================================
from .setting import DomainIncrementalSLSetting
================================================
FILE: sequoia/settings/sl/domain_incremental/setting.py
================================================
from dataclasses import dataclass
from sequoia.settings.sl.incremental.setting import IncrementalSLSetting
from sequoia.utils.utils import constant
@dataclass
class DomainIncrementalSLSetting(IncrementalSLSetting):
    """Supervised CL Setting where the input domain shifts incrementally.

    Task labels and task boundaries are given at training time, but not at test-time.

    The crucial difference between the Domain-Incremental and Class-Incremental settings
    is that the action space is smaller in domain-incremental learning, as it is a
    `Discrete(n_classes_per_task)`, rather than the `Discrete(total_classes)` in
    Class-Incremental setting.

    For example: Create a classifier for odd vs even hand-written digits. It would
    first be trained on digits 0 and 1, then digits 2 and 3, then digits 4 and 5, etc.
    At evaluation time, it will be evaluated on all digits.
    """

    # The action space is shared between the tasks in this setting.
    # NOTE(review): presumably `constant` makes this a field with a fixed value -
    # confirm in `sequoia.utils.utils`.
    shared_action_space: bool = constant(True)
================================================
FILE: sequoia/settings/sl/domain_incremental/setting_test.py
================================================
import itertools
from typing import Any, ClassVar, Dict, Type
import numpy as np
from gym import spaces
from gym.spaces import Discrete
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.spaces import Image, TypedDictSpace
from sequoia.settings.sl.incremental.setting_test import (
TestIncrementalSLSetting as IncrementalSLSettingTests,
)
from .setting import DomainIncrementalSLSetting
class TestDiscreteTaskAgnosticSLSetting(IncrementalSLSettingTests):
    """Runs the tests of `IncrementalSLSettingTests` on `DomainIncrementalSLSetting`."""

    Setting: ClassVar[Type[Setting]] = DomainIncrementalSLSetting

    # The kwargs to be passed to the Setting when we want to create a 'short' setting.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = dict(
        dataset="mnist",
        batch_size=64,
    )

    # Override how we measure 'chance' accuracy for DomainIncrementalSetting.
    def assert_chance_level(
        self,
        setting: DomainIncrementalSLSetting,
        results: DomainIncrementalSLSetting.Results,
    ):
        """Assert that the average and the per-task accuracies are around chance.

        Chance is `1 / n`, where `n` is the size of the setting's (discrete)
        action space.
        """
        assert isinstance(setting, DomainIncrementalSLSetting), setting
        assert isinstance(results, DomainIncrementalSLSetting.Results), results
        # TODO: Remove this assertion:
        assert isinstance(setting.action_space, spaces.Discrete)

        # TODO: This test so far needs the 'N' to be the number of classes in total,
        # not the number of classes per task.
        # We assume that there is an equal number of classes in each task, so the
        # same chance accuracy is used for the average and for every task.
        chance_accuracy = 1 / setting.action_space.n

        assert 0.5 * chance_accuracy <= results.objective <= 1.5 * chance_accuracy

        for metric in results.final_performance_metrics:
            assert isinstance(metric, ClassificationMetrics)
            # TODO: Same as above: Should be using `n_classes_per_task` or something
            # like it instead.
            # FIXME: Look into this, we're often getting results substantially
            # worse than chance, and to 'make the tests pass' (which is bad)
            # we're setting the lower bound super low, which makes no sense.
            assert 0.25 * chance_accuracy <= metric.accuracy <= 2.1 * chance_accuracy
def test_domain_incremental_mnist_setup():
    """Check the spaces and the batches of a domain-incremental MNIST setting.

    With an increment of 2, the action and reward spaces are `Discrete(2)` and the
    task labels are in `Discrete(5)`. For each task, up to 100 batches of the train
    and test loaders are checked (shapes, label range, task labels).
    """
    setting = DomainIncrementalSLSetting(
        dataset="mnist",
        increment=2,
    )
    setting.prepare_data(data_dir="data")
    setting.setup()
    assert setting.observation_space == TypedDictSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
        dtype=setting.Observations,
    )
    assert setting.observation_space.dtype == setting.Observations
    assert setting.action_space == spaces.Discrete(2)
    assert setting.reward_space == spaces.Discrete(2)
    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5
        train_loader = setting.train_dataloader(batch_size=batch_size)
        # Train loader: the rewards come with the observations, and the task labels
        # are those of the current task.
        for j, (observations, rewards) in enumerate(itertools.islice(train_loader, 100)):
            x = observations.x
            t = observations.task_labels
            y = rewards.y
            print(i, j, y, t)
            assert x.shape == (batch_size, 3, 28, 28)
            # Labels are relative to the task (shared action space).
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
            assert all(t == i)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)
            # Sending an action gives back the same rewards as those already given.
            rewards_ = train_loader.send([4 for _ in range(batch_size)])
            assert (rewards.y == rewards_.y).all()
        train_loader.close()
        test_loader = setting.test_dataloader(batch_size=batch_size)
        # Test loader: the rewards and the task labels are withheld, and the
        # rewards are only obtained by sending an action.
        for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)):
            assert rewards is None
            x = observations.x
            t = observations.task_labels
            assert t is None
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)
            rewards = test_loader.send([0 for _ in range(batch_size)])
            assert rewards is not None
            y = rewards.y
            assert ((0 <= y) & (y < setting.n_classes_per_task)).all()
================================================
FILE: sequoia/settings/sl/environment.py
================================================
"""TODO: Creates a Gym Environment (and DataLoader) from a traditional
Supervised dataset.
"""
from collections import deque
from typing import *
import gym
import numpy as np
from gym import spaces
from gym.vector.utils import batch_space
from torch import Tensor
from torch.utils.data import DataLoader, Dataset, IterableDataset
from torch.utils.data.dataloader import _BaseDataLoaderIter
from sequoia.common.gym_wrappers.convert_tensors import add_tensor_support
from sequoia.common.gym_wrappers.utils import tile_images
from sequoia.common.spaces import Image
from sequoia.common.transforms import Transforms
from sequoia.settings.base.environment import Environment
from sequoia.settings.base.objects import (
Actions,
ActionType,
Observations,
ObservationType,
Rewards,
RewardType,
)
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
class PassiveEnvironment(
DataLoader,
Environment[Tuple[ObservationType, Optional[ActionType]], ActionType, RewardType],
):
"""Environment in which actions have no influence on future observations.
Can either be iterated on like a normal DataLoader, in which case it gives
back the observation and the reward at the same time, or as a gym
Environment, in which case it gives the rewards and the next batch of
observations once an action is given.
Normal supervised datasets such as Mnist, ImageNet, etc. fit under this
category. Similarly to Environment, this just adds some methods on top of
the usual PyTorch DataLoader.
"""
passive: ClassVar[bool] = True
metadata = {"render.modes": ["rgb_array", "human"]}
def __init__(
    self,
    dataset: Union[IterableDataset, Dataset],
    split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None,
    observation_space: gym.Space = None,
    action_space: gym.Space = None,
    reward_space: gym.Space = None,
    n_classes: int = None,
    pretend_to_be_active: bool = False,
    strict: bool = False,
    drop_last: bool = False,
    **kwargs,
):
    """Creates the DataLoader/Environment for the given dataset.

    Parameters
    ----------
    dataset : Union[IterableDataset, Dataset]
        The dataset to iterate on. Should ideally be indexable (a Map-style
        dataset).
    split_batch_fn : Callable[ [Tuple[Any, ...]], Tuple[ObservationType, ActionType] ], optional
        A function to call on each batch in order to split it into Observations
        and Rewards, by default None, in which case we assume that the batches are
        tuples of length 2.
    observation_space : gym.Space, optional
        The single (non-batched) observation space. Default to `None`, in which case
        this will try to infer the shape of the space using the first item in the
        dataset.
    action_space : gym.Space, optional
        The non-batched action space. Defaults to None, in which case the
        `n_classes` argument must be passed, and the action space is assumed to be
        discrete (i.e. that the loader is for a classification dataset).
    reward_space : gym.Space, optional
        The non-batched reward (label) space. Defaults to `None`, in which case it
        will be the same as the action space (as is the case in classification).
    n_classes : int, optional
        Number of classes in the dataset. Used in case `action_space` isn't passed.
        Defaults to `None`.
    pretend_to_be_active : bool, optional
        Whether to withhold the rewards (labels) from the batches when being
        iterated on like the usual dataloader, and to only give them back
        after an action is received through the 'send' method. False by
        default, in which case this behaves exactly as a normal dataloader
        when being iterated on.
        When False, the batches yielded by this dataloader will be of the form
        `Tuple[Observations, Rewards]` (as usual in SL).
        However, when set to True, the batches will be `Tuple[Observations, None]`!
        Rewards will then be returned by the environment when an action is passed to
        the Send method.
    strict : bool, optional
        Only used when iterating with `pretend_to_be_active=True`: when True, a
        RuntimeError is raised if no action was sent for an observation before the
        next batch is requested. When False (the default), a warning is logged
        instead.
    drop_last : bool, optional
        Passed to the `DataLoader` constructor. False by default.
    **kwargs
        Passed to the `DataLoader` constructor (e.g. `batch_size`, `num_workers`).

    # Examples:
    ```python
    train_env = PassiveEnvironment(MNIST("data"), batch_size=32, n_classes=10)

    # The usual Dataloader-style:
    for x, y in train_env:
        # train as usual
        (...)

    # OpenAI Gym style:
    for episode in range(5):
        # NOTE: "episode" in RL is an "epoch" in SL:
        obs = train_env.reset()
        done = False
        while not done:
            actions = train_env.action_space.sample()
            obs, rewards, done, info = train_env.step(actions)
    ```
    """
    super().__init__(dataset=dataset, drop_last=drop_last, **kwargs)
    self.split_batch_fn = split_batch_fn
    # TODO: When the spaces aren't passed explicitly, assumes a classification dataset.
    if not observation_space:
        # NOTE: Assuming min/max of 0 and 1 respectively, but could actually use
        # min_max of the dataset samples too.
        first_item = self.dataset[0]
        if isinstance(first_item, tuple):
            # The first element of the item is taken to be the observation.
            x, *_ = first_item
        else:
            assert isinstance(first_item, (np.ndarray, Tensor))
            x = first_item
        observation_space = Image(0.0, 1.0, x.shape)
    if not action_space:
        assert n_classes, "must pass either `action_space`, or `n_classes` for now"
        action_space = spaces.Discrete(n_classes)
    elif isinstance(action_space, spaces.Discrete):
        # The number of classes is given by the discrete action space.
        n_classes = action_space.n

    if not reward_space:
        # Assuming a classification dataset by default:
        # (action space = reward space = Discrete(n_classes))
        reward_space = action_space

    assert observation_space
    assert action_space
    assert reward_space

    # Keep the non-batched spaces around.
    self.single_observation_space: gym.Space = observation_space
    self.single_action_space: gym.Space = action_space
    self.single_reward_space: gym.Space = reward_space

    # The spaces of the env itself are batched, when there is a batch size.
    if self.batch_size:
        observation_space = batch_space(observation_space, self.batch_size)
        action_space = batch_space(action_space, self.batch_size)
        reward_space = batch_space(reward_space, self.batch_size)

    self.observation_space: gym.Space = add_tensor_support(observation_space)
    self.action_space: gym.Space = add_tensor_support(action_space)
    self.reward_space: gym.Space = add_tensor_support(reward_space)

    self.pretend_to_be_active = pretend_to_be_active
    self._strict = strict
    # Rewards withheld during iteration, until they are popped by `send`.
    self._reward_queue = deque(maxlen=10)

    self.n_classes: Optional[int] = n_classes
    # Iterator used by `reset`, `step` and `get_next_batch`.
    self._iterator: Optional[_BaseDataLoaderIter] = None
    # NOTE: These here are never processed with self.observation or self.reward.
    self._previous_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._current_batch: Optional[Tuple[ObservationType, RewardType]] = None
    self._next_batch: Optional[Tuple[ObservationType, RewardType]] = None
    # None until the first reset.
    self._done: Optional[bool] = None
    self._is_closed: bool = False
    # Last action received through `send` (when pretending to be active).
    self._action: Optional[ActionType] = None

    # from gym.envs.classic_control.rendering import SimpleImageViewer
    # Created on the first call to `render` with mode "human".
    self.viewer = None
def is_closed(self) -> bool:
    """Return True once `close` has been called on this environment."""
    return self._is_closed
def reset(self) -> ObservationType:
    """Resets the env by deleting and re-creating the dataloader iterator.

    TODO: This might be pretty expensive, since it's maybe re-creating all the
    worker processes. There might be an easier way of going about this.

    Returns the first batch of observations (passed through `self.observation`).

    Raises
    ------
    gym.error.ClosedEnvironmentError
        If the environment was closed.
    """
    if self._is_closed:
        raise gym.error.ClosedEnvironmentError("Can't reset: Env is closed.")
    self._iterator = super().__iter__()
    self._previous_batch = None
    # Forget any batch prefetched by `step` from the previous iterator: otherwise,
    # when resetting before the end of an episode, the first `step` would return
    # that stale batch instead of a batch from the new iterator.
    self._next_batch = None
    self._current_batch = self.get_next_batch()
    self._done = False
    obs = self._current_batch[0]
    return self.observation(obs)
def close(self) -> None:
    """Close the viewer (if any), shut down the iterator's workers, and mark the env
    as closed. Does nothing when the env is already closed.
    """
    if self._is_closed:
        return
    if self.viewer:
        self.viewer.close()
    # Only relevant when worker processes are used and an iterator was created.
    if self.num_workers > 0 and self._iterator:
        self._iterator._shutdown_workers()
    self._is_closed = True
def __del__(self):
    """Close the environment when it gets garbage-collected, if still open."""
    # `__del__` also runs when `__init__` raised before `_is_closed` was set (for
    # instance if the DataLoader constructor rejects an argument). In that case
    # there is nothing to close, and accessing the attribute would only add an
    # unrelated AttributeError on top of the real error.
    if not getattr(self, "_is_closed", True):
        self.close()
def render(self, mode: str = "rgb_array") -> np.ndarray:
    """Render the observations of the current batch as a single image.

    Parameters
    ----------
    mode : str, optional
        "rgb_array" (the default) returns the uint8 image array. "human" shows the
        image in a viewer window (created on first use), and returns whether that
        window is open.

    Raises
    ------
    NotImplementedError
        For any other mode.
    """
    observations = self._current_batch[0]
    if isinstance(observations, Observations):
        image_batch = observations.x
    else:
        assert isinstance(observations, Tensor)
        image_batch = observations
    if isinstance(image_batch, Tensor):
        image_batch = image_batch.cpu().numpy()

    if self.batch_size:
        # Combine the images of the batch into a single image.
        image_batch = tile_images(image_batch)

    image_batch = Transforms.channels_last_if_needed(image_batch)
    image_batch = Transforms.three_channels(image_batch)
    assert image_batch.shape[-1] in {3, 4}, image_batch.shape
    if image_batch.dtype == np.float32:
        assert (0 <= image_batch).all() and (image_batch <= 1).all()
        # Scale [0, 1] to [0, 255]. Scaling by 256 would send a value of 1.0 to
        # 256, which doesn't fit in a uint8.
        image_batch = (255 * image_batch).astype(np.uint8)
    assert image_batch.dtype == np.uint8

    if mode == "rgb_array":
        # NOTE: Need to create a single image, channels_last format, and
        # possibly even of dtype uint8, in order for things like Monitor to
        # work.
        return image_batch

    if mode == "human":
        if self.viewer is None:
            # TODO: There seems to be a bit of a bug, tests sometime fail because
            # "Can't connect to display: None" etc.
            from gym.utils import pyglet_rendering

            self.viewer = pyglet_rendering.SimpleImageViewer()
        self.viewer.imshow(image_batch)
        return self.viewer.isopen

    raise NotImplementedError(f"Unsuported mode {mode}")
def get_next_batch(self) -> Tuple[ObservationType, RewardType]:
    """Gets the next batch from the underlying dataset.

    Creates the iterator if there isn't one yet. The batch is passed through
    `split_batch_fn`, when there is one. Does NOT apply the self.observation and
    self.reward methods.

    Returns
    -------
    Tuple[ObservationType, RewardType]
        The next batch, or `None` when the iterator is exhausted.

    Raises
    ------
    gym.error.ClosedEnvironmentError
        If the environment was closed.
    """
    if self._is_closed:
        raise gym.error.ClosedEnvironmentError("Can't get the next batch: Env is closed.")
    if self._iterator is None:
        self._iterator = super().__iter__()

    # `None` signals that there are no more batches.
    batch = next(self._iterator, None)
    if batch is not None and self.split_batch_fn:
        batch = self.split_batch_fn(batch)
    return batch
def step(self, action: ActionType) -> Tuple[ObservationType, RewardType, bool, Dict]:
    """Move on to the next batch, gym-style.

    Returns the observations of the new current batch, the rewards of the batch
    that was current before this call, whether there are no more batches after the
    new current one, and an empty info dict.

    NOTE(review): unlike in `reset`, the observations and rewards returned here
    aren't passed through `self.observation` / `self.reward` - confirm that this
    is intended.

    Raises
    ------
    gym.error.ClosedEnvironmentError
        If the environment was closed.
    gym.error.ResetNeeded
        If the env was never reset, or is done.
    """
    if self._is_closed:
        raise gym.error.ClosedEnvironmentError("Can't step on a closed env.")
    if self._done is None:
        raise gym.error.ResetNeeded("Need to reset the env before calling step.")
    if self._done:
        raise gym.error.ResetNeeded("Need to reset the env since it is done.")

    # Transform the Action, if needed:
    action = self.action(action)

    # NOTE: This prev/current/next setup is so we can give the right 'done'
    # signal.
    self._previous_batch = self._current_batch
    if self._next_batch is None:
        # This should only ever happen right after resetting.
        self._next_batch = self.get_next_batch()
    self._current_batch = self._next_batch
    # Prefetch the following batch: it is `None` if the iterator is exhausted.
    self._next_batch = self.get_next_batch()

    assert self._previous_batch is not None
    # TODO: Return done=True when the iterator is exhausted?
    self._done = self._next_batch is None
    obs = self._current_batch[0]
    # The rewards are those of the batch for which the action was given.
    reward = self._previous_batch[1]
    # Empty for now I guess?
    info = {}
    return obs, reward, self._done, info
def action(self, action: ActionType) -> ActionType:
    """Transform the action, if needed.

    Called by `step` on the action it receives. This implementation returns the
    action unchanged.

    Parameters
    ----------
    action : ActionType
        The action to transform.

    Returns
    -------
    ActionType
        The same action.
    """
    return action
def observation(self, observation: ObservationType) -> ObservationType:
    """Transform the observation, if needed.

    Called by `reset` and `__iter__` on the observations they produce. This
    implementation returns the observation unchanged.

    Parameters
    ----------
    observation : ObservationType
        The observation to transform.

    Returns
    -------
    ObservationType
        The same observation.
    """
    return observation
def reward(self, reward: RewardType) -> RewardType:
    """Transform the reward, if needed.

    Called by `__iter__` on the rewards of each batch. This implementation returns
    the reward unchanged.

    Parameters
    ----------
    reward : RewardType
        The reward to transform.

    Returns
    -------
    RewardType
        The same reward.
    """
    return reward
def get_info(self) -> Dict:
    """Returns the dict to be returned as the 'info' in step().

    IDEA: We could subclass this to change whats in the 'info' dict, maybe
    add some task information?

    NOTE(review): `step` in this class currently creates its own empty dict rather
    than calling this method.

    Returns
    -------
    Dict
        An empty dict.
    """
    return {}
def __iter__(self) -> Iterable[Tuple[ObservationType, Optional[RewardType]]]:
    """Iterate over the dataset, yielding batches of Observations and
    Rewards, just like a regular DataLoader.

    When `pretend_to_be_active` is set, the rewards are withheld: `None` is yielded
    in their place, and they are queued so that `send` can return them.

    Raises
    ------
    gym.error.ClosedEnvironmentError
        If the environment was closed.
    RuntimeError
        If there is no `split_batch_fn` and a batch doesn't have 2 items, or if in
        strict mode and no action was sent for the previous observations.
    """
    if self._is_closed:
        raise gym.error.ClosedEnvironmentError("Can't iterate over closed env.")

    for batch in super().__iter__():
        if self.split_batch_fn:
            observations, rewards = self.split_batch_fn(batch)
        else:
            if len(batch) != 2:
                raise RuntimeError(
                    f"You need to pass a `split_batch_fn` to create "
                    f"observations and rewards, since batch doesn't have "
                    f"2 items: {batch}"
                )
            observations, rewards = batch

        # Apply any transformations (in case this is wrapped with
        # TransformObservation or something similar)
        self._observations = self.observation(observations)
        self._rewards = self.reward(rewards)
        self._previous_batch = self._current_batch
        self._current_batch = (self._observations, self._rewards)

        if self.pretend_to_be_active:
            # Cleared here, and set by `send` when an action is received.
            self._action = None
            self._reward_queue.append(self._rewards)
            yield self._observations, None
            # Back from the yield: check if an action was sent in the meantime.
            if self._action is None:
                if self._strict:
                    # IDEA: yield the same observation, as long as we dont receive an action.
                    raise RuntimeError("Need to send an action between each observations.")
                logger.warning("Didn't receive an action, rewards will be delayed!.")
        else:
            yield self._observations, self._rewards
def send(self, action: Actions) -> Rewards:
    """Return the last batch of rewards from the dataset (which were withheld if in
    'active' mode).

    In 'active' mode, the action is stored and the oldest queued rewards are
    returned. Otherwise the rewards of the latest batch are returned.
    """
    if not self.pretend_to_be_active:
        # NOTE: What about sending the reward as well this way?
        return self._rewards
    self._action = action
    return self._reward_queue.popleft()
================================================
FILE: sequoia/settings/sl/environment_test.py
================================================
from typing import ClassVar, Iterable, Tuple, Type
import gym
import numpy as np
import pytest
import torch
from gym import spaces
from torch import Tensor
from torch.utils.data import Subset, TensorDataset
from torchvision.datasets import MNIST
from sequoia.common.gym_wrappers import TransformObservation
from sequoia.common.spaces import Image
from sequoia.common.transforms import Compose, Transforms
from .environment import PassiveEnvironment
def check_env(env: PassiveEnvironment):
    """Perform a step gym-style and dataloader-style and check that items
    fit their respective spaces.
    """
    # Gym-style: reset, then a single step.
    first_obs = env.reset()
    assert first_obs in env.observation_space, first_obs.shape
    assert env.observation_space.sample() in env.observation_space
    assert env.action_space.sample() in env.action_space
    assert env.reward_space == env.action_space

    next_obs, step_rewards, done, _info = env.step(env.action_space.sample())
    assert next_obs in env.observation_space
    assert step_rewards in env.reward_space
    # TODO: Should passive environments return a single 'done' value? or a list
    # like vectorized environments in RL?
    assert not done  # shouldn't be `done`.

    # Dataloader-style: only the first batch is checked.
    for batch_obs, batch_rewards in env:
        assert batch_obs in env.observation_space, batch_obs.shape
        assert batch_rewards in env.reward_space
        break
    else:
        assert False, "should have iterated"
class TestPassiveEnvironment:
# NOTE: Defining tests in a class like this so we can reuse them while changing some
# component, for example in the case of `env_proxy_test.py`.
PassiveEnvironment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment
@pytest.fixture(scope="session")
def mnist_dataset(self):
    """MNIST dataset from the "data" directory, as 3-channel tensors."""
    return MNIST(
        "data",
        transform=Compose([Transforms.to_tensor, Transforms.three_channels]),
    )
def test_passive_environment_as_dataloader(self, mnist_dataset):
    """Iterating on the env yields `(x, y)` batches, like a DataLoader would."""
    batch_size = 1
    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    # Space of the raw images, passed through the same transforms as the data.
    raw_image_space = Image(0, 255, (1, 28, 28), np.uint8)
    obs_space = transforms(raw_image_space)

    env: Iterable[Tuple[Tensor, Tensor]] = self.PassiveEnvironment(
        mnist_dataset,
        batch_size=batch_size,
        n_classes=10,
        observation_space=obs_space,
    )

    # Only the first batch is inspected.
    for x, y in env:
        assert x.shape == (batch_size, 3, 28, 28)
        x = x.permute(0, 2, 3, 1)
        assert y.tolist() == [5]
        break

    # reward = env.send(4)
    # assert reward is None, reward
    # plt.imshow(x[0])
    # plt.title(f"y: {y[0]}")
    # plt.waitforbuttonpress(10)
def test_mnist_as_gym_env(self, mnist_dataset):
    """The env can be driven gym-style: `reset()` followed by a few `step()` calls."""
    # from continuum.datasets import MNIST
    batch_size = 4
    batched_image_shape = (batch_size, 3, 28, 28)
    env = self.PassiveEnvironment(mnist_dataset, n_classes=10, batch_size=batch_size)

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    env.seed(123)
    first_obs = env.reset()
    assert first_obs.shape == batched_image_shape

    for _ in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs.shape == batched_image_shape
        assert reward.shape == (batch_size,)
        assert not done
    env.close()
def test_env_gives_done_on_last_item(self):
    """With a batch size of 1, `done` is True only on the last step of the dataset."""
    # from continuum.datasets import MNIST
    max_samples = 100
    batch_size = 1
    batched_image_shape = (batch_size, 3, 28, 28)

    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    env.seed(123)
    first_obs = env.reset()
    assert first_obs.shape == batched_image_shape

    last_step = max_samples - 1
    # Starting at 1 since reset() gives one observation already.
    for i in range(1, max_samples):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs.shape == batched_image_shape
        assert reward.shape == (batch_size,)
        assert done == (i == last_step), i
        if done:
            break
    else:
        assert False, "Should have reached done=True!"
    assert i == last_step
    env.close()
def test_env_done_works_with_batch_size(self):
    """With batches of 5 samples, `done` is True only on the step for the last batch."""
    # from continuum.datasets import MNIST
    max_samples = 100
    batch_size = 5
    max_batches = max_samples // batch_size
    batched_image_shape = (batch_size, 3, 28, 28)

    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    env.seed(123)
    first_obs = env.reset()
    assert first_obs.shape == batched_image_shape

    last_step = max_batches - 1
    # Starting at 1 since reset() gives one observation already.
    for i in range(1, max_batches):
        obs, reward, done, info = env.step(env.action_space.sample())
        assert obs.shape == batched_image_shape
        assert reward.shape == (batch_size,)
        assert done == (i == last_step), i
        if done:
            break
    else:
        assert False, "Should have reached done=True!"
    assert i == last_step
    env.close()
def test_multiple_epochs_env(self):
    """The gym-style loop (`reset()` + `step()` until done) can be run for several epochs."""
    max_epochs = 3
    max_samples = 100
    batch_size = 5
    max_batches = max_samples // batch_size
    batched_image_shape = (batch_size, 3, 28, 28)

    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    env.seed(123)
    last_step = max_batches - 1
    total_steps = 0
    for epoch in range(max_epochs):
        # The observation from reset() counts as one step of the epoch.
        first_obs = env.reset()
        total_steps += 1
        assert first_obs.shape == batched_image_shape

        # Starting at 1 since reset() gives one observation already.
        for i in range(1, max_batches):
            obs, reward, done, info = env.step(env.action_space.sample())
            assert obs.shape == batched_image_shape
            assert reward.shape == (batch_size,)
            assert done == (i == last_step), i
            total_steps += 1
            if done:
                break
        else:
            assert False, "Should have reached done=True!"
        assert i == last_step

    assert total_steps == max_batches * max_epochs
    env.close()
def test_cant_iterate_after_closing_passive_env(self):
    """Once closed, every way of using the env raises `ClosedEnvironmentError`."""
    max_epochs = 3
    max_samples = 200
    batch_size = 5
    max_batches = max_samples // batch_size
    batched_image_shape = (batch_size, 3, 28, 28)

    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(
        dataset, n_classes=10, batch_size=batch_size, num_workers=4
    )

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    # Before closing: iterating for a few epochs works as usual.
    total_steps = 0
    for epoch in range(max_epochs):
        for obs, reward in env:
            assert obs.shape == batched_image_shape
            assert reward.shape == (batch_size,)
            total_steps += 1
    assert total_steps == max_batches * max_epochs

    env.close()

    # After closing: iteration, reset, get_next_batch and step must all fail.
    closed_error = gym.error.ClosedEnvironmentError
    with pytest.raises(closed_error):
        for _ in zip(range(3), env):
            pass

    with pytest.raises(closed_error):
        env.reset()

    with pytest.raises(closed_error):
        env.get_next_batch()

    with pytest.raises(closed_error):
        env.step(env.action_space.sample())
def test_multiple_epochs_dataloader(self):
    """Test that we can iterate on the dataloader more than once."""
    max_epochs = 3
    max_samples = 200
    batch_size = 5
    max_batches = max_samples // batch_size
    batched_image_shape = (batch_size, 3, 28, 28)

    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    # Count every batch produced over all the epochs.
    total_steps = 0
    for epoch in range(max_epochs):
        for obs, reward in env:
            assert obs.shape == batched_image_shape
            assert reward.shape == (batch_size,)
            total_steps += 1

    assert total_steps == max_batches * max_epochs
def test_multiple_epochs_dataloader_with_split_batch_fn(self):
    """Test multi-epoch iteration when a custom `split_batch_fn` is given.

    The dummy function replaces the images with zeros, which lets the test check
    that the function was applied to every batch that comes out of the env.
    """
    max_epochs = 3
    max_samples = 200
    batch_size = 5
    batched_image_shape = (batch_size, 3, 28, 28)

    def split_batch_fn(batch):
        # some dummy function.
        images, labels = batch
        return torch.zeros_like(images), labels

    max_batches = max_samples // batch_size
    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(
        dataset, n_classes=10, batch_size=batch_size, split_batch_fn=split_batch_fn
    )

    assert env.observation_space.shape == batched_image_shape
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    total_steps = 0
    for epoch in range(max_epochs):
        for obs, reward in env:
            assert obs.shape == batched_image_shape
            assert torch.all(obs == 0)
            assert reward.shape == (batch_size,)
            total_steps += 1

    assert total_steps == max_batches * max_epochs
def test_env_requires_reset_before_step(self):
    """Calling `step()` on a freshly created env (no `reset()`) raises `ResetNeeded`."""
    # from continuum.datasets import MNIST
    max_samples = 100
    batch_size = 5
    max_batches = max_samples // batch_size

    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))
    env = self.PassiveEnvironment(dataset, n_classes=10, batch_size=batch_size)

    with pytest.raises(gym.error.ResetNeeded):
        env.step(env.action_space.sample())
def test_split_batch_fn(self):
    """Test that a custom `split_batch_fn` decides what is an observation vs a reward.

    Each task dataset of a continuum `ClassIncremental` scenario yields `(x, y, t)`
    batches. The function used here groups `(x, t)` into the observation and uses
    `y` as the reward, and the test checks the resulting spaces and batches.
    """
    # from continuum.datasets import MNIST
    batch_size = 5

    def split_batch_fn(
        batch: Tuple[Tensor, Tensor, Tensor]
    ) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
        x, y, t = batch
        return (x, t), y

    # dataset = MNIST("data", transform=Compose([Transforms.to_tensor, Transforms.three_channels]))
    # NOTE: Inside this test, `MNIST` is the continuum dataset (local import), not
    # the torchvision one imported at the top of the file.
    from continuum import ClassIncremental
    from continuum.datasets import MNIST

    scenario = ClassIncremental(
        MNIST("data", download=True, train=True),
        increment=2,
        transformations=Compose([Transforms.to_tensor, Transforms.three_channels]),
    )

    classes_per_task = scenario.nb_classes // scenario.nb_tasks
    print(f"Number of classes per task {classes_per_task}.")
    for i, task_dataset in enumerate(scenario):
        env = self.PassiveEnvironment(
            task_dataset,
            n_classes=classes_per_task,
            batch_size=batch_size,
            split_batch_fn=split_batch_fn,
            # Need to pass the observation space, in this case.
            observation_space=spaces.Dict(
                x=spaces.Box(low=0, high=1, shape=(3, 28, 28)),
                t=spaces.Discrete(scenario.nb_tasks),  # task label
            ),
            action_space=spaces.Box(
                low=np.array([i * classes_per_task]),
                high=np.array([(i + 1) * classes_per_task]),
                dtype=int,
            ),
        )
        # Sanity check: the un-batched action space passed above has shape (1,).
        assert spaces.Box(
            low=np.array([i * classes_per_task]),
            high=np.array([(i + 1) * classes_per_task]),
            dtype=int,
        ).shape == (1,)
        assert isinstance(env.observation_space["x"], spaces.Box)
        assert env.observation_space["x"].shape == (batch_size, 3, 28, 28)
        assert env.observation_space["t"].shape == (batch_size,)
        assert env.action_space.shape == (batch_size, 1)
        assert env.reward_space.shape == (batch_size, 1)

        env.seed(123)

        obs = env.reset()
        assert len(obs) == 2
        x, t = obs
        assert x.shape == (batch_size, 3, 28, 28)
        assert t.shape == (batch_size,)

        obs, reward, done, info = env.step(env.action_space.sample())
        # Unpack the observation returned by `step`, so that the shape checks below
        # apply to it rather than to the stale `x` / `t` obtained from `reset()`.
        x, t = obs
        assert x.shape == (batch_size, 3, 28, 28)
        assert t.shape == (batch_size,)
        assert reward.shape == (batch_size,)
        assert not done

        env.close()
def test_observation_wrapper_applied_to_passive_environment(self):
    """Check that a gym wrapper around a PassiveEnvironment is always applied.

    The wrapper should affect the observations / actions / rewards both when
    stepping through the env and when iterating on it.
    """
    batch_size = 5

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    dataset = MNIST("data", transform=transforms)
    # Space of the raw images, passed through the same transforms as the data.
    obs_space = transforms(Image(0, 255, (1, 28, 28), np.uint8))

    dataset.classes
    env = self.PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
    )

    # Un-wrapped env: batched 28x28 images.
    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space == env.action_space

    env.seed(123)
    check_env(env)

    # Apply a transformation that changes the observation space.
    resize = Compose([Transforms.resize_64x64])
    env = TransformObservation(env=env, f=resize)
    assert env.observation_space == Image(0, 1, (batch_size, 3, 64, 64))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space.shape == (batch_size,)

    env.seed(123)
    check_env(env)

    env.close()
# from continuum import ClassIncremental
# from continuum.datasets import MNIST
# from continuum.tasks import split_train_val
def test_passive_environment_interaction(self):
    """Test the gym.Env-style interaction with a PassiveEnvironment.

    The env is created with `pretend_to_be_active=True`: when iterating, the reward
    of each batch is `None`, and a reward is obtained by sending an action.
    """
    batch_size = 5
    max_samples = 100

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))

    obs_space = transforms(Image(0, 255, (1, 28, 28), np.uint8))

    env = self.PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
    )

    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space == env.action_space
    env.seed(123)

    # gym-style: reset, then one step.
    obs = env.reset()
    assert obs in env.observation_space

    obs, reward, done, info = env.step(env.action_space.sample())
    assert reward is not None
    assert obs in env.observation_space

    # dataloader-style: rewards are withheld until an action is sent.
    for i, (obs, reward) in enumerate(env):
        assert obs in env.observation_space
        assert reward is None
        other_reward = env.send(env.action_space.sample())
        assert other_reward is not None

    assert i == max_samples // batch_size - 1
def test_passive_environment_without_pretend_to_be_active(self):
    """Test the interaction with an env created with `pretend_to_be_active=False`.

    When iterating, each batch comes with its reward, and sending an action gives
    back that same reward.
    """
    batch_size = 5
    max_samples = 100

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))

    obs_space = transforms(Image(0, 255, (1, 28, 28), np.uint8))

    env = self.PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=False,
    )

    assert env.observation_space == Image(0, 1, (batch_size, 3, 28, 28))
    assert env.action_space.shape == (batch_size,)
    assert env.reward_space == env.action_space
    env.seed(123)

    # gym-style: reset, then one step.
    obs = env.reset()
    assert obs in env.observation_space

    obs, reward, done, info = env.step(env.action_space.sample())
    assert reward is not None

    # dataloader-style: the reward is given along with the observation.
    for i, (obs, reward) in enumerate(env):
        assert reward is not None
        other_reward = env.send(env.action_space.sample())
        assert (other_reward == reward).all()

    assert i == max_samples // batch_size - 1
def test_passive_environment_needs_actions_to_be_sent(self):
    """Test the 'active dataloader' style interaction.

    With `strict=True`, iterating without ever sending an action is expected to
    raise a RuntimeError. Without it, sending one action per batch gives rewards.
    """
    batch_size = 10
    max_samples = 105

    transforms = Compose([Transforms.to_tensor, Transforms.three_channels])
    full_dataset = MNIST(
        "data", transform=Compose([Transforms.to_tensor, Transforms.three_channels])
    )
    dataset = Subset(full_dataset, list(range(max_samples)))

    obs_space = transforms(Image(0, 255, (1, 28, 28), np.uint8))

    # NOTE(review): this first env is built from the module-level `PassiveEnvironment`
    # rather than `self.PassiveEnvironment`, so subclasses that override the class
    # attribute don't exercise this part of the test — confirm that it is intended.
    strict_env = PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
        strict=True,
    )
    with pytest.raises(RuntimeError):
        for _obs, _ in strict_env:
            pass

    env = self.PassiveEnvironment(
        dataset,
        n_classes=10,
        batch_size=batch_size,
        observation_space=obs_space,
        pretend_to_be_active=True,
    )
    for obs, _ in env:
        assert isinstance(obs, Tensor)
        # Trim the sampled action to the number of observations in this batch.
        n_observations = obs.shape[0]
        action = env.action_space.sample()[:n_observations]
        rewards = env.send(action)
        assert rewards is not None
        assert rewards.shape[0] == action.shape[0]
def test_passive_environment_active_mode_action_reward_match(self):
    """Test that, in 'active' mode, the rewards received match the batch seen.

    The dataset is synthetic: sample `k` is an image filled with the value `k`, with
    label `k`. Sending the labels of the current batch as the action should
    therefore give back rewards equal to that action.
    """
    batch_size = 10
    max_samples = 105

    sample_ids = torch.arange(max_samples)
    images = sample_ids.reshape([max_samples, 1, 1, 1]) * torch.ones(
        [max_samples, 3, 32, 32]
    )
    dataset = TensorDataset(images, torch.arange(max_samples))
    dataset = Subset(dataset, list(range(max_samples)))

    env = self.PassiveEnvironment(
        dataset,
        n_classes=max_samples,
        batch_size=batch_size,
        pretend_to_be_active=True,
    )

    for i, (obs, _) in enumerate(env):
        print(i)
        start, stop = i * batch_size, (i + 1) * batch_size
        # Trim the expected values and the action to the size of this batch.
        n_observations = obs.shape[0]

        expected_obs = torch.arange(start, stop)[:n_observations]
        assert (obs == expected_obs.reshape([n_observations, 1, 1, 1])).all()

        action = torch.arange(start, stop, dtype=int)[:n_observations]
        rewards = env.send(action)
        assert (rewards == action).all()
================================================
FILE: sequoia/settings/sl/incremental/__init__.py
================================================
from .environment import IncrementalSLEnvironment
from .objects import Actions, ActionType, Observations, ObservationType, Rewards, RewardType
from .results import IncrementalSLResults
from .setting import IncrementalSLSetting
# Expose the environment class of this package under the generic name `Environment`.
Environment = IncrementalSLEnvironment
# Second name for the same setting class (presumably kept for code written against
# the "class-incremental" naming — TODO confirm).
ClassIncrementalSetting = IncrementalSLSetting
================================================
FILE: sequoia/settings/sl/incremental/environment.py
================================================
from typing import Any, Callable, Tuple, Union
import gym
from gym import spaces
from torch.utils.data import Dataset, IterableDataset
from sequoia.common.spaces import TypedDictSpace
from sequoia.settings.base.objects import Rewards as BaseRewards
from sequoia.settings.sl.continual.environment import ContinualSLEnvironment
from sequoia.utils.logging_utils import get_logger
from ..continual.environment import ContinualSLTestEnvironment
from .objects import Actions, ActionType, Observations, ObservationType, RewardType
logger = get_logger(__name__)
class IncrementalSLEnvironment(ContinualSLEnvironment[ObservationType, ActionType, RewardType]):
    """Environment type of the incremental SL setting.

    Adds nothing on top of `ContinualSLEnvironment` for now: the constructor
    forwards all of its arguments, unchanged, to the parent class.
    """

    def __init__(
        self,
        dataset: Union[Dataset, IterableDataset],
        hide_task_labels: bool = True,
        observation_space: TypedDictSpace[ObservationType] = None,
        action_space: gym.Space = None,
        reward_space: gym.Space = None,
        split_batch_fn: Callable[[Tuple[Any, ...]], Tuple[ObservationType, ActionType]] = None,
        pretend_to_be_active: bool = False,
        strict: bool = False,
        one_epoch_only: bool = False,
        **kwargs,
    ):
        # Gather the named options, then hand everything over to the parent class.
        named_options = dict(
            hide_task_labels=hide_task_labels,
            observation_space=observation_space,
            action_space=action_space,
            reward_space=reward_space,
            split_batch_fn=split_batch_fn,
            pretend_to_be_active=pretend_to_be_active,
            strict=strict,
            one_epoch_only=one_epoch_only,
        )
        super().__init__(dataset, **named_options, **kwargs)
import bisect
import warnings
from typing import Any, Dict
import numpy as np
import torch
from torch.nn import functional as F
from sequoia.common.gym_wrappers.utils import tile_images
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.transforms import Transforms
from sequoia.settings.assumptions.iid_results import TaskResults
from sequoia.settings.assumptions.incremental import TaskSequenceResults
from .results import IncrementalSLResults
class IncrementalSLTestEnvironment(ContinualSLTestEnvironment):
    def __init__(self, env: gym.Env, *args, task_schedule: Dict[int, Any] = None, **kwargs):
        """Create the test environment around `env`.

        `task_schedule` is a dict whose keys are the steps at which each task starts;
        an empty schedule is used when it is not given. One `TaskResults` is created
        per entry of the schedule, to hold the metrics of the corresponding task.
        """
        super().__init__(env, *args, **kwargs)
        # Number of steps performed so far.
        self._steps = 0
        # TODO: Maybe rework this so we don't depend on the test phase being one task at
        # a time, instead store the test metrics in the task corresponding to the
        # task_label in the observations.
        # BUG: The problem is, right now we're depending on being passed the
        # 'task schedule', which we then use to get the task ids. This
        # is actually pretty bad, because if the class ordering was changed between
        # training and testing, then, this wouldn't actually report the correct results!
        self.task_schedule = task_schedule or {}
        self.task_steps = sorted(self.task_schedule)

        per_task_results = [TaskResults() for _ in self.task_steps]
        self.results: TaskSequenceResults[ClassificationMetrics] = TaskSequenceResults(
            task_results=per_task_results
        )
        # self._reset = False
        # NOTE: The task schedule is already in terms of the number of batches.
        self.boundary_steps = list(self.task_schedule)
def get_results(self) -> IncrementalSLResults:
    """Return the results object in which the metrics of each step are stored."""
    # NOTE(review): `self.results` is created as a `TaskSequenceResults` in `__init__`,
    # which doesn't look like the annotated return type — confirm which one is meant.
    accumulated_results = self.results
    return accumulated_results
def reset(self):
    """Reset the environment, by simply deferring to the parent class."""
    initial_observation = super().reset()
    return initial_observation
    # if not self._reset:
    #     logger.debug("Initial reset.")
    #     self._reset = True
    #     return super().reset()
    # else:
    #     logger.debug("Resetting the env closes it.")
    #     self.close()
    #     return None
def _before_step(self, action):
    """Remember `action` for use in `_after_step`, then defer to the parent class."""
    # Stored first, so that it is available as soon as the parent hook runs.
    self._action = action
    parent_result = super()._before_step(action)
    return parent_result
def _after_step(self, observation, reward, done, info):
    """Record the metrics of the step that was just performed.

    Builds a `ClassificationMetrics` from the action stored by `_before_step` and
    from the reward, appends it to the results of the task which the current step
    falls into (according to the task schedule), and then runs the stats / video
    recording code (which seems to be adapted from gym's Monitor).

    Returns `done`, which is set to True (and the env closed) when the number of
    steps reaches `self.step_limit - 1`.

    Raises NotImplementedError when the action space isn't a `MultiDiscrete` or a
    `MultiBinary` space.
    """
    # Wrap raw rewards so that the `.y` and `.batch_size` attributes can be used below.
    if not isinstance(reward, BaseRewards):
        reward = BaseRewards(y=torch.as_tensor(reward))

    # NOTE(review): `batch_size` isn't used anywhere below.
    batch_size = reward.batch_size
    # The action was saved in `_before_step`.
    action = self._action
    assert action is not None
    if isinstance(self.action_space, (spaces.MultiDiscrete, spaces.MultiBinary)):
        # NOTE(review): `nvec` is an attribute of `MultiDiscrete`; `MultiBinary` spaces
        # might not have it — confirm that this branch works for them.
        n_classes = self.action_space.nvec[0]
        from sequoia.settings.assumptions.task_type import ClassificationActions

        if not isinstance(action, ClassificationActions):
            if isinstance(action, Actions):
                y_pred = action.y_pred
                # 'upgrade', creating some fake logits.
            else:
                y_pred = torch.as_tensor(action)
            # One-hot encoding of the predictions, used in place of the missing logits.
            fake_logits = F.one_hot(y_pred, n_classes)
            action = ClassificationActions(y_pred=y_pred, logits=fake_logits)
    else:
        raise NotImplementedError(
            f"TODO: Remove the assumption here that the env is a classification env "
            f"({self.action_space}, {self.reward_space})"
        )

    # NOTE(review): this triggers on any mismatch of the batch sizes, while the warning
    # only talks about the action being the larger of the two.
    if action.batch_size != reward.batch_size:
        warnings.warn(
            RuntimeWarning(
                f"Truncating the action since its batch size {action.batch_size} "
                f"is larger than the rewards': ({reward.batch_size})"
            )
        )
        action = action[:, : reward.batch_size]

    # TODO: Use some kind of generic `get_metrics(actions: Actions, rewards: Rewards)`
    # function instead.
    y = reward.y
    logits = action.logits
    y_pred = action.y_pred
    metric = ClassificationMetrics(y=y, logits=logits, y_pred=y_pred)
    # From here on, `reward` holds the accuracy of the batch. It is not read again in
    # this method: `metric.accuracy` is fetched a second time for the stats below.
    reward = metric.accuracy

    task_steps = sorted(self.task_schedule.keys())
    assert 0 in task_steps, task_steps
    nb_tasks = len(task_steps)
    assert nb_tasks >= 1

    # Given the step, find the task id.
    # (Index of the last task whose starting step is <= the current step.)
    task_id = bisect.bisect_right(task_steps, self._steps) - 1
    self.results.task_results[task_id].metrics.append(metric)
    self._steps += 1

    # FIXME: Temporary fix: TODO: Make sure this doesn't truncate the number of labels
    if self._steps == self.step_limit - 1:
        self.close()
        done = True

    # Debugging issue with Monitor class:
    # return super()._after_step(observation, reward, done, info)
    if not self.enabled:
        return done

    if done and self.env_semantics_autoreset:
        # For envs with BlockingReset wrapping VNCEnv, this observation will be the
        # first one of the new episode
        # NOTE(review): `self.config` is checked for truthiness before `.render` at the
        # end of this method, but not here.
        if self.config.render:
            self.reset_video_recorder()
        self.episode_id += 1
        self._flush()

    # Record stats: (TODO: accuracy serves as the 'reward'!)
    reward_for_stats = metric.accuracy
    self.stats_recorder.after_step(observation, reward_for_stats, done, info)

    # Record video
    if self.config and self.config.render:
        self.video_recorder.capture_frame()
    return done
def _after_reset(self, observation: Observations):
    """Run the post-reset bookkeeping (adapted from gym's Monitor).

    The image batch of `observation` is first turned into a single uint8,
    channels-last image, the format the Monitor needs to create gifs / videos.
    NOTE: that image is currently only validated, not recorded: the call that
    would pass it on to the parent class is commented out below.

    When the monitor is enabled, the stats recorder is then notified of the reset,
    the video recorder is reset if rendering is turned on in the config, and the
    episode id is incremented.
    """
    image_batch = observation.numpy().x
    # Need to create a single image with the right dtype for the Monitor
    # from gym to create gifs / videos with it.
    if self.batch_size:
        # Need to tile the image batch so it can be seen as a single image
        # by the Monitor.
        image_batch = tile_images(image_batch)

    image_batch = Transforms.channels_last_if_needed(image_batch)
    if image_batch.dtype == np.float32:
        assert (0 <= image_batch).all() and (image_batch <= 1).all()
        # Scale [0, 1] floats to [0, 255]. (Scaling by 256 would send a pixel value
        # of 1.0 to 256, which doesn't fit in a uint8.)
        image_batch = (255 * image_batch).astype(np.uint8)

    assert image_batch.dtype == np.uint8
    # Debugging this issue here:
    # super()._after_reset(image_batch)

    # -- Code from Monitor
    if not self.enabled:
        return
    # Reset the stat count
    self.stats_recorder.after_reset(observation)
    if self.config.render:
        self.reset_video_recorder()

    # Bump *after* all reset activity has finished
    self.episode_id += 1

    self._flush()
    # --
def render(self, mode="human", **kwargs):
    """Render the env; in "rgb_array" mode, a batch of images is tiled into one."""
    # NOTE: This doesn't get called, because the video recorder uses
    # self.env.render(), rather than self.render()
    # TODO: Render when the 'render' argument in config is set to True.
    rendered = super().render(mode=mode, **kwargs)
    if mode != "rgb_array" or not self.batch_size:
        return rendered
    return tile_images(rendered)
================================================
FILE: sequoia/settings/sl/incremental/environment_test.py
================================================
from functools import partial
from typing import ClassVar, Type
from sequoia.common.metrics import ClassificationMetrics
from sequoia.settings.assumptions.discrete_results import TaskSequenceResults
from ..continual.environment_test import (
TestContinualSLTestEnvironment as ContinualSLTestEnvironmentTests,
)
from .environment import IncrementalSLEnvironment, IncrementalSLTestEnvironment
class TestIncrementalSLTestEnvironment(ContinualSLTestEnvironmentTests):
    """Runs the tests of the continual SL test environment on the incremental one."""

    Environment: ClassVar[Type[Environment]] = IncrementalSLEnvironment
    # The test env is given a schedule of 5 tasks, starting at steps 0, 20, ..., 80.
    TestEnvironment: ClassVar[Type[TestEnvironment]] = partial(
        IncrementalSLTestEnvironment,
        task_schedule={start_step: {} for start_step in range(0, 100, 20)},
    )

    def validate_results(self, results: TaskSequenceResults):
        """Check the type and the contents of the results of one test loop."""
        # NOTE: We're not checking that the results here represent the entire transfer
        # matrix, because the test env is only used for one test loop.
        # The Setting creates the transfer matrix using multiple of these
        # `TaskSequenceResults` objects, each of which is obtained after training on
        # a task in the training loop.
        assert isinstance(results, TaskSequenceResults)
        assert isinstance(results.average_metrics, ClassificationMetrics)
        assert results.objective > 0
        # TODO: Fix this check:
        accepted_sample_counts = [95, 100]
        assert results.average_metrics.n_samples in accepted_sample_counts
================================================
FILE: sequoia/settings/sl/incremental/objects.py
================================================
""" Observations/Actions/Rewards particular to an IncrementalSLSetting.
This is just meant as a cleaner way to import the Observations/Actions/Rewards.
"""
from dataclasses import dataclass
from typing import Optional, TypeVar
from torch import Tensor
from sequoia.settings.sl.discrete.setting import DiscreteTaskAgnosticSLSetting
# from sequoia.settings.sl.continual.objects import Observations, Actions, Rewards
# from sequoia.settings.assumptions.context_visibility
@dataclass(frozen=True)
class IncrementalSLObservations(DiscreteTaskAgnosticSLSetting.Observations):
    """Incremental Observations, in a supervised context.

    Frozen dataclass, which extends the `Observations` class of
    `DiscreteTaskAgnosticSLSetting`.
    """

    # The input samples (presumably a batch — TODO confirm the expected shape).
    x: Tensor
    # Task labels of the samples; defaults to `None`.
    task_labels: Optional[Tensor] = None
@dataclass(frozen=True)
class IncrementalSLActions(DiscreteTaskAgnosticSLSetting.Actions):
    """Incremental Actions, in a supervised (passive) context.

    Frozen dataclass that adds no field to the `Actions` class of
    `DiscreteTaskAgnosticSLSetting`.
    """
@dataclass(frozen=True)
class IncrementalSLRewards(DiscreteTaskAgnosticSLSetting.Rewards):
    """Incremental Rewards, in a supervised context.

    Frozen dataclass that adds no field to the `Rewards` class of
    `DiscreteTaskAgnosticSLSetting`.
    """
# Short aliases, so that the objects of this setting can be imported under the same
# generic names (`Observations`, `Actions`, `Rewards`) as in the module docstring.
Observations = IncrementalSLObservations
Actions = IncrementalSLActions
Rewards = IncrementalSLRewards
# Environment = C
# Results = IncrementalSLResults
# ObservationType = TypeVar("ObservationType", bound=Observations)
# ActionType = TypeVar("ActionType", bound=Actions)
# RewardType = TypeVar("RewardType", bound=Rewards)
# Type variables bound to the classes above, for code that is generic over the types
# of observations / actions / rewards.
ObservationType = TypeVar("ObservationType", bound=IncrementalSLObservations)
ActionType = TypeVar("ActionType", bound=IncrementalSLActions)
RewardType = TypeVar("RewardType", bound=IncrementalSLRewards)
# from .environment import IncrementalSLEnvironment
# Environment = IncrementalSLEnvironment
================================================
FILE: sequoia/settings/sl/incremental/results.py
================================================
""" Object representing the "Results" of applying a Method on a Class-Incremental Setting.
This object basically calculates the 'objective' specific to this setting as
well as provide a set of methods for making useful plots and utilities for
logging results to wandb.
"""
from typing import ClassVar
import matplotlib.pyplot as plt
import wandb
from sequoia.settings.assumptions.incremental import IncrementalAssumption
from sequoia.utils.logging_utils import get_logger
from sequoia.utils.plotting import autolabel
logger = get_logger(__name__)
class IncrementalSLResults(IncrementalAssumption.Results):
    """Results for a ClassIncrementalSetting.

    The main objective in this setting is the average test accuracy over all
    tasks.

    The plots to generate are:
    - Accuracy per task
    - Average Test Accuracy over the course of testing
    - Confusion matrix at the end of testing

    All of these will be created from the list of test metrics (Classification
    metrics for now).

    TODO: Add back Wandb logging somehow, even though we might be doing the
    evaluation loop ourselves.
    TODO: Fix this for the 'incremental regression' case.
    """

    # Higher accuracy => better
    lower_is_better: ClassVar[bool] = False

    objective_name: ClassVar[str] = "Average Accuracy"

    # Minimum runtime considered (in hours).
    # (No extra points are obtained when going faster than this.)
    min_runtime_hours: ClassVar[float] = 5.0 / 60.0  # 5 minutes
    # Maximum runtime allowed (in hours).
    max_runtime_hours: ClassVar[float] = 1.0  # one hour.

    def make_plots(self):
        """Return a dict of plots; empty when a wandb run is active."""
        plots = {}
        # TODO: Add a Histogram plot from wandb (for the case where a run is active)?
        if not wandb.run:
            # TODO: Add back the plots.
            plots["task_metrics"] = self.task_accuracies_plot()
        return plots

    def task_accuracies_plot(self):
        """Create a bar plot with the accuracy of each task."""
        figure, axes = plt.subplots()
        task_ids = list(range(self.num_tasks))
        accuracies = [
            task_metrics.accuracy for task_metrics in self.final_performance_metrics
        ]
        bars = axes.bar(task_ids, accuracies)
        axes.set_title("Task Accuracy")
        axes.set_xlabel("Task")
        axes.set_ylabel("Accuracy")
        axes.set_ylim(0, 1.0)
        autolabel(axes, bars)
        return figure

    def cumul_metrics_plot(self):
        """TODO: Create a plot that shows the evolution of the test performance over
        all test tasks seen so far.

        (during training or during testing?)
        """
        figure, axes = plt.subplots()

        learned_tasks = list(range(self.num_tasks))
        objectives = []
        metric_name: str = ""

        for task_index in range(self.num_tasks):
            # Metrics of row `task_index`, for the tasks 0..task_index (inclusive).
            seen_so_far = self.metrics_matrix[task_index][: task_index + 1]
            combined = sum(seen_so_far)
            objectives.append(combined.objective)
            # The name of the metric is taken from the first row that provides one.
            metric_name = metric_name or combined.objective_name

        # x = [metrics.n_samples for metrics in cumulative_metrics]
        # y = [metrics.accuracy for metrics in cumulative_metrics]
        axes.plot(learned_tasks, objectives)
        axes.set_xlabel("# of learned tasks")
        axes.set_ylabel(f"Average {metric_name} on tasks seen so far")
        return figure
# def summary(self) -> str:
# s = StringIO()
# with redirect_stdout(s):
# for i, average_task_metrics in enumerate(self[-1].average_metrics_per_task):
# print(f"Test Results on task {i}: {average_task_metrics}")
# print(f"Average test metrics accross all the test tasks: {self[-1].average_metrics}")
# s.seek(0)
# return s.read()
# def to_log_dict(self) -> Dict[str, float]:
# results = {}
# results[self.objective_name] = self.objective
# average_metrics = self[-1].average_metrics
# if isinstance(average_metrics, ClassificationMetrics):
# results["accuracy/average"] = average_metrics.accuracy
# elif isinstance(average_metrics, RegressionMetrics):
# results["mse/average"] = average_metrics.mse
# else:
# results["average metrics"] = average_metrics
# for i, average_task_metrics in enumerate(self[-1].average_metrics_per_task):
# if isinstance(average_task_metrics, ClassificationMetrics):
# results[f"accuracy/task_{i}"] = average_task_metrics.accuracy
# elif isinstance(average_task_metrics, RegressionMetrics):
# results[f"mse/task_{i}"] = average_task_metrics.mse
# else:
# results[f"task_{i}"] = average_task_metrics
# return results
================================================
FILE: sequoia/settings/sl/incremental/setting.py
================================================
""" Defines a `Setting` subclass for "Class-Incremental" Continual Learning.
Example command to run a method on this setting (in debug mode):
```
python main.py --setting class_incremental --method baseline --debug \
--batch_size 128 --max_epochs 1
```
Class-Incremental definition from [iCaRL](https://arxiv.org/abs/1611.07725):
"Formally, we demand the following three properties of an algorithm to qualify
as class-incremental:
i) it should be trainable from a stream of data in which examples of
different classes occur at different times
ii) it should at any time provide a competitive multi-class classifier for
the classes observed so far,
iii) its computational requirements and memory footprint should remain
bounded, or at least grow very slowly, with respect to the number of classes
seen so far."
"""
import itertools
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, ClassVar, Dict, List, Optional, Tuple, Type, Union
from continuum import ClassIncremental
from continuum.datasets import _ContinuumDataset
from continuum.scenarios.base import _BaseScenario
from simple_parsing import choice, field
from torch import Tensor
from torch.utils.data import Dataset
import wandb
from sequoia.common.config import Config
from sequoia.common.gym_wrappers import TransformObservation
from sequoia.settings.assumptions.incremental import IncrementalAssumption, IncrementalResults
from sequoia.settings.base import Method
from sequoia.settings.rl.wrappers import HideTaskLabelsWrapper
from sequoia.settings.sl.continual.wrappers import relabel
from sequoia.settings.sl.environment import Actions, PassiveEnvironment, Rewards
from sequoia.settings.sl.setting import SLSetting
from sequoia.settings.sl.wrappers import MeasureSLPerformanceWrapper
from sequoia.utils import get_logger
from ..discrete.setting import DiscreteTaskAgnosticSLSetting
from .environment import IncrementalSLEnvironment, IncrementalSLTestEnvironment
from .objects import Actions, Observations, Rewards
from .results import IncrementalSLResults
logger = get_logger(__name__)
# # NOTE: This dict reflects the observation space of the different datasets
# # *BEFORE* any transforms are applied. The resulting property on the Setting is
# # based on this 'base' observation space, passed through the transforms.
# # TODO: Make it possible to automatically add tensor support if the dtype passed to a
# # gym space is a `torch.dtype`.
# tensor_space = add_tensor_support
@dataclass
class IncrementalSLSetting(IncrementalAssumption, DiscreteTaskAgnosticSLSetting):
    """Supervised Setting where the data is a sequence of 'tasks'.

    This class is basically the supervised version of an Incremental Setting.
    The current task can be set at the `current_task_id` attribute.
    """

    # Types of the observations / actions / rewards used by this setting.
    Observations: ClassVar[Type[Observations]] = Observations
    Actions: ClassVar[Type[Actions]] = Actions
    Rewards: ClassVar[Type[Rewards]] = Rewards

    # Type of environment created by this setting (see `test_dataloader`).
    Environment: ClassVar[Type[SLSetting.Environment]] = IncrementalSLEnvironment[
        Observations, Actions, Rewards
    ]

    # Type of the results returned by `apply`. This is declared only once: a
    # second, earlier declaration with a looser annotation was dead code, since
    # the later assignment always won.
    Results: ClassVar[Type[IncrementalSLResults]] = IncrementalSLResults

    # Class variable holding a dict of the names and types of all available
    # datasets.
    available_datasets: ClassVar[
        Dict[str, Type[_ContinuumDataset]]
    ] = DiscreteTaskAgnosticSLSetting.available_datasets.copy()

    # A continual dataset to use. (Should be taken from the continuum package).
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # TODO: IDEA: Adding these fields/constructor arguments so that people can pass a
    # custom ready-made `Scenario` from continuum to use (not sure this is a good idea
    # though)
    train_cl_scenario: Optional[_BaseScenario] = field(default=None, cmd=False, to_dict=False)
    test_cl_scenario: Optional[_BaseScenario] = field(default=None, cmd=False, to_dict=False)
def __post_init__(self):
    """Finish initializing the fields of the Setting (and LightningDataModule).

    Delegates to the parent `__post_init__` first, then stores the number of
    classes per task and makes the test increment equal to the train increment.
    """
    super().__post_init__()
    # TODO: For now we assume a fixed, equal number of classes per task, for
    # sake of simplicity. We could take out this assumption, but it might
    # make things a bit more complicated.
    for attribute in ("increment", "test_increment"):
        assert isinstance(getattr(self, attribute), int)
    self.n_classes_per_task: int = self.increment
    # The test tasks use the same number of classes per task as the train tasks.
    self.test_increment = self.increment
def apply(self, method: Method, config: Config = None) -> IncrementalSLResults:
    """Apply the given method on this setting to produce some results.

    Parameters
    ----------
    method : Method
        The method to configure and evaluate on this setting.
    config : Config, optional
        Configuration to use. When not given (or falsy), one is created with
        `self._setup_config(method)`.

    Returns
    -------
    IncrementalSLResults
        The results of the main loop, which are also logged and passed to
        `method.receive_results`.
    """
    # TODO: It still isn't super clear what should be in charge of creating
    # the config, and how to create it, when it isn't passed explicitly.
    if not config:
        config = self._setup_config(method)
    self.config = config
    assert self.config

    method.configure(setting=self)

    # Run the main loop (which is defined in IncrementalAssumption).
    results: IncrementalSLResults = super().main_loop(method)
    summary = results.summary()
    logger.info(summary)
    method.receive_results(self, results=results)
    return results
def prepare_data(self, data_dir: Path = None, **kwargs):
    """Make sure a config is available, then defer to the parent `prepare_data`.

    When `self.config` is not set, it is created from the command-line
    arguments stored in `self._argv`.
    """
    config = self.config
    if not config:
        config = Config.from_args(self._argv, strict=False)
    self.config = config
    # if self.batch_size is None:
    # logger.warning(UserWarning(
    # f"Using the default batch size of 32. (You can set the "
    # f"batch size by passing a value to the Setting constructor, or "
    # f"by setting the attribute inside your 'configure' method) "
    # ))
    # self.batch_size = 32
    # data_dir = data_dir or self.data_dir or self.config.data_dir
    # self.make_dataset(data_dir, download=True)
    # self.data_dir = data_dir
    return super().prepare_data(data_dir=data_dir, **kwargs)
def setup(self, stage: str = None):
    """Run the parent `setup`, then record where each test task starts.

    Sets `self.test_boundary_steps` (the index of the first test sample of each
    task) and `self.test_steps` (the total number of test samples).
    """
    super().setup(stage=stage)
    # TODO: Adding this temporarily just for the competition: The TestEnvironment
    # needs access to this information in order to split the metrics for each task.
    test_set_lengths = [len(test_dataset) for test_dataset in self.test_datasets]
    cumulative_lengths = list(itertools.accumulate(test_set_lengths))
    # Each task starts where the previous one ended; the first starts at 0.
    self.test_boundary_steps = [0] + cumulative_lengths[:-1]
    self.test_steps = sum(test_set_lengths)
    # self.test_steps = [0] + list(
    # itertools.accumulate(map(len, self.test_datasets))
    # )[:-1]
# def _make_train_dataset(self) -> Dataset:
# return self.train_datasets[self.current_task_id]
# def _make_val_dataset(self) -> Dataset:
# return self.val_datasets[self.current_task_id]
# def _make_test_dataset(self) -> Dataset:
# return concat(self.test_datasets)
def train_dataloader(
    self, batch_size: int = None, num_workers: int = None
) -> IncrementalSLEnvironment:
    """Returns a DataLoader for the train dataset of the current task.

    The environment is created by the parent class and stored on
    `self.train_env`. When `self.monitor_training_performance` is set, its
    wandb prefix is changed so that it includes the current task id.
    """
    # NOTE: The implementation for this is in `DiscreteTaskAgnosticSLSetting`:
    # TODO: Fix the inheritance order so that clicking on this super().train_dataloader gets us
    # to the right point in code.
    # train_env = DiscreteTaskAgnosticSLSetting.train_dataloader(
    # self, batch_size=batch_size, num_workers=num_workers
    # )
    env = super().train_dataloader(batch_size=batch_size, num_workers=num_workers)

    if self.monitor_training_performance:
        # Overwrite the 'wandb prefix' of the `MeasureSLPerformanceWrapper` so it
        # includes the task id.
        assert isinstance(env, MeasureSLPerformanceWrapper)
        env.wandb_prefix = f"Train/Task {self.current_task_id}"

    self.train_env = env
    return self.train_env
def val_dataloader(self, batch_size: int = None, num_workers: int = None) -> PassiveEnvironment:
    """Returns a DataLoader for the validation dataset of the current task.

    Parameters
    ----------
    batch_size : int, optional
        Forwarded to the parent `val_dataloader`.
    num_workers : int, optional
        Forwarded to the parent `val_dataloader`.

    Returns
    -------
    PassiveEnvironment
        The validation environment, which is also stored on `self.val_env`.
    """
    val_env = super().val_dataloader(batch_size=batch_size, num_workers=num_workers)
    # Store the environment explicitly (like `train_dataloader` does), rather
    # than relying on the parent implementation having set `self.val_env`.
    self.val_env = val_env
    return self.val_env
def test_dataloader(
    self, batch_size: int = None, num_workers: int = None
) -> PassiveEnvironment["ClassIncrementalSetting.Observations", Actions, Rewards]:
    """Returns the test environment, built on the dataset from `_make_test_dataset`.

    Parameters
    ----------
    batch_size : int, optional
        Batch size to use. Defaults to None, in which case `self.batch_size` is
        used.
    num_workers : int, optional
        Number of workers to use. Defaults to None, in which case
        `self.num_workers` is used.

    Returns
    -------
    PassiveEnvironment
        The wrapped test environment, which is also stored on `self.test_env`.
        An existing `self.test_env` is closed before being replaced.
    """
    if not self.has_prepared_data:
        self.prepare_data()
    if not self.has_setup_test:
        self.setup("test")

    # Join all the test datasets.
    dataset = self._make_test_dataset()

    batch_size = batch_size if batch_size is not None else self.batch_size
    num_workers = num_workers if num_workers is not None else self.num_workers

    env = self.Environment(
        dataset,
        batch_size=batch_size,
        num_workers=num_workers,
        hide_task_labels=(not self.task_labels_at_test_time),
        observation_space=self.observation_space,
        action_space=self.action_space,
        reward_space=self.reward_space,
        Observations=self.Observations,
        Actions=self.Actions,
        Rewards=self.Rewards,
        pretend_to_be_active=True,
        shuffle=False,
        drop_last=self.drop_last,
    )

    # NOTE: The transforms from `self.transforms` (the 'base' transforms) were
    # already added when creating the datasets and the CL scenario.
    test_transforms = self.transforms + self.test_transforms
    if test_transforms:
        env = TransformObservation(env, f=test_transforms)

    if self.config.device:
        # TODO: Put this before or after the image transforms?
        from sequoia.common.gym_wrappers.convert_tensors import ConvertToFromTensors

        env = ConvertToFromTensors(env, device=self.config.device)

    # TODO: Remove this, I don't think it's used anymore, since `hide_task_labels`
    # is an argument to self.Environment now.
    if not self.task_labels_at_test_time:
        env = HideTaskLabelsWrapper(env)

    # TODO: Remove this once that stuff with the 'fake' task schedule is fixed below,
    # base it on the equivalent in ContinualSLSetting instead (which should actually
    # be moved into DiscreteTaskAgnosticSL, now that I think about it!)
    # Testing this out, we're gonna have a "test schedule" like this to try
    # to imitate the MultiTaskEnvironment in RL.
    transition_steps = [0] + list(itertools.accumulate(map(len, self.test_datasets)))[:-1]

    # FIXME: Creating a 'task schedule' for the TestEnvironment, mimicing what's in
    # the RL settings.
    # Maps the (batch-level) step at which each test task starts to the index of
    # that task. The pairs are zipped together because
    # `dict.fromkeys(keys, value)` would store the very same `value` object (the
    # whole range) under every key.
    # NOTE(review): presumably only the keys are consumed by the test
    # environment -- confirm against `IncrementalSLTestEnvironment`.
    samples_per_step = env.batch_size or 1
    test_task_schedule = dict(
        zip(
            (step // samples_per_step for step in transition_steps),
            range(len(transition_steps)),
        )
    )

    # TODO: Configure the 'monitoring' dir properly.
    if wandb.run:
        test_dir = wandb.run.dir
    else:
        test_dir = self.config.log_dir

    test_loop_max_steps = len(dataset) // (env.batch_size or 1)
    # TODO: Fix this: iteration doesn't ever end for some reason.

    test_env = IncrementalSLTestEnvironment(
        env,
        directory=test_dir,
        step_limit=test_loop_max_steps,
        task_schedule=test_task_schedule,
        force=True,
        config=self.config,
        video_callable=None if (wandb.run or self.config.render) else False,
    )

    # Release the previous test environment (if any) before replacing it.
    if self.test_env:
        self.test_env.close()
    self.test_env = test_env
    return self.test_env
def split_batch_function(
    self, training: bool
) -> Callable[[Tuple[Tensor, ...]], Tuple[Observations, Rewards]]:
    """Returns a callable that is used to split a batch into observations and rewards.

    NOTE: This method is being removed: it currently fails right away with an
    AssertionError (when assertions are enabled).
    """
    assert False, "TODO: Removing this."
    classes_per_task = {
        task_id: self.task_classes(task_id, train=training) for task_id in range(self.nb_tasks)
    }

    def split_batch(batch: Tuple[Tensor, ...]) -> Tuple[Observations, Rewards]:
        """Splits the batch into a tuple of Observations and Rewards.

        Parameters
        ----------
        batch : Tuple[Tensor, ...]
            A batch of data coming from the dataset.

        Returns
        -------
        Tuple[Observations, Rewards]
            A tuple of Observations and Rewards.
        """
        # In this context (class_incremental), we will always have 3 items per
        # batch, because we use the ClassIncremental scenario from Continuum.
        assert len(batch) == 3
        x, y, t = batch

        # Relabel y so it is always in [0, n_classes_per_task) for each task.
        if self.shared_action_space:
            y = relabel(y, classes_per_task)

        task_labels_allowed = (
            self.task_labels_at_train_time if training else self.task_labels_at_test_time
        )
        if not task_labels_allowed:
            # Remove the task labels if we're not currently allowed to have
            # them.
            # TODO: Using None might cause some issues. Maybe set -1 instead?
            t = None

        return self.Observations(x=x, task_labels=t), self.Rewards(y=y)

    return split_batch
def make_train_cl_scenario(self, train_dataset: _ContinuumDataset) -> _BaseScenario:
    """Creates a train ClassIncremental object from continuum.

    The scenario is configured with the train-time fields of this setting
    (`increment`, `initial_increment`, `class_order`) and the base transforms.
    """
    scenario_options = dict(
        nb_tasks=self.nb_tasks,
        increment=self.increment,
        initial_increment=self.initial_increment,
        class_order=self.class_order,
        transformations=self.transforms,
    )
    return ClassIncremental(train_dataset, **scenario_options)
def make_test_cl_scenario(self, test_dataset: _ContinuumDataset) -> _BaseScenario:
    """Creates a test ClassIncremental object from continuum.

    The scenario is configured with the test-time fields of this setting
    (`test_increment`, `test_initial_increment`, `test_class_order`) and the
    base transforms.
    """
    scenario_options = dict(
        nb_tasks=self.nb_tasks,
        increment=self.test_increment,
        initial_increment=self.test_initial_increment,
        class_order=self.test_class_order,
        transformations=self.transforms,
    )
    return ClassIncremental(test_dataset, **scenario_options)
def make_dataset(
    self, data_dir: Path, download: bool = True, train: bool = True, **kwargs
) -> _ContinuumDataset:
    """Create (or return) the dataset designated by `self.dataset`.

    `self.dataset` may be the name of an available dataset, one of the
    available dataset classes, or a ready-made `Dataset` instance (which is
    returned as-is). `data_dir` is created if it doesn't exist yet.

    Raises
    ------
    NotImplementedError
        If `self.dataset` is none of the above.
    """
    # TODO: #7 Use this method here to fix the errors that happen when
    # trying to create every single dataset from continuum.
    data_dir = Path(data_dir)
    if not data_dir.exists():
        data_dir.mkdir(parents=True, exist_ok=True)

    dataset = self.dataset
    if dataset in self.available_datasets:
        # Name of a known dataset.
        dataset_class = self.available_datasets[dataset]
    elif dataset in self.available_datasets.values():
        # One of the known dataset classes.
        dataset_class = dataset
    elif isinstance(dataset, Dataset):
        logger.info(f"Using a custom dataset {self.dataset}")
        return dataset
    else:
        raise NotImplementedError(dataset)
    return dataset_class(data_path=data_dir, download=download, train=train, **kwargs)
# The methods below are mostly used by the MultiHeadModel, to figure out how to
# relabel the batches, or how many classes there are in the current task (since
# we support a different number of classes per task).
# TODO: Remove this? Since I'm simplifying to a fixed number of classes per
# task for now...
def num_classes_in_task(self, task_id: int, train: bool) -> Union[int, List[int]]:
    """Returns the number of classes in the given task.

    Uses `self.increment` when `train` is truthy and `self.test_increment`
    otherwise. A list increment is indexed by `task_id`; any other value is
    returned unchanged.
    """
    increment = self.test_increment if not train else self.increment
    return increment[task_id] if isinstance(increment, list) else increment
def num_classes_in_current_task(self, train: bool = None) -> int:
    """Returns the number of classes in the current task.

    The current task is the one designated by `self._current_task_id`.
    """
    # TODO: Its ugly to have the 'method' tell us if we're currently in
    # train/eval/test, no? Maybe just make a method for each?
    current_task = self._current_task_id
    return self.num_classes_in_task(current_task, train=train)
def task_classes(self, task_id: int, train: bool) -> List[int]:
    """Gives back the 'true' labels present in the given task.

    The labels are a slice of the class ordering: `self.class_order` for the
    train tasks, `self.test_class_order` for the test tasks. As a side effect,
    a falsy `self.test_class_order` is replaced by `self.class_order` when the
    test classes are requested.
    """
    first = sum(self.num_classes_in_task(i, train) for i in range(task_id))
    last = first + self.num_classes_in_task(task_id, train)
    if not train:
        # Set the same ordering as during training, by default.
        self.test_class_order = self.test_class_order or self.class_order
        ordering = self.test_class_order
    else:
        ordering = self.class_order
    return ordering[first:last]
def current_task_classes(self, train: bool) -> List[int]:
    """Gives back the labels present in the current task.

    The current task is the one designated by `self._current_task_id`.
    """
    current_task = self._current_task_id
    return self.task_classes(current_task, train)
def _check_environments(self):
    """Do a quick check to make sure that the dataloaders give back the
    right observations / reward types.

    Each of the train / val / test environments is created with a batch size
    of 5, reset, stepped 5 times with random actions, then closed.
    """
    loader_methods = (self.train_dataloader, self.val_dataloader, self.test_dataloader)
    for make_env in loader_methods:
        logger.debug(f"Checking loader method {make_env.__name__}")
        env = make_env(batch_size=5)
        obs = env.reset()
        assert isinstance(obs, self.Observations)
        # Convert the observation to numpy arrays, to make it easier to
        # check if the elements are in the spaces.
        obs = obs.numpy()
        # take a slice of the first batch, to get sample tensors.
        first_obs = obs[:, 0]
        # TODO: Here we'd like to be able to check that the first observation
        # is inside the observation space, but we can't do that because the
        # task label might be None, and so that would make it fail.
        x, task_label = first_obs
        if task_label is None:
            assert x in self.observation_space["x"]

        for _ in range(5):
            actions = env.action_space.sample()
            observations, rewards, done, info = env.step(actions)
            assert isinstance(observations, self.Observations), type(observations)
            assert isinstance(rewards, self.Rewards), type(rewards)
            # NOTE: This second sample is unused, but is kept since it advances
            # the state of the action space's random number generator.
            actions = env.action_space.sample()
            if done:
                observations = env.reset()
        env.close()
# def relabel(y: Tensor, task_classes: Dict[int, List[int]]) -> Tensor:
# """ Relabel the elements of 'y' to their index in the list of classes for
# their task.
# Example:
# >>> import torch
# >>> y = torch.as_tensor([2, 3, 2, 3, 2, 2])
# >>> task_classes = {0: [0, 1], 1: [2, 3]}
# >>> relabel(y, task_classes)
# tensor([0, 1, 0, 1, 0, 0])
# """
# # TODO: Double-check that this never leaves any zeros where it shouldn't.
# new_y = torch.zeros_like(y)
# # assert unique_y <= set(task_classes), (unique_y, task_classes)
# for task_id, task_true_classes in task_classes.items():
# for i, label in enumerate(task_true_classes):
# new_y[y == label] = i
# return new_y
# Module-level aliases, meant as a cleaner way to import the
# Observations/Actions/Rewards types of this particular setting.
Observations = IncrementalSLSetting.Observations
Actions = IncrementalSLSetting.Actions
Rewards = IncrementalSLSetting.Rewards
# TODO: I wouldn't want these above to overwrite / interfere with the import of
# the "base" versions of these objects from sequoia.settings.bases.objects, which are
# imported in settings/__init__.py. Will have to check that doing
# `from .passive import *` over there doesn't actually import these here.
if __name__ == "__main__":
    # Run the doctests of this module when it is executed as a script.
    import doctest

    doctest.testmod()
================================================
FILE: sequoia/settings/sl/incremental/setting_test.py
================================================
from typing import Any, ClassVar, Dict, Type
import pytest
from continuum import ClassIncremental
from gym import spaces
from gym.spaces import Discrete, Space
from sequoia.common.config import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.common.spaces import Sparse
from sequoia.common.spaces.typed_dict import TypedDictSpace
from sequoia.conftest import skip_param, xfail_param, requires_pyglet
from sequoia.settings.sl.continual.envs import get_action_space
from ..discrete.setting_test import (
TestDiscreteTaskAgnosticSLSetting as DiscreteTaskAgnosticSLSettingTests,
)
from .setting import IncrementalSLSetting
from .setting import IncrementalSLSetting as ClassIncrementalSetting
class TestIncrementalSLSetting(DiscreteTaskAgnosticSLSettingTests):
    """Tests for `IncrementalSLSetting`, which also inherits the tests of the
    discrete task-agnostic SL setting.
    """

    # The type of Setting under test.
    Setting: ClassVar[Type[IncrementalSLSetting]] = IncrementalSLSetting

    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = {
        "dataset": "mnist",
        "batch_size": 64,
    }
def assert_chance_level(
    self, setting: IncrementalSLSetting, results: IncrementalSLSetting.Results
):
    """Assert that `results` are roughly what a random classifier would obtain.

    Parameters
    ----------
    setting : IncrementalSLSetting
        The setting on which the results were obtained.
    results : IncrementalSLSetting.Results
        The results to check: both the overall objective and the accuracy of
        each of the final performance metrics are compared with chance accuracy.

    Raises
    ------
    AssertionError
        If an argument has the wrong type, or an accuracy is too far from chance.
    """
    assert isinstance(setting, ClassIncrementalSetting), setting
    assert isinstance(results, ClassIncrementalSetting.Results), results
    # TODO: Remove this assertion:
    assert isinstance(setting.action_space, spaces.Discrete)

    # TODO: This test so far needs the 'N' to be the number of classes in total,
    # not the number of classes per task.
    # num_classes = setting.action_space.n # <-- Should be using this instead.
    if setting._using_custom_envs_foreach_task:
        num_classes = get_action_space(setting.train_datasets[0]).n
    else:
        num_classes = get_action_space(setting.dataset).n

    average_accuracy = results.objective
    # Calculate the expected 'average' chance accuracy.
    # We assume that there is an equal number of classes in each task.
    # TODO: Should be using `1 / setting.n_classes_per_task` (or something like
    # it) instead, both here and for the per-task check below.
    chance_accuracy = 1 / num_classes
    assert 0.5 * chance_accuracy <= average_accuracy <= 1.5 * chance_accuracy

    for metric in results.final_performance_metrics:
        assert isinstance(metric, ClassificationMetrics)
        task_accuracy = metric.accuracy
        # FIXME: Look into this, we're often getting results substantially
        # worse than chance, and to 'make the tests pass' (which is bad)
        # we're setting the lower bound super low, which makes no sense.
        assert 0.25 * chance_accuracy <= task_accuracy <= 2.1 * chance_accuracy
# TODO: Add a fixture that specifies a data folder common to all tests.
@pytest.mark.parametrize(
    "dataset_name",
    [
        "mnist",
        # "synbols",
        skip_param("synbols", reason="Causes tests to hang for some reason?"),
        "cifar10",
        "cifar100",
        "fashionmnist",
        "kmnist",
        xfail_param("emnist", reason="Bug in emnist, requires split positional arg?"),
        xfail_param("qmnist", reason="Bug in qmnist, 229421 not in list"),
        "mnistfellowship",
        "cifarfellowship",
    ],
)
@pytest.mark.timeout(60)
def test_observation_spaces_match_dataset(self, dataset_name: str):
    """Test to check that the `observation_spaces` and `reward_spaces` dict
    really correspond to the entries of the corresponding datasets, before we do
    anything with them.

    Only the first item of each task dataset is checked.
    """
    # CIFARFellowship, MNISTFellowship, ImageNet100,
    # ImageNet1000, CIFAR10, CIFAR100, EMNIST, KMNIST, MNIST,
    # QMNIST, FashionMNIST,
    dataset_class = self.Setting.available_datasets[dataset_name]
    dataset = dataset_class("data")

    observation_space = self.Setting.base_observation_spaces[dataset_name]
    reward_space = self.Setting.base_reward_spaces[dataset_name]
    for task_dataset in ClassIncremental(dataset, nb_tasks=1):
        first_item = task_dataset[0]
        # Items of a continuum scenario are (x, y, t) triplets: the label comes
        # *before* the task id. (With the two swapped, the task id -- always 0
        # here since there is a single task -- would be checked against the
        # reward space instead of the label.)
        x, y, t = first_item
        assert x.shape == observation_space.shape
        assert x in observation_space, (x.min(), x.max(), observation_space)
        assert y in reward_space
@pytest.mark.parametrize("dataset_name", ["mnist"])
@pytest.mark.parametrize("nb_tasks", [2, 5])
def test_task_label_space(self, dataset_name: str, nb_tasks: int):
    """Check that the space of the task labels is `Discrete(nb_tasks)`.

    The parametrized `nb_tasks` is used as-is (it is not overwritten inside the
    test), so every parametrized number of tasks is actually exercised.
    """
    setting = ClassIncrementalSetting(
        dataset=dataset_name,
        nb_tasks=nb_tasks,
    )
    task_label_space: Space = setting.observation_space.task_labels
    # TODO: Should the task label space be Sparse[Discrete]? or Discrete?
    assert task_label_space == Discrete(nb_tasks)
@pytest.mark.parametrize("dataset_name", ["mnist"])
def test_setting_obs_space_changes_when_transforms_change(self, dataset_name: str):
    """Check how the transforms affect the observation spaces of the Setting and
    of its train / val / test environments.

    TODO: Test that the `observation_space` property on the
    ClassIncrementalSetting reflects the data produced by the dataloaders, and
    that changing a transform on a Setting also changes the value of that
    property on both the Setting itself, as well as on the corresponding
    dataloaders/environments.

    NOTE: The statements below are order-dependent: the transforms of the
    setting are modified between the groups of assertions.
    """
    import torch

    # dataset = ClassIncrementalSetting.available_datasets[dataset_name]
    # Start from a setting without any transforms.
    setting = self.Setting(
        dataset=dataset_name,
        nb_tasks=1,
        transforms=[],
        train_transforms=[],
        val_transforms=[],
        test_transforms=[],
        batch_size=None,
        num_workers=0,
        config=Config(device=torch.device("cpu")),
    )
    # Without transforms, the `x` space is the 'base' space of the dataset.
    base_x_space = type(setting).base_observation_spaces[dataset_name]
    assert setting.observation_space.x == base_x_space
    # TODO: Should the 'transforms' apply to ALL the environments, and the
    # train/valid/test transforms apply only to those envs?
    from sequoia.common.transforms import Transforms
    from sequoia.common.transforms import Compose

    transforms = Compose(
        [
            Transforms.to_tensor,
            Transforms.three_channels,
            Transforms.channels_first_if_needed,
            Transforms.resize_32x32,
        ]
    )
    setting.transforms = transforms
    # The transforms are applied to the space itself to get the expected space.
    expected_x_space = transforms(base_x_space)
    # Check the the `x` property of the setting's observation space has also been transformed:
    assert setting.observation_space.x == expected_x_space
    # When there are no transforms in setting.train_tansforms, the observation
    # space of the Setting and of the train dataloader are the same:
    train_env = setting.train_dataloader(batch_size=None, num_workers=None)
    assert not setting.train_transforms
    assert train_env.observation_space == setting.observation_space
    reset_obs = train_env.reset()
    # NOTE(review): `reset_obs[0]` in the assertion message presumably refers to
    # the same entry as `reset_obs["x"]` -- confirm.
    assert reset_obs["x"] in train_env.observation_space["x"], reset_obs[0].shape
    assert reset_obs["task_labels"] in train_env.observation_space["task_labels"]
    assert reset_obs in train_env.observation_space
    assert reset_obs in setting.observation_space
    assert isinstance(reset_obs, ClassIncrementalSetting.Observations)
    # When we add a transform to `setting.train_transforms` the observation
    # space of the Setting and of the train dataloader are different:
    # NOTE: Transforms should act as the 'base', and train_transforms gets added to it.
    setting.train_transforms = [Transforms.resize_64x64]
    train_env = setting.train_dataloader(batch_size=None)
    assert train_env.f == setting.transforms + setting.train_transforms
    assert train_env.observation_space.x.shape == (3, 64, 64)
    assert train_env.reset() in train_env.observation_space
    # The Setting's property didn't change:
    assert setting.observation_space.x.shape == (3, 32, 32)
    #
    # ---------- Same tests for the val_environment --------------
    #
    val_env = setting.val_dataloader(batch_size=None)
    assert val_env.observation_space == setting.observation_space
    assert val_env.reset() in val_env.observation_space
    # When we add a transform to `setting.val_transforms` the observation
    # space of the Setting and of the val dataloader are different:
    setting.val_transforms = [Transforms.resize_64x64]
    val_env = setting.val_dataloader(batch_size=None)
    assert val_env.observation_space != setting.observation_space
    assert val_env.observation_space.x.shape == (3, 64, 64)
    assert val_env.reset() in val_env.observation_space
    #
    # ---------- Same tests for the test_environment --------------
    #
    # The test environments are used as context managers.
    with setting.test_dataloader(batch_size=None) as test_env:
        if setting.task_labels_at_test_time:
            assert test_env.observation_space == setting.observation_space
        else:
            # Without task labels at test time, the task label space is Sparse.
            assert isinstance(test_env.observation_space["task_labels"], Sparse)
        obs = test_env.reset()
        assert obs in test_env.observation_space
    setting.test_transforms = [Transforms.resize_64x64]
    with setting.test_dataloader(batch_size=None) as test_env:
        # When we add a transform to `setting.test_transforms` the observation
        # space of the Setting and of the test dataloader are different:
        assert test_env.observation_space != setting.observation_space
        assert test_env.observation_space.x.shape == (3, 64, 64)
        assert test_env.reset() in test_env.observation_space
# TODO: This renders, even when we're using the pytest-xvfb plugin, which might
# mean that it's actually creating a Display somewhere?
@pytest.mark.timeout(30)
@requires_pyglet
def test_render(config: Config):
    """Render every step of the train environment of each task of an mnist setting.

    There are no assertions: the test only checks that stepping through and
    rendering the environments in "human" mode doesn't raise.
    """
    setting = ClassIncrementalSetting(dataset="mnist", config=config)
    import matplotlib.pyplot as plt

    plt.ion()
    for task_index in range(setting.nb_tasks):
        setting.current_task_id = task_index
        env = setting.train_dataloader(batch_size=16, num_workers=0)
        env.reset()
        episode_done = False
        # Step with random actions until the environment is exhausted.
        while not episode_done:
            _, _, episode_done, _ = env.step(env.action_space.sample())
            env.render("human")
            # break
        env.close()
def test_class_incremental_random_baseline():
    """Placeholder test: nothing is checked for the random baseline yet."""
    return None
================================================
FILE: sequoia/settings/sl/incremental/unused_batch_transforms.py
================================================
from dataclasses import dataclass, replace
from functools import partial
from typing import Callable, List, Tuple, Union
import gym
import torch
from gym.wrappers import TransformReward
from simple_parsing import list_field
from torch import Tensor
from sequoia.settings import Observations, Rewards
def relabel(y: Tensor, task_classes: List[int]) -> Tensor:
    """Relabel the elements of `y` to their index in `task_classes`.

    A new tensor is returned (`y` isn't modified). Elements of `y` which are
    not in `task_classes` get the label 0.

    Example: with `task_classes = [2, 3]`, the labels `[2, 3, 2]` become
    `[0, 1, 0]`.
    """
    relabeled = torch.zeros_like(y)
    for new_label, old_label in enumerate(task_classes):
        positions = y == old_label
        relabeled[positions] = new_label
    return relabeled
class RelabelWrapper(TransformReward):
    """Reward wrapper which applies `relabel` (with the given task classes) to
    the rewards of the wrapped environment.
    """

    def __init__(self, env: gym.Env, task_classes: List[int]):
        self.task_classes = task_classes
        relabel_rewards = partial(relabel, task_classes=self.task_classes)
        super().__init__(env=env, f=relabel_rewards)
@dataclass
class RelabelTransform(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    """Transform that puts labels back into the [0, n_classes_per_task) range.

    For instance, if it's given a bunch of images that have labels [2, 3, 2]
    and the `task_classes = [2, 3]`, then the new labels will be
    `[0, 1, 0]`.

    Note that the order in `task_classes` is preserved. For instance, in the
    above example, if `task_classes = [3, 2]`, then the new labels would be
    `[1, 0, 1]`.

    IMPORTANT: This transform needs to be applied BEFORE ReorderTensor or
    SplitBatch, because it expects the batch to be (x, y, t) order
    """

    task_classes: List[int] = list_field()

    def __call__(self, batch: Tuple[Tensor, ...]):
        """Relabel the second item of `batch`, leaving the other items as they are.

        A batch with a single item is returned unchanged.
        """
        assert isinstance(batch, (list, tuple)), batch
        if len(batch) == 1:
            # No labels to relabel.
            return batch
        x, y, *task_labels = batch
        # if y.max() == len(self.task_classes):
        # # No need to relabel this batch.
        # # @lebrice: Can we really skip relabeling in this case?
        # return batch
        relabeled_y = relabel(y, task_classes=self.task_classes)
        return (x, relabeled_y, *task_labels)
@dataclass
class ReorderTensors(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    """Reorders the tensors in a batch so the task labels come before the labels."""

    # reorder tensors in the batch so the task labels go into the observations:
    # (x, y, t) -> (x, t, y)
    # TODO: Change this to:
    # (x, y, t) -> ((x, t), y) maybe?
    def __call__(self, batch: Tuple[Tensor, ...]):
        """Return `(x, t, y)` for an `(x, y, t)` batch.

        An `(Observations, Rewards)` pair is returned as-is. Any other batch
        fails an assertion.
        """
        assert isinstance(batch, (list, tuple))
        n_items = len(batch)
        if n_items == 3:
            x, y, task_labels = batch
            return (x, task_labels, y)
        if n_items == 2:
            observations, rewards = batch
            if isinstance(observations, Observations) and isinstance(rewards, Rewards):
                # Already split into observations and rewards.
                return batch
        assert False, batch
@dataclass
class DropTaskLabels(Callable[[Tuple[Tensor, ...]], Tuple[Tensor, ...]]):
    """Transform meant to remove the task labels from a batch.

    For an `(Observations, Rewards)` pair, the task labels of the observations
    are replaced with None.
    NOTE(review): for a batch of 3 tensors, the items are only put back in the
    (x, y, t) order: the task labels are still returned -- confirm that this is
    intended.
    """

    def __call__(self, batch: Union[Tuple[Tensor, ...], Observations]):
        assert isinstance(batch, (tuple, list))
        n_items = len(batch)
        if n_items == 2:
            observations, rewards = batch
            if isinstance(observations, Observations) and isinstance(rewards, Rewards):
                return replace(observations, task_labels=None), rewards
        elif n_items == 3:
            # This is tricky. If we're placed BEFORE the 'ReorderTensors',
            # then the ordering is `x, y, t`, while if we're AFTER, the
            # ordering would then be 'x, t, y'..
            x, first, second = batch
            # IDEA: For now, we assume that the 'y' is a lot more erratic than
            # the task label. Therefore, the number of unique consecutive should
            # be greater for `y` than for `t`.
            n_first = len(first.unique_consecutive())
            n_second = len(second.unique_consecutive())
            if n_first == n_second:
                # hmmm wtf?
                assert False, (first, second, n_first, n_second)
            elif n_first > n_second:
                y, t = first, second
            else:
                y, t = second, first
            return x, y, t
        assert False, f"There are no task labels to drop: {batch}"
================================================
FILE: sequoia/settings/sl/multi_task/__init__.py
================================================
from .setting import MultiTaskSLSetting
# Module-level aliases for the Observations / Actions / Rewards types of the
# MultiTaskSLSetting.
Observations = MultiTaskSLSetting.Observations
Actions = MultiTaskSLSetting.Actions
Rewards = MultiTaskSLSetting.Rewards
# TODO?
# Environment = MultiTaskSetting.Environment
================================================
FILE: sequoia/settings/sl/multi_task/setting.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Type
from sequoia.settings.sl.task_incremental import TaskIncrementalSLSetting
from sequoia.utils import get_logger
# TODO: Playing around with this 'constant_property' idea as an alternative to the
# init=False of `constant` field.
from sequoia.utils.utils import constant_property
from ..task_incremental.setting import TaskIncrementalSLSetting
from ..traditional.setting import TraditionalSLSetting
logger = get_logger(__name__)
@dataclass
class MultiTaskSLSetting(TaskIncrementalSLSetting, TraditionalSLSetting):
    """IID version of the Task-Incremental Setting, where the data is shuffled.

    Can be used to estimate the upper bound performance of Task-Incremental CL Methods.
    """

    # The type of results is the one of the TraditionalSLSetting.
    Results: ClassVar[Type[Results]] = TraditionalSLSetting.Results

    # Declared through `constant_property`, with the value True.
    stationary_context: bool = constant_property(True)
def __post_init__(self):
    """Only delegates to the `__post_init__` of the parent classes."""
    super().__post_init__()
# We reuse the training loop from Incremental, by modifying it so it
# discriminates between "phases" and "tasks".
@property
def phases(self) -> int:
    """The number of phases, which is always 1 in this setting."""
    return 1
# def _make_train_dataset(self) -> Dataset:
# """ Returns the training dataset, which in this case will be shuffled.
# IDEA: We could probably do it the same way in both RL and SL:
# 1. Create the 'datasets' for all the tasks;
# 2. "concatenate"+"Shuffle" the "datasets":
# - in SL: ConcatDataset / shuffle the datasets
# - in RL: Create a true `MultiTaskEnvironment` that accepts a list of envs as
# an input and alternates between environments at each episode.
# (either round-robin style, or randomly)
# Returns
# -------
# Dataset
# """
# joined_dataset = concat(self.train_datasets)
# return shuffle(joined_dataset, seed=self.config.seed)
# def _make_val_dataset(self) -> Dataset:
# joined_dataset = concat(self.val_datasets)
# return shuffle(joined_dataset, seed=self.config.seed)
# def _make_test_dataset(self) -> Dataset:
# return concat(self.test_datasets)
# def train_dataloader(
# self, batch_size: int = None, num_workers: int = None
# ) -> PassiveEnvironment:
# """Returns a DataLoader for the training dataset.
# This dataloader will yield batches which will very likely contain data from
# multiple different tasks, and will contain task labels.
# Parameters
# ----------
# batch_size : int, optional
# Batch size to use. Defaults to None, in which case the value of
# `self.batch_size` is used.
# num_workers : int, optional
# Number of workers to use. Defaults to None, in which case the value of
# `self.num_workers` is used.
# Returns
# -------
# PassiveEnvironment
# A "Passive" Dataloader/gym.Env.
# """
# return super().train_dataloader(batch_size=batch_size, num_workers=num_workers)
# def val_dataloader(
# self, batch_size: int = None, num_workers: int = None
# ) -> PassiveEnvironment:
# """Returns a DataLoader for the validation dataset.
# This dataloader will yield batches which will very likely contain data from
# multiple different tasks, and will contain task labels.
# Parameters
# ----------
# batch_size : int, optional
# Batch size to use. Defaults to None, in which case the value of
# `self.batch_size` is used.
# num_workers : int, optional
# Number of workers to use. Defaults to None, in which case the value of
# `self.num_workers` is used.
# Returns
# -------
# PassiveEnvironment
# A "Passive" Dataloader/gym.Env.
# """
# return super().val_dataloader(batch_size=batch_size, num_workers=num_workers)
# def test_dataloader(
# self, batch_size: int = None, num_workers: int = None
# ) -> PassiveEnvironment:
# """Returns a DataLoader for the test dataset.
# This dataloader will yield batches which will very likely contain data from
# multiple different tasks, and will contain task labels.
# Unlike the train and validation environments, the test environment will not
# yield rewards until the action has been sent to it using either `send` (when
# iterating in the DataLoader-style) or `step` (when interacting with the
# environment in the gym.Env style). For more info, take a look at the
# `PassiveEnvironment` class.
# Parameters
# ----------
# batch_size : int, optional
# Batch size to use. Defaults to None, in which case the value of
# `self.batch_size` is used.
# num_workers : int, optional
# Number of workers to use. Defaults to None, in which case the value of
# `self.num_workers` is used.
# Returns
# -------
# PassiveEnvironment
# A "Passive" Dataloader/gym.Env.
# """
# return super().test_dataloader(batch_size=batch_size, num_workers=num_workers)
# def test_loop(self, method: Method) -> "IncrementalAssumption.Results":
# """ Runs a multi-task test loop and returns the Results.
# """
# return super().test_loop(method)
# # TODO:
# test_env = self.test_dataloader()
# try:
# # If the Method has `test` defined, use it.
# method.test(test_env)
# test_env.close()
# # Get the metrics from the test environment
# test_results: Results = test_env.get_results()
# print(f"Test results: {test_results}")
# return test_results
# except NotImplementedError:
# logger.info(
# f"Will query the method for actions at each step, "
# f"since it doesn't implement a `test` method."
# )
# obs = test_env.reset()
# # TODO: Do we always have a maximum number of steps? or of episodes?
# # Will it work the same for Supervised and Reinforcement learning?
# max_steps: int = getattr(test_env, "step_limit", None)
# # Reset on the last step is causing trouble, since the env is closed.
# pbar = tqdm.tqdm(itertools.count(), total=max_steps, desc="Test")
# episode = 0
# for step in pbar:
# if test_env.is_closed():
# logger.debug(f"Env is closed")
# break
# # logger.debug(f"At step {step}")
# action = method.get_actions(obs, test_env.action_space)
# # logger.debug(f"action: {action}")
# # TODO: Remove this:
# if isinstance(action, Actions):
# action = action.y_pred
# if isinstance(action, Tensor):
# action = action.cpu().numpy()
# obs, reward, done, info = test_env.step(action)
# if done and not test_env.is_closed():
# # logger.debug(f"end of test episode {episode}")
# obs = test_env.reset()
# episode += 1
# test_env.close()
# test_results = test_env.get_results()
# return test_results
================================================
FILE: sequoia/settings/sl/multi_task/setting_test.py
================================================
"""
TODO: Tests for the multi-task SL setting.
- Has only one train/test 'phase'
- The nb_tasks attribute should still reflect the number of tasks.
- on_task_switch should never be called during training
- (not so sure during testing)
- Task labels should be available for both training and testing.
- Classes shouldn't be relabeled.
"""
import dataclasses
import itertools
import numpy as np
import pytest
import torch
from gym.spaces import Discrete
from sequoia.common.spaces import Image, TypedDictSpace
from sequoia.settings import Actions, Environment
from .setting import MultiTaskSLSetting
def check_is_multitask_env(env: Environment, has_rewards: bool):
    """Assert that `env` behaves like a multi-task environment.

    The env is first consumed DataLoader-style for (up to) ten batches: each batch of
    observations must contain more than one distinct task label, and the rewards
    must either be present with more than two distinct labels (`has_rewards=True`)
    or be `None` (`has_rewards=False`).

    The env is then reset and stepped ten times through its gym-style API, checking
    the observations, actions and rewards against the spaces of the env.
    """
    # DataLoader-style iteration.
    for observations, rewards in itertools.islice(env, 10):
        assert isinstance(observations, MultiTaskSLSetting.Observations)
        distinct_tasks = set(observations.task_labels.cpu().tolist())
        assert len(distinct_tasks) > 1

        if not has_rewards:
            assert rewards is None
            continue

        assert isinstance(rewards, MultiTaskSLSetting.Rewards)
        # Check that there is no relabelling happening, by checking that there are
        # more different y's than there are usually classes in each batch.
        distinct_labels = set(rewards.y.cpu().tolist())
        assert len(distinct_labels) > 2

    # Gym-style interaction.
    obs = env.reset()
    obs_space = env.observation_space
    assert isinstance(obs_space, TypedDictSpace)

    expected_shapes = {key: space.shape for key, space in obs_space.spaces.items()}
    expected_dtypes = {key: space.dtype for key, space in obs_space.spaces.items()}
    # assert False, (obs.keys(), obs.numpy().keys())
    assert obs.shapes == expected_shapes
    assert obs.numpy().shapes == expected_shapes
    assert obs.dtypes == expected_dtypes

    x_space = obs_space.x
    t_space = obs_space.task_labels
    assert obs.x in x_space, (obs.x, x_space)
    assert obs.task_labels in t_space, (obs.task_labels, t_space)
    assert isinstance(obs, obs_space.dtype)
    assert obs in obs_space

    done = False
    steps = 0
    for _ in range(10):
        if done:
            break
        action = Actions(y_pred=torch.randint(10, [env.batch_size]))
        # BUG: convert_tensors seems to be causing issues again: We shouldn't have
        # to manually convert obs to numpy before checking `obs in obs_space`.
        # TODO: Also not super clean that we can't just do `action in action_space`.
        # assert action.numpy() in env.action_space
        assert action.y_pred.numpy() in env.action_space
        obs, reward, done, info = env.step(action)
        assert obs.numpy() in env.observation_space
        assert reward.y in env.reward_space
        steps += 1

    # The env should not have ended within those ten steps.
    assert done is False
    assert steps == 10
from sequoia.common.config import Config
def test_multitask_setting(config: Config):
    """Check the spaces and the train / val environments of the multi-task setting."""
    from sequoia.common.spaces.image import ImageTensorSpace
    from sequoia.common.spaces.tensor_spaces import TensorDiscrete

    # Use a copy of the config with the device set to CPU.
    cpu_config = dataclasses.replace(config, device=torch.device("cpu"))
    setting = MultiTaskSLSetting(dataset="mnist", config=cpu_config)

    assert setting.phases == 1
    assert setting.nb_tasks == 5

    expected_observation_space = TypedDictSpace(
        x=ImageTensorSpace(0.0, 1.0, (3, 28, 28), np.float32, device=cpu_config.device),
        task_labels=TensorDiscrete(5, device=cpu_config.device),
        dtype=setting.Observations,
    )
    assert setting.observation_space == expected_observation_space
    assert setting.action_space == Discrete(10)
    # assert setting.config.device.type == "cuda" if torch.cuda.is_available() else "cpu"

    # Both the training and the validation environments give rewards.
    for make_env in (setting.train_dataloader, setting.val_dataloader):
        with make_env(batch_size=32, num_workers=0) as env:
            check_is_multitask_env(env, has_rewards=True)
@pytest.mark.xfail(reason="test environments still operate in a 'sequential tasks' way")
def test_multitask_setting_test_env():
    """Check the spaces and the test environment of the multi-task setting.

    Marked as an expected failure (see the `xfail` reason above).
    """
    setting = MultiTaskSLSetting(dataset="mnist")
    assert setting.phases == 1
    assert setting.nb_tasks == 5

    expected_observation_space = TypedDictSpace(
        x=Image(0.0, 1.0, (3, 28, 28), np.float32),
        task_labels=Discrete(5),
    )
    assert setting.observation_space == expected_observation_space
    assert setting.action_space == Discrete(10)

    # FIXME: Wait, actually, this test environment, will it be shuffled, or not?
    test_env_context = setting.test_dataloader(batch_size=32, num_workers=0)
    with test_env_context as test_env:
        check_is_multitask_env(test_env, has_rewards=False)
from sequoia.settings.assumptions.incremental_test import DummyMethod
def test_on_task_switch_is_called_multi_task():
    """Apply a `DummyMethod` to the multi-task setting and check its task switches.

    The method is expected to record one task switch per task, with task ids
    `0 .. nb_tasks - 1`, none of them received while training.
    """
    setting = MultiTaskSLSetting(
        dataset="mnist",
        nb_tasks=5,
        # train_steps_per_task=100,
        # max_steps=500,
        # test_steps_per_task=100,
        train_transforms=[],
        test_transforms=[],
        val_transforms=[],
    )
    method = DummyMethod()
    setting.apply(method)

    n_tasks = setting.nb_tasks
    assert method.n_task_switches == n_tasks
    assert method.received_task_ids == list(range(n_tasks))
    assert method.received_while_training == [False] * n_tasks
================================================
FILE: sequoia/settings/sl/setting.py
================================================
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Type, TypeVar
from pytorch_lightning import LightningDataModule
from simple_parsing import choice, list_field
from torch import Tensor
from sequoia.common.transforms import Transforms
from sequoia.settings import Setting
from sequoia.settings.base.environment import ActionType, ObservationType, RewardType
from .environment import PassiveEnvironment
@dataclass
class SLSetting(Setting[PassiveEnvironment[ObservationType, ActionType, RewardType]]):
    """Supervised Learning Setting.

    Core assumptions:
    - Current actions have no influence on future observations.
    - The environment gives back "dense feedback", (the 'reward' associated with all
      possible actions at each step, rather than a single action)

    For example, supervised learning is a Passive setting, since predicting a
    label has no effect on the reward you're given (the label) or on the next
    samples you observe.
    """

    # Observations of this setting: adds the `x` tensor to `Setting.Observations`.
    @dataclass(frozen=True)
    class Observations(Setting.Observations):
        x: Tensor

    # Actions of this setting: no fields added on top of `Setting.Actions`.
    @dataclass(frozen=True)
    class Actions(Setting.Actions):
        pass

    # Rewards of this setting: no fields added on top of `Setting.Rewards`.
    @dataclass(frozen=True)
    class Rewards(Setting.Rewards):
        pass

    # Type of environment associated with this setting.
    Environment: ClassVar[Type[PassiveEnvironment]] = PassiveEnvironment

    # TODO: rename/remove this, as it isn't used, and there could be some
    # confusion with the available_datasets in task-incremental and iid.
    # Also, since those are already LightningDataModules, what should we do?
    available_datasets: ClassVar[Dict[str, Type[LightningDataModule]]] = {
        # "mnist": MNISTDataModule,
        # "fashion_mnist": FashionMNISTDataModule,
        # "cifar10": CIFAR10DataModule,
        # "imagenet": ImagenetDataModule,
    }
    # Which setup / dataset to use.
    # The setups/dataset are implemented as `LightningDataModule`s.
    dataset: str = choice(available_datasets.keys(), default="mnist")

    # Transforms to be applied to the observations of the train/valid/test
    # environments.
    transforms: List[Transforms] = list_field()

    # Transforms to be applied to the training datasets.
    train_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Transforms to be applied to the validation datasets.
    val_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)
    # Transforms to be applied to the testing datasets.
    test_transforms: List[Transforms] = list_field(Transforms.to_tensor, Transforms.three_channels)

    # Whether to drop the last batch (during training). Useful if you use batchnorm, to
    # avoid having an error when the batch_size is 1.
    drop_last: bool = False
# Type variable bound to `SLSetting`, for annotating code that is generic over SL settings.
SettingType = TypeVar("SettingType", bound=SLSetting)
================================================
FILE: sequoia/settings/sl/task_incremental/__init__.py
================================================
""" Task Incremental Setting
Adds the additional assumption that the task labels are available at test time.
"""
# 1. Import stuff from the Parent
# NOTE: Here there doesn't seem to be a need for a custom 'Results' class for
# TaskIncremental, given how similar it is to ClassIncremental.
# 2. Import what we overwrite/customize
from .setting import TaskIncrementalSLSetting
================================================
FILE: sequoia/settings/sl/task_incremental/setting.py
================================================
""" Defines the Task-Incremental CL Setting.
Task-Incremental CL is a variant of the ClassIncrementalSetting with task labels
available at both train and test time.
"""
from dataclasses import dataclass
from typing import ClassVar, Type, TypeVar
from sequoia.settings.assumptions.task_incremental import TaskIncrementalAssumption
from sequoia.settings.sl.incremental import IncrementalSLResults as TaskIncrementalSLResults
from sequoia.settings.sl.incremental import IncrementalSLSetting
from sequoia.utils.utils import constant
@dataclass
class TaskIncrementalSLSetting(TaskIncrementalAssumption, IncrementalSLSetting):
    """Setting where data arrives in a series of Tasks, and where the task
    labels are always available (both train and test time).
    """

    # Results type for this setting: an alias of `IncrementalSLResults` (see the
    # imports of this module).
    Results: ClassVar[Type[Results]] = TaskIncrementalSLResults

    # Whether task labels are available at train time. (Forced to True.)
    task_labels_at_train_time: bool = constant(True)
    # Whether task labels are available at test time.
    # TODO: Is this really always True for all Task-Incremental Settings?
    task_labels_at_test_time: bool = constant(True)
# Type variable bound to `TaskIncrementalSLSetting`.
SettingType = TypeVar("SettingType", bound=TaskIncrementalSLSetting)
================================================
FILE: sequoia/settings/sl/task_incremental/setting_test.py
================================================
import itertools
import math
from typing import *
import pytest
from sequoia.common.config import Config
from sequoia.settings.assumptions.incremental_test import OtherDummyMethod
from sequoia.utils.logging_utils import get_logger
from ..incremental.setting_test import TestIncrementalSLSetting as IncrementalSLSettingTests
from .setting import TaskIncrementalSLSetting
logger = get_logger(__name__)
class TestTaskIncrementalSLSetting(IncrementalSLSettingTests):
    """Runs the tests inherited from the Incremental SL setting tests, with
    `TaskIncrementalSLSetting` as the setting under test.
    """

    # The type of setting under test.
    Setting: ClassVar[Type[Setting]] = TaskIncrementalSLSetting

    # Keyword arguments stored for the inherited tests.
    fast_dev_run_kwargs: ClassVar[Dict[str, Any]] = {
        "dataset": "mnist",
        "batch_size": 64,
    }
def check_only_right_classes_present(setting: TaskIncrementalSLSetting):
    """Checks that only the classes within each task are present.

    For every task of `setting`, up to 100 batches (of size 5) are drawn from the
    train, validation and test environments. Each batch is checked for:
    - task labels being present when the setting says they are available;
    - every label `y` belonging to the classes of the task (for the test
      environment, to any class of the reward space; see the FIXME below);
    - images having shape `(batch_size, 3, 28, 28)`.

    Args:
        setting: The setting to check. Its `current_task_id` attribute is modified
            by this function.

    TODO: This should be refactored to be based more on the reward space.
    """
    # Task labels are expected at both train time and test time.
    # (The original assertion checked the test-time flag twice.)
    assert setting.task_labels_at_train_time and setting.task_labels_at_test_time

    for i in range(setting.nb_tasks):
        setting.current_task_id = i
        batch_size = 5

        # ---- Training environment ----
        train_loader = setting.train_dataloader(batch_size=batch_size)

        # get the classes in the current task:
        task_classes = setting.task_classes(i, train=True)

        for j, (observations, rewards) in enumerate(itertools.islice(train_loader, 100)):
            x = observations.x
            t = observations.task_labels
            if setting.task_labels_at_train_time:
                assert t is not None

            y = rewards.y
            print(i, j, y, t)
            y_in_task_classes = [y_i in task_classes for y_i in y.tolist()]
            assert all(y_in_task_classes)
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            reward = train_loader.send([4 for _ in range(batch_size)])
            if rewards is not None:
                # If we send something to the env, then it should give back the same
                # labels as for the last batch.
                assert (reward.y == rewards.y).all()

        train_loader.close()

        # ---- Validation environment ----
        valid_loader = setting.val_dataloader(batch_size=batch_size)
        for j, (observations, rewards) in enumerate(itertools.islice(valid_loader, 100)):
            x = observations.x
            t = observations.task_labels
            # NOTE(review): if `monitor_training_performance` is True, the line
            # `y = rewards.y` below fails, since `rewards` is asserted to be None
            # here. Confirm whether this check is intended for the validation env.
            if setting.monitor_training_performance:
                assert rewards is None

            if setting.task_labels_at_train_time:
                assert t is not None

            y = rewards.y
            print(i, j, y, t)
            y_in_task_classes = [y_i in task_classes for y_i in y.tolist()]
            assert all(y_in_task_classes)
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

            reward = valid_loader.send(valid_loader.action_space.sample())
            if rewards is not None:
                # If we send something to the env, then it should give back the same
                # labels as for the last batch.
                assert (reward.y == rewards.y).all()

        valid_loader.close()

        # ---- Test environment ----
        # FIXME: get the classes in the current task, at test-time.
        task_classes = list(range(setting.reward_space.n))

        test_loader = setting.test_dataloader(batch_size=batch_size)
        assert not test_loader.unwrapped._hide_task_labels
        for j, (observations, rewards) in enumerate(itertools.islice(test_loader, 100)):
            x = observations.x
            t = observations.task_labels
            if setting.task_labels_at_test_time:
                assert t is not None

            if rewards is None:
                # The rewards are obtained by sending an action to the env.
                rewards = test_loader.send(test_loader.action_space.sample())
                assert rewards is not None
                assert rewards.y is not None

            y = rewards.y
            print(i, j, y, t)
            y_in_task_classes = [y_i in task_classes for y_i in y.tolist()]
            assert all(y_in_task_classes)
            assert x.shape == (batch_size, 3, 28, 28)
            x = x.permute(0, 2, 3, 1)[0]
            assert x.shape == (28, 28, 3)

        test_loader.close()
def test_task_incremental_mnist_setup():
    """Task-incremental MNIST with 2 classes per task: only the right classes show up."""
    setting_kwargs = dict(
        dataset="mnist",
        increment=2,
        # BUG: When num_workers > 0, some of the tests hang, but only when running *all* the tests!
        # num_workers=0,
    )
    setting = TaskIncrementalSLSetting(**setting_kwargs)

    assert setting.task_labels_at_test_time
    assert setting.task_labels_at_train_time

    setting.prepare_data(data_dir="data")
    setting.setup()
    check_only_right_classes_present(setting)
@pytest.mark.xfail(
    reason=(
        "TODO: Continuum actually re-labels the images to 0-10, regardless of the "
        "class order. The actual images are ok though."
    )
)
def test_task_incremental_mnist_setup_reversed_class_order():
    """Task-incremental MNIST with the class order reversed (expected to fail for now)."""
    reversed_classes = list(range(9, -1, -1))
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=5,
        class_order=reversed_classes,
        # num_workers=0,
    )

    # Task labels and task boundaries are known at both train and test time.
    assert setting.task_labels_at_train_time
    assert setting.task_labels_at_test_time
    assert setting.known_task_boundaries_at_train_time
    assert setting.known_task_boundaries_at_test_time

    setting.prepare_data(data_dir="data")
    setting.setup()
    check_only_right_classes_present(setting)
def test_class_incremental_mnist_setup_with_nb_tasks():
    """With `nb_tasks=2` on MNIST, the increment is 5 and there are two datasets per split."""
    setting = TaskIncrementalSLSetting(dataset="mnist", nb_tasks=2, num_workers=0)
    assert setting.increment == 5

    setting.prepare_data(data_dir="data")
    setting.setup()

    # One dataset per task, for each of the train / val / test splits.
    assert len(setting.train_datasets) == 2
    assert len(setting.val_datasets) == 2
    assert len(setting.test_datasets) == 2

    check_only_right_classes_present(setting)
def test_action_space_always_matches_obs_batch_size(config: Config):
    """Make sure that the batch size in the observations always matches the action
    space provided to the `get_actions` method.

    ALSO:
    - Make sure that we get asked for actions for all the observations in the test set,
      even when there is a shorter last batch.
    - The total number of observations match the dataset size.
    """
    nb_tasks = 5
    # TODO: The `drop_last` argument seems to not be used correctly by the dataloaders / test loop.
    batch_size = 128

    # HUH why are we doing this here?
    setting = TaskIncrementalSLSetting(
        dataset="mnist",
        nb_tasks=nb_tasks,
        batch_size=batch_size,
        num_workers=4,
        monitor_training_performance=True,
        drop_last=False,
    )

    # 10_000 examples in the test dataset of mnist.
    total_samples = len(setting.test_dataloader().dataset)

    method = OtherDummyMethod()
    setting.apply(method, config=config)

    # Multiply by nb_tasks because the test loop is ran after each training task.
    batches_per_test_loop = math.ceil(total_samples / batch_size)
    assert sum(method.batch_sizes) == total_samples * nb_tasks
    assert len(method.batch_sizes) == batches_per_test_loop * nb_tasks

    # Only full batches are expected, plus the shorter last batch if there is one.
    expected_batch_sizes = {batch_size}
    last_batch_size = total_samples % batch_size
    if last_batch_size != 0:
        expected_batch_sizes.add(last_batch_size)
    assert set(method.batch_sizes) == expected_batch_sizes
================================================
FILE: sequoia/settings/sl/traditional/__init__.py
================================================
# 1. Import stuff from the Parent
# 2. Import what we overwrite/customize
from .results import IIDResults
from .setting import TraditionalSLSetting
================================================
FILE: sequoia/settings/sl/traditional/results.py
================================================
"""Defines the Results of applying a Method to an IID Setting.
"""
from pathlib import Path
from typing import Dict, Union
import matplotlib.pyplot as plt
from sequoia.settings.sl.incremental.results import IncrementalSLResults
class IIDResults(IncrementalSLResults):
    """Results of applying a Method on an IID Setting.

    # TODO: Refactor this to be based on `TaskResults`?
    """

    def save_to_dir(self, save_dir: Union[str, Path]) -> None:
        """Save the results (as `results.json`) and the plots (as .jpg) in `save_dir`.

        The directory, and any parent directories of the figure paths, are created
        if needed.
        """
        # TODO: Add wandb logging here somehow.
        output_dir = Path(save_dir)
        output_dir.mkdir(exist_ok=True, parents=True)
        figures: Dict[str, plt.Figure] = self.make_plots()

        # Save the actual 'results' object to a file in the save dir.
        json_path = output_dir / "results.json"
        self.save(json_path)
        print(f"Saved a copy of the results to {json_path}")

        print(f"\nPlots: {figures}\n")
        for name, fig in figures.items():
            print(f"fig_name: {name}")
            # fig.show()
            # plt.waitforbuttonpress(10)
            figure_path = (output_dir / name).with_suffix(".jpg")
            figure_path.parent.mkdir(exist_ok=True, parents=True)
            fig.savefig(figure_path)
            print(f"Saved figure at path {figure_path}")

    def make_plots(self) -> Dict[str, plt.Figure]:
        """Return the plots of the parent class, plus a "class_accuracies" plot."""
        figures = super().make_plots()
        # TODO: Could add a Confusion Matrix plot?
        figures.update(class_accuracies=self.class_accuracies_plot())
        return figures

    def class_accuracies_plot(self):
        """Create a bar plot of the class accuracies found in `self[0][0].average_metrics`."""
        fig: plt.Figure
        ax: plt.Axes
        fig, ax = plt.subplots()

        accuracies = self[0][0].average_metrics.class_accuracy
        class_indices = list(range(len(accuracies)))
        ax.bar(class_indices, accuracies)

        ax.set_title("Class Accuracy")
        ax.set_xlabel("Class")
        ax.set_ylabel("Accuracy")
        ax.set_ylim(0, 1.0)
        # autolabel(ax, rects)
        return fig

    # def summary(self) -> str:
    #     s = StringIO()
    #     with redirect_stdout(s):
    #         print(f"Average Accuracy: {self.average_metrics.accuracy:.2%}")
    #         for i, class_acc in enumerate(self.average_metrics.class_accuracy):
    #             print(f"Accuracy for class {i}: {class_acc:.3%}")
    #     s.seek(0)
    #     return s.read()

    def to_log_dict(self, verbose: bool = False) -> Dict[str, float]:
        """Return the log dict of the parent class, with the entries nested under
        "Task 0" -> "Task 0" moved to the top level.
        """
        log_dict = super().to_log_dict(verbose=verbose)
        # Remove the useless 2-levels of nesting from the log_dict
        nested_entries = log_dict.pop("Task 0").pop("Task 0")
        log_dict.update(nested_entries)
        # assert False, json.dumps(log_dict, indent="\t")
        return log_dict
================================================
FILE: sequoia/settings/sl/traditional/setting.py
================================================
""" Defines the TraditionalSLSetting, as a variant of the TaskIncremental setting with
only one task.
"""
from dataclasses import dataclass
from typing import ClassVar, List, Optional, Type, TypeVar, Union
from sequoia.utils.utils import constant
# TODO: Re-arrange the 'multiple-inheritance' with domain-incremental and
# task-incremental, this might not be 100% accurate, as the "IID" you get from
# moving down from domain-incremental (+ only one task) might not be exactly the same as
# the one you get from TaskIncremental (+ only one task)
from ..incremental import IncrementalSLSetting
from .results import IIDResults
# TODO: IDEA: Add the pytorch lightning datamodules in the list of
# 'available datasets' for the IID setting, and make sure that it doesn't mess
# up the methods in the parents (train/val loop, dataloader construction, etc.)
# IDEA: Maybe overwrite the 'train/val/test_dataloader' methods on the setting
# and when the chosen dataset is a LightningDataModule, then just return the
# result from the corresponding method on the LightningDataModule, rather than
# from super().
# from pl_bolts.datamodules import (CIFAR10DataModule, FashionMNISTDataModule,
# ImagenetDataModule, MNISTDataModule)
@dataclass
class TraditionalSLSetting(IncrementalSLSetting):
    """Your 'usual' supervised learning Setting, where the samples are i.i.d.

    This Setting is slightly different than the others, in that it can be recovered in
    *two* different ways:
    - As a variant of Task-Incremental learning, but where there is only one task;
    - As a variant of Domain-Incremental learning, but where there is only one task.
    """

    # Results type for this setting.
    Results: ClassVar[Type[Results]] = IIDResults

    # Number of tasks.
    nb_tasks: int = 5

    stationary_context: bool = constant(True)

    # increment: Union[int, List[int]] = constant(None)

    # A different task size applied only for the first task.
    # Deactivated if `increment` is a list.
    initial_increment: int = constant(None)
    # An optional custom class order, used for NC.
    class_order: Optional[List[int]] = constant(None)
    # Either number of classes per task, or a list specifying for
    # every task the amount of new classes (defaults to the value of
    # `increment`).
    test_increment: Optional[Union[List[int], int]] = constant(None)
    # A different task size applied only for the first test task.
    # Deactivated if `test_increment` is a list. Defaults to the
    # value of `initial_increment`.
    test_initial_increment: Optional[int] = constant(None)
    # An optional custom class order for testing, used for NC.
    # Defaults to the value of `class_order`.
    test_class_order: Optional[List[int]] = constant(None)

    @property
    def phases(self) -> int:
        """The number of training 'phases', i.e. how many times `method.fit` will be
        called.

        Returns 1 when `stationary_context` is True, and the number of tasks
        otherwise. For instance in so-called Multi-Task Settings, this is 1.
        """
        return 1 if self.stationary_context else self.nb_tasks
# Type variable bound to `TraditionalSLSetting`.
SettingType = TypeVar("SettingType", bound=TraditionalSLSetting)

# When this module is executed as a script, call the setting's `main` classmethod.
if __name__ == "__main__":
    TraditionalSLSetting.main()
================================================
FILE: sequoia/settings/sl/traditional/setting_test.py
================================================
import pytest
from sequoia.methods import Method
from sequoia.settings import (
ClassIncrementalSetting,
DomainIncrementalSLSetting,
TaskIncrementalSLSetting,
)
from ..continual.setting import ContinualSLSetting
from ..discrete.setting import DiscreteTaskAgnosticSLSetting
from ..incremental.setting import IncrementalSLSetting
from ..multi_task.setting import MultiTaskSLSetting
from .setting import TraditionalSLSetting
# Empty `Method` subclasses, one per SL setting. Each one only declares its
# `target_setting`; the tests below call `is_applicable` on them.


# Targets `ContinualSLSetting`.
class ContinualSLMethod(Method, target_setting=ContinualSLSetting):
    pass


# Targets `DiscreteTaskAgnosticSLSetting`.
class DiscreteTaskAgnosticSLMethod(Method, target_setting=DiscreteTaskAgnosticSLSetting):
    pass


# Targets `IncrementalSLSetting`.
class IncrementalSLMethod(Method, target_setting=IncrementalSLSetting):
    pass


# Targets `ClassIncrementalSetting`.
class ClassIncrementalSLMethod(Method, target_setting=ClassIncrementalSetting):
    pass


# Targets `DomainIncrementalSLSetting`.
class DomainIncrementalSLMethod(Method, target_setting=DomainIncrementalSLSetting):
    pass


# Targets `TaskIncrementalSLSetting`.
class TaskIncrementalSLMethod(Method, target_setting=TaskIncrementalSLSetting):
    pass


# Targets `TraditionalSLSetting`.
class TraditionalSLMethod(Method, target_setting=TraditionalSLSetting):
    pass


# Targets `MultiTaskSLSetting`.
class MultiTaskSLMethod(Method, target_setting=MultiTaskSLSetting):
    pass
def test_methods_applicable_to_iid_setting():
    """Test to make sure that Methods that are applicable to the Domain-Incremental
    are applicable to the IID Setting, same for those targetting the Task-Incremental
    setting.

    The expectations are laid out as a table: for each method type, the settings it
    must be applicable to, and the settings it must not be applicable to.
    """
    # Rows of: (method type, applicable settings, non-applicable settings).
    expectations = [
        (
            ContinualSLMethod,
            [
                ContinualSLSetting,
                DiscreteTaskAgnosticSLSetting,
                IncrementalSLSetting,
                ClassIncrementalSetting,
                TaskIncrementalSLSetting,
                DomainIncrementalSLSetting,
                TraditionalSLSetting,
                MultiTaskSLSetting,
            ],
            [],
        ),
        (
            DiscreteTaskAgnosticSLMethod,
            [
                DiscreteTaskAgnosticSLSetting,
                IncrementalSLSetting,
                ClassIncrementalSetting,
                TaskIncrementalSLSetting,
                DomainIncrementalSLSetting,
                TraditionalSLSetting,
                MultiTaskSLSetting,
            ],
            [ContinualSLSetting],
        ),
        (
            IncrementalSLMethod,
            [
                IncrementalSLSetting,
                ClassIncrementalSetting,
                TaskIncrementalSLSetting,
                DomainIncrementalSLSetting,
                TraditionalSLSetting,
                MultiTaskSLSetting,
            ],
            [ContinualSLSetting, DiscreteTaskAgnosticSLSetting],
        ),
        (
            ClassIncrementalSLMethod,
            [
                IncrementalSLSetting,
                ClassIncrementalSetting,
                TaskIncrementalSLSetting,
                DomainIncrementalSLSetting,
                TraditionalSLSetting,
                MultiTaskSLSetting,
            ],
            [ContinualSLSetting, DiscreteTaskAgnosticSLSetting],
        ),
        (
            TaskIncrementalSLMethod,
            [TaskIncrementalSLSetting, MultiTaskSLSetting],
            [
                ContinualSLSetting,
                DiscreteTaskAgnosticSLSetting,
                IncrementalSLSetting,
                ClassIncrementalSetting,
                DomainIncrementalSLSetting,
                TraditionalSLSetting,
            ],
        ),
        (
            DomainIncrementalSLMethod,
            # TODO: What about this one?
            # MultiTaskSLSetting,
            [DomainIncrementalSLSetting],
            [
                ContinualSLSetting,
                DiscreteTaskAgnosticSLSetting,
                IncrementalSLSetting,
                ClassIncrementalSetting,
                TaskIncrementalSLSetting,
                TraditionalSLSetting,
            ],
        ),
        (
            TraditionalSLMethod,
            [TraditionalSLSetting, MultiTaskSLSetting],
            [
                ContinualSLSetting,
                DiscreteTaskAgnosticSLSetting,
                IncrementalSLSetting,
                TaskIncrementalSLSetting,
                DomainIncrementalSLSetting,
                ClassIncrementalSetting,
            ],
        ),
        (
            MultiTaskSLMethod,
            [MultiTaskSLSetting],
            [
                ContinualSLSetting,
                DiscreteTaskAgnosticSLSetting,
                IncrementalSLSetting,
                TaskIncrementalSLSetting,
                DomainIncrementalSLSetting,
                ClassIncrementalSetting,
                TraditionalSLSetting,
            ],
        ),
    ]

    for method_type, applicable_settings, inapplicable_settings in expectations:
        for setting_type in applicable_settings:
            assert method_type.is_applicable(setting_type)
        for setting_type in inapplicable_settings:
            assert not method_type.is_applicable(setting_type)
def test_get_parents():
    """Check the parent / child relationships between the SL settings.

    Exercises `get_children`, `immediate_children`, `parents`, `immediate_parents` and
    `get_parents` on the Incremental / TaskIncremental / ClassIncremental / Traditional
    settings.
    """
    # TODO: This is a bit funky, now that Class-Incremental is a "pointer" to
    # Incremental, and Traditional has been moved under TaskIncremental
    assert TraditionalSLSetting in IncrementalSLSetting.get_children()
    assert TraditionalSLSetting not in TaskIncrementalSLSetting.get_children()
    assert TraditionalSLSetting in IncrementalSLSetting.immediate_children()
    assert TaskIncrementalSLSetting not in TraditionalSLSetting.parents()
    assert ClassIncrementalSetting in TaskIncrementalSLSetting.immediate_parents()
    assert TaskIncrementalSLSetting not in TraditionalSLSetting.get_parents()
    assert ClassIncrementalSetting in TraditionalSLSetting.get_parents()
    # A setting is not one of its own parents.
    assert TraditionalSLSetting not in TraditionalSLSetting.get_parents()
@pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.")
def test_get_parents_domain_incremental():
    """Traditional SL should be a child of Domain-Incremental SL (expected to fail for now)."""
    assert TraditionalSLSetting in DomainIncrementalSLSetting.get_children()
    assert DomainIncrementalSLSetting in TraditionalSLSetting.get_immediate_parents()
@pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.")
def test_method_applicability_domain_incremental():
    """A Domain-Incremental method should also apply to Traditional SL (expected to fail for now)."""
    assert not DomainIncrementalSLMethod.is_applicable(ClassIncrementalSetting)
    assert not DomainIncrementalSLMethod.is_applicable(TaskIncrementalSLSetting)
    assert DomainIncrementalSLMethod.is_applicable(DomainIncrementalSLSetting)
    assert DomainIncrementalSLMethod.is_applicable(TraditionalSLSetting)
@pytest.mark.xfail(reason="Temporarily removing the domain-incremental<--traditional link.")
def test_get_parents_of_traditional_include_domain_incremental():
    """Domain-Incremental SL should be among the parents of Traditional SL.

    NOTE: This test used to be named `test_get_parents_domain_incremental`, exactly like
    another test defined earlier in this module. The second definition rebinds the
    module attribute, so pytest only collected this one and the earlier test never
    ran. Giving this test a unique name lets both be collected.
    """
    assert DomainIncrementalSLSetting in TraditionalSLSetting.get_parents()
================================================
FILE: sequoia/settings/sl/wrappers/__init__.py
================================================
""" Module defining gym wrappers that are specific to SL Environments.
"""
from .measure_performance import MeasureSLPerformanceWrapper
================================================
FILE: sequoia/settings/sl/wrappers/measure_performance.py
================================================
""" TODO: Create a Wrapper that measures performance over the first epoch of training in SL.
Then maybe after we can make something more general that also works for RL.
"""
import warnings
from collections import defaultdict
""" Wrapper that gets applied onto the environment in order to measure the online
training performance.
TODO: Move this somewhere more appropriate. There's also the RL version of the wrapper
here.
"""
from typing import Dict, Iterator, Optional, Tuple
import numpy as np
from gym.utils import colorize
from torch import Tensor
import wandb
from sequoia.common.gym_wrappers.measure_performance import MeasurePerformanceWrapper
from sequoia.common.metrics import ClassificationMetrics, Metrics
from sequoia.settings.base import Actions, Observations, Rewards
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.utils.utils import add_prefix
class MeasureSLPerformanceWrapper(
    MeasurePerformanceWrapper,
    # MeasurePerformanceWrapper[PassiveEnvironment] # Python 3.7
    # MeasurePerformanceWrapper[PassiveEnvironment, ClassificationMetrics] # Python 3.8+
):
    """Wrapper that measures the online (training) performance on a `PassiveEnvironment`.

    While performance is being measured, iterating over the wrapper yields
    `(observations, None)`: the rewards are only returned by `send` (or `step`) once an
    action was given for the current observations. The `ClassificationMetrics` of each
    (action, reward) pair are accumulated in `self._metrics`, keyed by the step index.

    When `first_epoch_only` is True, only the first pass through the environment is
    measured. From the second pass on, the rewards are yielded directly during
    iteration.
    """

    def __init__(
        self,
        env: PassiveEnvironment,
        first_epoch_only: bool = False,
        wandb_prefix: Optional[str] = None,
    ):
        """Wrap `env` and start monitoring the online performance.

        Parameters
        ----------
        env : PassiveEnvironment
            Environment to wrap. `env.unwrapped` must be a `PassiveEnvironment`.
        first_epoch_only : bool, optional
            Whether to only measure the performance during the first epoch.
        wandb_prefix : Optional[str], optional
            Prefix added to the keys of the metrics logged to wandb (if a run is active).
        """
        super().__init__(env)
        # Metrics mapping from step to the metrics at that step.
        self._metrics: Dict[int, ClassificationMetrics] = defaultdict(Metrics)
        self.first_epoch_only = first_epoch_only
        self.wandb_prefix = wandb_prefix
        # Counter for the number of steps.
        self._steps: int = 0
        assert isinstance(self.env.unwrapped, PassiveEnvironment)
        if not self.env.unwrapped.pretend_to_be_active:
            warnings.warn(
                RuntimeWarning(
                    colorize(
                        "Your online performance "
                        + ("during the first epoch " if self.first_epoch_only else "")
                        + "on this environment will be monitored! "
                        "Since this env is Passive, i.e. a Supervised Learning "
                        "DataLoader, the Rewards (y) will be withheld until "
                        "actions are passed to the 'send' method. Make sure that "
                        "your training loop can handle this small tweak.",
                        color="yellow",
                    )
                )
            )
            # Withhold the rewards until an action is received.
            self.env.unwrapped.pretend_to_be_active = True
        # Number of completed passes through `__iter__`.
        self.__epochs = 0

    def reset(self) -> Observations:
        """Reset the wrapped environment and return its first observations."""
        return self.env.reset()

    @property
    def in_evaluation_period(self) -> bool:
        """Whether the actions currently count towards the online performance."""
        if self.first_epoch_only:
            # TODO: Double-check the interaction of IterableDataset and __len__
            return self.__epochs == 0
        return True

    def _num_batches_per_epoch(self) -> int:
        """Number of batches in one pass through the dataset of the wrapped env."""
        # TODO: Check the length through the dataset? or through a more 'clean' way
        # e.g. through the `max_steps` property of a TimeLimit wrapper or something?
        num_batches = len(self.unwrapped.dataset) // self.batch_size
        if not self.unwrapped.drop_last:
            # The last, partial batch is kept.
            num_batches += 1 if len(self.unwrapped.dataset) % self.batch_size else 0
        return num_batches

    def _update_online_metrics(self, action: Actions, reward: Rewards) -> None:
        """Record the metrics for this (action, reward) pair if needed, then count the step.

        Shared by `step` and `send`, which previously each carried a copy of this logic.
        """
        if self.in_evaluation_period:
            # TODO: Edge case, but we also need the prediction for the last batch to be
            # counted.
            self._metrics[self._steps] += self.get_metrics(action, reward)
        elif self.first_epoch_only:
            # If we are at the last batch in the first epoch, we still keep the metrics
            # for that batch, even though we're technically not in the first epoch
            # anymore.
            currently_at_last_batch = self._steps == self._num_batches_per_epoch() - 1
            if self.__epochs == 1 and currently_at_last_batch:
                self._metrics[self._steps] += self.get_metrics(action, reward)
        self._steps += 1

    def step(self, action: Actions):
        """Step the wrapped env with `action`, recording the metrics for this step."""
        observation, reward, done, info = self.env.step(action)
        # TODO: Make this wrapper task-aware, using the task ids in this `observation`?
        self._update_online_metrics(action, reward)
        return observation, reward, done, info

    def send(self, action: Actions):
        """Send `action` to the wrapped env and return the rewards it gives back.

        Arrays and Tensors are wrapped into an `Actions` object first.
        """
        if not isinstance(action, Actions):
            assert isinstance(action, (np.ndarray, Tensor))
            action = Actions(action)
        reward = self.env.send(action)
        # Incrementing the step counter here is ok since we don't increment in the
        # iterator.
        self._update_online_metrics(action, reward)
        return reward

    def get_metrics(self, action: Actions, reward: Rewards) -> Metrics:
        """Create the `ClassificationMetrics` for a batch of predictions and labels.

        The metrics are also logged to wandb when a run is active, with the current
        step counter stored under the "steps" key.
        """
        assert action.y_pred.shape == reward.y.shape, (action.shapes, reward.shapes)
        metric = ClassificationMetrics(y_pred=action.y_pred, y=reward.y, num_classes=self.n_classes)
        if wandb.run:
            log_dict = metric.to_log_dict()
            if self.wandb_prefix:
                log_dict = add_prefix(log_dict, prefix=self.wandb_prefix, sep="/")
            log_dict["steps"] = self._steps
            wandb.log(log_dict)
        return metric

    def __iter__(self) -> Iterator[Tuple[Observations, Optional[Rewards]]]:
        """Iterate over the wrapped env for one epoch.

        Yields `(observations, None)` while the performance is being measured, and
        `(observations, rewards)` otherwise.
        """
        if self.__epochs == 1 and self.first_epoch_only:
            print(
                colorize(
                    "Your performance during the first epoch on this environment has "
                    "been successfully measured! The environment will now yield the "
                    "rewards (y) during iteration, and you are no longer required to "
                    "send an action for each observation.",
                    color="green",
                )
            )
            self.env.unwrapped.pretend_to_be_active = False

        for obs, rew in self.env.__iter__():
            if self.in_evaluation_period:
                yield obs, None
            else:
                yield obs, rew
        self.__epochs += 1
================================================
FILE: sequoia/settings/sl/wrappers/measure_performance_test.py
================================================
""" TODO: Tests for the 'measure performance wrapper' to be used to get the performance
over the first "epoch"
"""
import dataclasses
from typing import Iterable, Tuple, TypeVar
import numpy as np
import pytest
import torch
from torch.utils.data import TensorDataset
from sequoia.common import Config
from sequoia.common.metrics import ClassificationMetrics
from sequoia.settings.rl.wrappers import TypedObjectsWrapper
from sequoia.settings.sl import ClassIncrementalSetting
from sequoia.settings.sl.environment import PassiveEnvironment
from sequoia.settings.sl.incremental.objects import Actions, Observations, Rewards
from .measure_performance import MeasureSLPerformanceWrapper
T = TypeVar("T")


def with_is_last(iterable: Iterable[T]) -> Iterable[Tuple[T, bool]]:
    """Yield `(item, is_last)` pairs, fetching each item one step ahead of time.

    Function that mimics what's happening in pytorch-lightning, where the iterator
    is one-offset. This can cause a bit of headache in Sequoia's wrappers when iterating
    over an env, because they expect an action for each observation.

    An empty iterable yields nothing. (Calling `next` without a default here would
    raise StopIteration inside the generator, which Python turns into a RuntimeError.)
    """
    iterator = iter(iterable)
    sentinel = object()
    previous_value = next(iterator, sentinel)
    if previous_value is sentinel:
        # Nothing to iterate over.
        return
    # The next item is always fetched *before* the previous one is yielded.
    for current_value in iterator:
        yield previous_value, False
        previous_value = current_value
    yield previous_value, True
def test_measure_performance_wrapper():
    """Check the metrics gathered by the wrapper over three epochs of a dummy env.

    The dataset has 100 samples with `x == y == index` and a batch size of 1. The
    correct class is only predicted for the first 50 steps of each epoch.
    """
    dataset = TensorDataset(
        torch.arange(100).reshape([100, 1, 1, 1]) * torch.ones([100, 3, 32, 32]),
        torch.arange(100),
    )
    pretend_to_be_active = True
    env = PassiveEnvironment(
        dataset, batch_size=1, n_classes=100, pretend_to_be_active=pretend_to_be_active
    )
    # First, sanity-check the un-wrapped environment.
    for i, (x, y) in enumerate(env):
        # print(x)
        assert y is None if pretend_to_be_active else y is not None
        assert (x == i).all()
        action = i if i < 50 else 0
        reward = env.send(action)
        assert reward == i
    assert i == 99
    # This might be a bit weird, since .reset() will give the same obs as the first x
    # when iterating.
    obs = env.reset()
    for i, (x, y) in enumerate(env):
        # print(x)
        assert y is None
        assert (x == i).all()
        action = i if i < 50 else 0
        reward = env.send(action)
        assert reward == i
    assert i == 99

    # NOTE: function-local import: these names are local to this test.
    from sequoia.settings.sl.continual.objects import Observations, Actions, Rewards

    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    # TODO: Do we want to require Observations / Actions / Rewards objects?
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=False)
    for epoch in range(3):
        for i, (observations, rewards) in enumerate(env):
            assert observations is not None
            assert rewards is None
            assert (observations.x == i).all()
            # Only guess correctly for the first 50 steps.
            action = Actions(y_pred=np.array([i if i < 50 else 0]))
            rewards = env.send(action)
            assert (rewards.y == i).all()
        assert i == 99
    assert epoch == 2
    # One entry per step, across all three epochs.
    assert set(env.get_online_performance().keys()) == set(range(100 * 3))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        assert metric.accuracy == (1.0 if (i % 100) < 50 else 0.0), (i, step, metric)
    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # Since we guessed the correct class only during the first 50 steps.
    assert metrics.accuracy == 0.5
def make_dummy_env(n_samples: int = 100, batch_size: int = 1, drop_last: bool = False):
    """Create a typed passive env over a dummy dataset where `x == y == sample index`.

    Each image has shape [3, 32, 32] and is filled with the index of its sample. There
    are as many classes as there are samples, and the env starts out as a regular
    (non-active) passive environment.
    """
    labels = torch.arange(n_samples)
    images = torch.arange(n_samples).reshape([n_samples, 1, 1, 1]) * torch.ones(
        [n_samples, 3, 32, 32]
    )
    passive_env = PassiveEnvironment(
        TensorDataset(images, labels),
        batch_size=batch_size,
        n_classes=n_samples,
        pretend_to_be_active=False,
        drop_last=drop_last,
    )
    return TypedObjectsWrapper(
        passive_env,
        observations_type=Observations,
        actions_type=Actions,
        rewards_type=Rewards,
    )
def test_measure_performance_wrapper_first_epoch_only():
    """With `first_epoch_only=True`, only the first epoch counts towards the metrics.

    Rewards are withheld during the first epoch, and yielded during iteration afterwards.
    """
    env = make_dummy_env(n_samples=100, batch_size=1)
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)
    for epoch in range(2):
        print(f"start epoch {epoch}")
        for i, (observations, rewards) in enumerate(env):
            assert observations is not None
            if epoch == 0:
                assert rewards is None
            else:
                assert rewards is not None
                rewards_ = rewards  # save these for a comparison below.
            assert (observations.x == i).all()
            # Only guess correctly for the first 50 steps.
            action = Actions(y_pred=np.array([i if i < 50 else 0]))
            rewards = env.send(action)
            if epoch != 0:
                # We should just receive what we already got by iterating.
                assert rewards.y == rewards_.y
            assert (rewards.y == i).all()
        assert i == 99
    # do another epoch, but this time don't even send actions.
    for i, (observations, rewards) in enumerate(env):
        assert (observations.x == i).all()
        assert (rewards.y == i).all()
    assert i == 99
    # Only the 100 steps of the first epoch were recorded.
    assert set(env.get_online_performance().keys()) == set(range(100))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        assert metric.accuracy == (1.0 if (i % 100) < 50 else 0.0), (i, step, metric)
    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # Since we guessed the correct class only during the first 50 steps.
    assert metrics.accuracy == 0.5
    assert metrics.n_samples == 100
def test_measure_performance_wrapper_odd_vs_even():
    """Predicting the right class on even steps only gives alternating per-step accuracies."""
    env = MeasureSLPerformanceWrapper(
        make_dummy_env(n_samples=100, batch_size=1), first_epoch_only=True
    )
    for i, (observations, rewards) in enumerate(env):
        assert observations is not None
        assert rewards is None or rewards.y is None
        assert (observations.x == i).all()
        # Guess the correct class on even steps, and class 0 on odd steps.
        guess = i if i % 2 == 0 else 0
        rewards = env.send(Actions(y_pred=np.array([guess])))
        assert (rewards.y == i).all()
    assert i == 99

    assert set(env.get_online_performance().keys()) == set(range(100))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        expected_accuracy = 1.0 if step % 2 == 0 else 0.0
        assert metric.accuracy == expected_accuracy, (i, step, metric)

    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # The guess was correct for exactly half of the steps (the even ones).
    assert metrics.accuracy == 0.5
    assert metrics.n_samples == 100
def test_measure_performance_wrapper_odd_vs_even_passive():
    """Same as the odd-vs-even test, on a passive env wrapped with `first_epoch_only=False`."""
    images = torch.arange(100).reshape([100, 1, 1, 1]) * torch.ones([100, 3, 32, 32])
    dataset = TensorDataset(images, torch.arange(100))
    env = PassiveEnvironment(dataset, batch_size=1, n_classes=100, pretend_to_be_active=False)
    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=False)

    for i, (observations, rewards) in enumerate(env):
        assert observations is not None
        assert rewards is None or rewards.y is None
        assert (observations.x == i).all()
        # Guess the correct class on even steps, and class 0 on odd steps.
        guess = i if i % 2 == 0 else 0
        rewards = env.send(Actions(y_pred=np.array([guess])))
        assert (rewards.y == i).all()
    assert i == 99

    assert set(env.get_online_performance().keys()) == set(range(100))
    for i, (step, metric) in enumerate(env.get_online_performance().items()):
        assert step == i
        expected_accuracy = 1.0 if step % 2 == 0 else 0.0
        assert metric.accuracy == expected_accuracy, (i, step, metric)

    metrics = env.get_average_online_performance()
    assert isinstance(metrics, ClassificationMetrics)
    # The guess was correct for exactly half of the steps (the even ones).
    assert metrics.accuracy == 0.5
    assert metrics.n_samples == 100
def test_last_batch():
    """The last, partial batch should be counted when the batch size does not divide
    the number of samples (110 samples in batches of 20: five full batches, then 10).
    """
    env = MeasureSLPerformanceWrapper(
        make_dummy_env(n_samples=110, batch_size=20), first_epoch_only=True
    )
    for i, (obs, rew) in enumerate(env):
        assert rew is None
        # Only the sixth batch (index 5) is a partial one.
        if i == 5:
            assert obs.batch_size == 10, i
        else:
            assert obs.batch_size == 20, i
        first_label = i * 20
        # Always predict the correct labels.
        actions = Actions(y_pred=torch.arange(first_label, first_label + obs.batch_size))
        rewards = env.send(actions)
        assert (rewards.y == torch.arange(first_label, first_label + obs.batch_size)).all()

    perf = env.get_average_online_performance()
    assert perf.accuracy == 1.0
    assert perf.n_samples == 110
from sequoia.methods.models.base_model import BaseModel
def test_last_batch_baseline_model():
    """BUG: Baseline method is doing something weird at the last batch, and I dont know quite why.

    Runs the training step of a `BaseModel` on every batch of a wrapped dummy env whose
    last batch is partial (110 samples in batches of 20), then checks that all 110
    samples were counted in the online performance.
    """
    n_samples = 110
    batch_size = 20
    # Note: the y's here are different.
    dataset = TensorDataset(
        torch.arange(n_samples).reshape([n_samples, 1, 1, 1]) * torch.ones([n_samples, 3, 32, 32]),
        torch.zeros(n_samples, dtype=int),
    )
    pretend_to_be_active = False
    env = PassiveEnvironment(
        dataset,
        batch_size=batch_size,
        n_classes=n_samples,
        pretend_to_be_active=pretend_to_be_active,
    )
    env = TypedObjectsWrapper(
        env, observations_type=Observations, actions_type=Actions, rewards_type=Rewards
    )
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)
    # FIXME: Hacky setup: Should instead have a way of using a 'test' setting with a
    # configurable in-memory test dataset.
    setting = ClassIncrementalSetting()
    setting.train_env = env
    model = BaseModel(setting=setting, hparams=BaseModel.HParams(), config=Config(debug=True))
    for i, (obs, rew) in enumerate(env):
        # Add task labels (all ones) to the observations before giving them to the model.
        obs = dataclasses.replace(
            obs, task_labels=torch.ones([obs.x.shape[0]], device=obs.x.device)
        )
        assert rew is None
        forward_pass = model.training_step((obs, rew), batch_idx=i)
        loss = model.training_step_end([forward_pass])
        print(loss)
    perf = env.get_average_online_performance()
    assert perf.n_samples == 110
@pytest.mark.parametrize("drop_last", [False, True])
def test_delayed_actions(drop_last: bool):
    """Test that whenever some intermediate between the env and the Method is
    caching some of the observations, the actions and rewards still end up lining up.

    This is just to replicate what's happening in Pytorch Lightning, where they use some
    function to check if the batch is the last one or not, and was causing issue before.
    """
    env = make_dummy_env(n_samples=110, batch_size=20, drop_last=drop_last)
    env = MeasureSLPerformanceWrapper(env, first_epoch_only=True)
    # 110 samples in batches of 20: 5 full batches, plus a partial one unless dropped.
    expected_batches = 5 if drop_last else 6

    i = 0
    for i, ((obs, rew), is_last) in enumerate(with_is_last(env)):
        print(i, obs.batch_size)
        assert rew is None
        if i != 5:
            assert obs.batch_size == 20, i
        else:
            assert obs.batch_size == 10, i
        actions = Actions(y_pred=torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size])
        rewards = env.send(actions)
        assert (rewards.y == torch.arange(i * 20, (i + 1) * 20)[: obs.batch_size]).all()
    assert i == expected_batches - 1
    assert is_last

    # Second epoch: no actions are sent anymore.
    for i, ((obs, rew), is_last) in enumerate(with_is_last(env)):
        print(i)
        # We get rewards now that we're outside of the first epoch.
        assert rew is not None
        if i < 5:
            assert obs.batch_size == 20, i
        else:
            assert obs.batch_size == 10, i

    # NOTE: These used to be written as `assert x == 5 if drop_last else 6`, which
    # parses as `assert (x == 5) if drop_last else 6`: with `drop_last=False` it only
    # asserted the truthy constant and could never fail.
    assert i == expected_batches - 1
    assert len(list(env)) == expected_batches
    assert len(list(with_is_last(env))) == expected_batches

    perf = env.get_average_online_performance()
    assert perf.accuracy == 1.0
    # BUG: The number of samples for the metrics isn't quite right, should include the
    # last batch, even if it doesn't have a 'full' batch.
    assert perf.n_samples == (100 if drop_last else 110)
================================================
FILE: sequoia/settings.puml
================================================
@startuml settings
!include gym.puml
!include pytorch_lightning.puml
' !include common.puml
' TODO: there must be a better way to show only one thing from a
' package, without having to import all the package and then
' remove everything but that one thing!
remove gym.spaces
remove Wrapper
' remove common
namespace torch {
class DataLoader
class Tensor
}
package settings {
' !include base/base.puml
abstract class Setting extends SettingABC {
' 'root' setting.
-- static (class) attributes --
+ {static} Observations: Type[Observations]
+ {static} Actions: Type[Actions]
+ {static} Rewards: Type[Rewards]
.. attributes ..
+ observation_space: Space
+ action_space: Space
+ reward_space: Space
.. methods ..
{abstract} + apply(Method): Results
}
package assumptions as settings.assumptions {
package continual as settings.assumptions.continual {
abstract class ContinualAssumption extends Setting {
}
}
package incremental as settings.assumptions.incremental {
abstract class IncrementalAssumption extends ContinualAssumption {
+ nb_tasks: int
+ task_labels_at_train_time: bool
+ task_labels_at_test_time: bool
+ {field} known_task_boundaries_at_train_time: bool = True (constant)
+ {field} known_task_boundaries_at_test_time: bool = True (constant)
' TODO: This is actually a constant at the moment. Even ContinualRL
' doesn't have this set to 'true': since there is only one task,
' there aren't any 'task boundaries' to speak of.
+ {field} smooth_task_boundaries: bool
- _current_task_id: int
+ train_loop()
+ test_loop()
}
abstract class IncrementalObservations extends Observations {
+ task_labels: Optional[Tensor]
}
abstract class IncrementalResults extends Results {
}
}
' package task_incremental as settings.assumptions.task_incremental {
' abstract class TaskIncrementalAssumption extends IncrementalAssumption {
' }
' }
' package iid as settings.assumptions.iid {
' abstract class TraditionalSLSetting extends TaskIncrementalSLSetting {
' }
' }
}
package passive as settings.passive {
class PassiveEnvironment implements Environment {}
abstract class SLSetting extends Setting {
{abstract} + train_dataloader(): PassiveEnvironment
{abstract} + val_dataloader(): PassiveEnvironment
{abstract} + test_dataloader(): PassiveEnvironment
+ dataset: str
+ available_datasets: dict
}
' PassiveEnvironment extends DataLoader
package cl as settings.passive.cl {
class ClassIncrementalSetting implements SLSetting, IncrementalAssumption {
{static} + Results: Type[Results] = IncrementalSLResults
+ nb_tasks: int
+ task_labels_at_train_time: bool = True
+ task_labels_at_test_time: bool = False
+ transforms: List[Transforms]
+ class_order: Optional[List[int]] = None
+ relabel: bool = False
}
class IncrementalSLResults implements IncrementalResults {}
package domain_incremental as settings.passive.cl.domain_incremental {
class DomainIncrementalSetting extends ClassIncrementalSetting {
+ relabel: bool = True
}
}
package task_incremental as settings.passive.cl.task_incremental {
class TaskIncrementalSLSetting extends ClassIncrementalSetting {
{field} + task_labels_at_train_time: bool = True (constant)
{field} + task_labels_at_test_time: bool = True (constant)
}
' class TaskIncrementalResults extends IncrementalSLResults{}
package multi_task as settings.passive.cl.task_incremental.multi_task {
class MultiTaskSetting extends TaskIncrementalSLSetting {
}
}
}
package iid as settings.passive.cl.iid {
class TraditionalSLSetting extends TaskIncrementalSLSetting, DomainIncrementalSetting {
{field} + nb_tasks: int = 1 (constant)
}
class IIDResults extends IncrementalSLResults{}
}
}
}
package active as settings.active {
'note: This is currently called GymDataLoader in the repo.
class ActiveEnvironment extends Environment {}
abstract class RLSetting extends Setting {
{abstract} + train_dataloader(): ActiveEnvironment
{abstract} + val_dataloader(): ActiveEnvironment
{abstract} + test_dataloader(): ActiveEnvironment
}
package continual as settings.active.continual {
class ContinualRLSetting implements RLSetting, IncrementalAssumption {
{static} + Results: Type[Results] = RLResults
+ dataset: str = "cartpole"
+ nb_tasks: int = 1
+ train_max_steps: int = 10000
+ max_episodes: Optional[int] = None
+ steps_per_task: Optional[int] = None
+ episodes_per_task: Optional[int] = None
+ test_steps_per_task: int = 1000
+ test_steps: Optional[int] = None
+ smooth_task_boundaries: bool = True
+ train_task_schedule: dict
+ val_task_schedule: dict
+ test_task_schedule: dict
+ task_noise_std: float
+ train_wrappers: List[gym.Wrapper]
+ valid_wrappers: List[gym.Wrapper]
+ test_wrappers: List[gym.Wrapper]
+ add_done_to_observations: bool = False
}
class RLResults implements IncrementalResults
package incremental as settings.active.continual.incremental {
class IncrementalRLSetting extends ContinualRLSetting {
+ nb_tasks: int = 10
{field} + smooth_task_boundaries: bool = False (constant)
+ task_labels_at_train_time: bool = True
+ task_labels_at_test_time: bool = False
}
package task_incremental_rl as settings.active.incremental.task_incremental_rl {
class TaskIncrementalRLSetting extends IncrementalRLSetting {
{field} + task_labels_at_train_time: bool = True (constant)
{field} + task_labels_at_test_time: bool = True (constant)
}
package stationary as settings.active.incremental.task_incremental_rl.stationary {
class RLSetting extends TaskIncrementalRLSetting {
{field} + nb_tasks: int = 1 (constant)
}
}
}
}
}
}
}
IncrementalAssumption -left-> IncrementalResults : produces
IncrementalAssumption -down-> IncrementalObservations : envs yield
ClassIncrementalSetting -left-> IncrementalSLResults : produces
TaskIncrementalSLSetting -left-> TaskIncrementalResults : produces
TraditionalSLSetting -left-> IIDResults : produces
SLSetting --> PassiveEnvironment : uses
RLSetting -right-> ActiveEnvironment : uses
ContinualRLSetting -> RLResults : produces
@enduml
================================================
FILE: sequoia/utils/__init__.py
================================================
""" Miscellaneous utility functions. """
import sys
# from .generic_functions import *
from .generic_functions.singledispatchmethod import singledispatchmethod
from .logging_utils import get_logger
from .parseable import Parseable
from .serialization import Serializable
from .encode import encode
# from .utils import
================================================
FILE: sequoia/utils/categorical.py
================================================
from typing import Any, Iterable, Optional, Union
import torch
from torch import Tensor
from torch.distributions import Categorical as Categorical_
class Categorical(Categorical_):
    """`torch.distributions.Categorical` with a few small conveniences.

    The distribution can be indexed and iterated over along its first dimension, so
    that it can be 'split' into a sequence of distributions (this helps with the
    splitting in the output heads). It also exposes the `device` of the tensor it was
    created from, and can be moved to another device with `to`.
    """

    def __init__(
        self,
        probs: Optional[Tensor] = None,
        logits: Optional[Tensor] = None,
        validate_args: bool = None,
    ):
        super().__init__(probs=probs, logits=logits, validate_args=validate_args)
        # Remember the device of whichever tensor was given.
        source = logits if probs is None else probs
        self._device: torch.device = source.device

    def __getitem__(self, index: Optional[int]) -> "Categorical":
        """Return the distribution built from `self.logits[index]`."""
        selected_logits = self.logits[index]
        return Categorical(logits=selected_logits)

    def __iter__(self) -> Iterable["Categorical"]:
        """Yield one distribution per entry along the first dimension of the logits."""
        yield from map(self.__getitem__, range(self.logits.shape[0]))

    def __add__(self, other: Union["Categorical_", Any]) -> "Categorical":
        # Idea: how about we return a wrapped version of `self` whose
        # 'sample' returns self.sample() + `other`?
        return NotImplemented

    def __mul__(self, other: Union["Categorical_", Any]) -> "Categorical":
        # Idea: how about we return a wrapped version of `self` whose
        # 'sample' returns self.sample() * `other`?
        return NotImplemented

    @property
    def device(self) -> torch.device:
        """The device of the tensors of this distribution.

        @lebrice: Not sure why this isn't already part of torch.Distribution base-class.
        """
        return self._device

    def to(self, device: Union[str, torch.device]) -> "Categorical":
        """Moves this distribution to another device.

        A new distribution of the same type is created from the moved logits.

        @lebrice: Not sure why this isn't already part of torch.Distribution base-class.
        """
        moved_logits = self.logits.to(device=device)
        cls = type(self)
        return cls(logits=moved_logits)
================================================
FILE: sequoia/utils/data_utils.py
================================================
import os
from pathlib import Path
from typing import Dict, Iterable, Iterator, Sized, Tuple
import numpy as np
import torch
from torch import Tensor, nn
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import CIFAR100, VisionDataset
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
def train_valid_split(
    train_dataset: VisionDataset, valid_fraction: float = 0.2
) -> Tuple[VisionDataset, VisionDataset]:
    """Randomly split `train_dataset` into a training and a validation subset.

    The indices are shuffled with `np.random.shuffle`. The first
    `int(len(train_dataset) * valid_fraction)` shuffled indices form the validation
    subset, and the remaining ones form the training subset.

    Returns the `(train, valid)` subsets.
    """
    n_samples = len(train_dataset)
    n_valid = int(n_samples * valid_fraction)

    shuffled_indices = np.arange(n_samples, dtype=int)
    np.random.shuffle(shuffled_indices)

    valid = Subset(train_dataset, shuffled_indices[:n_valid])
    train = Subset(train_dataset, shuffled_indices[n_valid:])
    logger.info(f"Training samples: {len(train)}, Valid samples: {len(valid)}")
    return train, valid
def unbatch(dataloader: Iterable[Tuple[Tensor, Tensor]]) -> Iterable[Tuple[Tensor, Tensor]]:
    """Yield the individual items of each batch of a dataloader.

    Tuple batches are unpacked in lockstep (one tuple per item); any other batch is
    simply iterated over.

    NOTE: this is a generator for a single pass through the dataloader, not multiple.
    """
    for batch in dataloader:
        items = zip(*batch) if isinstance(batch, tuple) else batch
        yield from items
class unlabeled(Iterable[Tuple[Tensor]], Sized):
    """Iterable over a labeled DataLoader that only keeps the first item of each batch.

    Every batch of the wrapped loader has to be a tuple. Only its first element (the
    inputs) is yielded, as a 1-tuple. The length is that of the wrapped loader.
    """

    def __init__(self, labeled_dataloader: DataLoader):
        self.loader = labeled_dataloader

    def __iter__(self) -> Iterator[Tuple[Tensor]]:
        for labeled_batch in self.loader:
            assert isinstance(labeled_batch, tuple)
            inputs = labeled_batch[0]
            yield (inputs,)

    def __len__(self) -> int:
        return len(self.loader)
def keep_in_memory(dataset: VisionDataset) -> None:
    """Converts the dataset's `data` and `targets` attributes to Tensors.

    This has the consequence of keeping the entire dataset in memory.

    The dataset is modified in-place. Attributes that already are numpy arrays or
    Tensors are left as they are.
    """
    # NOTE: `data` is optional (checked with `hasattr`), but `targets` is accessed
    # directly, so the dataset is assumed to always have a `targets` attribute.
    if hasattr(dataset, "data") and not isinstance(dataset.data, (np.ndarray, Tensor)):
        dataset.data = torch.as_tensor(dataset.data)
    if not isinstance(dataset.targets, (np.ndarray, Tensor)):
        dataset.targets = torch.as_tensor(dataset.targets)
    if isinstance(dataset, CIFAR100):
        # TODO: Cifar100 seems to want its 'data' to be a numpy ndarray.
        dataset.data = np.asarray(dataset.data)
class FixChannels(nn.Module):
    """Transform that gives three channels to single-channel (or channel-less) images.

    Examples of input shape -> output shape:
        [28, 28]        -> [3, 28, 28]     (a channel dim is added, then repeated 3 times)
        [1, 28, 28]     -> [3, 28, 28]     (the channel is repeated 3 times)
        [10, 1, 28, 28] -> [10, 3, 28, 28] (same, keeping the batch dimension intact)

    Inputs of any other shape are returned unchanged.
    """

    def __call__(self, x: Tensor) -> Tensor:
        n_dims = x.ndim
        if n_dims == 2:
            # No channel dimension: add one in front, then copy it three times.
            x = x.reshape([1, *x.shape]).repeat(3, 1, 1)
        elif n_dims == 3 and x.shape[0] == 1:
            x = x.repeat(3, 1, 1)
        elif n_dims == 4 and x.shape[1] == 1:
            x = x.repeat(1, 3, 1, 1)
        return x
def get_imagenet_location() -> Path:
    """Return the directory where the torchvision ImageNet dataset can be found.

    The location is resolved in this order:
    1. The `IMAGENET_DIR` environment variable, when it is set (and not empty).
    2. The first entry of the table of known hostname prefixes that matches the
       hostname of this machine.

    Raises
    ------
    RuntimeError
        If neither of the above gives a location.
    """
    from socket import gethostname

    # An explicit user setting takes precedence over the hostname-based guess.
    # (This check used to come after the hostname lookup, where it could never be
    # reached because of the catch-all "" prefix below.)
    env_location = os.environ.get("IMAGENET_DIR")
    if env_location:
        return Path(env_location)

    hostname = gethostname()
    # For each hostname prefix, the location where the torchvision ImageNet dataset can be found.
    # TODO: Add the location for your own machine.
    imagenet_locations: Dict[str, Path] = {
        "mila": Path("/network/datasets/imagenet.var/imagenet_torchvision"),
        # NOTE: every hostname starts with "", so this entry acts as the default.
        "": Path("/network/datasets/imagenet.var/imagenet_torchvision"),
    }
    for prefix, location in imagenet_locations.items():
        if hostname.startswith(prefix):
            return location

    raise RuntimeError(
        f"Could not find the ImageNet dataset on this machine with hostname "
        f"{hostname}. Set the IMAGENET_DIR environment variable, or add an entry to the "
        f"known <hostname prefix, location> pairs: {imagenet_locations}"
    )
================================================
FILE: sequoia/utils/encode.py
================================================
""" Registers more datatypes to be used by the 'encode' function from
simple-parsing when serializing objects to json or yaml.
"""
import enum
import inspect
from pathlib import Path
from typing import Any, List, Type, Union
import numpy as np
import torch
from simple_parsing.helpers.serialization import encode, register_decoding_fn
from torch import Tensor, nn, optim
# Register functions for decoding Tensor and ndarray fields from json/yaml.
register_decoding_fn(Tensor, torch.as_tensor)
register_decoding_fn(np.ndarray, np.asarray)
# Fields annotated as a type of Module / Optimizer are decoded with the identity
# function, i.e. the stored value is used as-is.
register_decoding_fn(Type[nn.Module], lambda v: v)
register_decoding_fn(Type[optim.Optimizer], lambda v: v)
# NOTE: This handler is what enables logging tensors as-is when calling to_dict on a
# Serializable dataclass. Comment it out to disable that.
@encode.register(Tensor)
def no_op_encode(value: Any):
    """Return `value` unchanged (handler registered on `encode` for `Tensor` values)."""
    return value
# TODO: Look deeper into how things are pickled and moved by pytorch-lightning.
# Right now there is a warning by pytorch-lightning saying that some metrics
# will not be included in a checkpoint because they are lists instead of Tensors.
# This is because they got encoded with the function below when they shouldn't
# have.
# @encode.register(Tensor)
@encode.register(np.ndarray)
def encode_tensor(obj: Union[Tensor, np.ndarray]) -> List:
    """Encode an array through its `tolist()` method."""
    return obj.tolist()
@encode.register
def encode_type(obj: type) -> str:
    """Encode a type as a string.

    Classes are encoded as their `__qualname__`, functions as their `__name__`,
    and anything else as `str(obj)`.
    """
    if inspect.isclass(obj):
        return str(obj.__qualname__)
    if inspect.isfunction(obj):
        return str(obj.__name__)
    return str(obj)
@encode.register
def encode_path(obj: Path) -> str:
    """Encode a Path as its string representation."""
    return str(obj)
@encode.register
def encode_device(obj: torch.device) -> str:
    """Encode a torch device as its string representation."""
    return str(obj)
@encode.register
def encode_enum(value: enum.Enum):
    """Encode an enum member as its underlying `value`."""
    return value.value
================================================
FILE: sequoia/utils/generic_functions/__init__.py
================================================
""" Defines a bunch of single-dispatch generic functions, that are applicable
on structured objects, numpy arrays, tensors, spaces, etc.
"""
from ._namedtuple import NamedTuple, is_namedtuple
from .concatenate import concatenate
from .detach import detach
from .move import move
from .replace import replace
from .singledispatchmethod import singledispatchmethod
from .slicing import get_slice, set_slice
from .stack import stack
from .to_from_tensor import from_tensor, to_tensor
================================================
FILE: sequoia/utils/generic_functions/_namedtuple.py
================================================
""" Small 'patch' for the NamedTuple type, so that checks equivalent to
`isinstance(obj, NamedTuple)` and `issubclass(some_class, NamedTuple)` work
correctly.
"""
from inspect import isclass
from typing import Any, NamedTuple, Type
def is_namedtuple(obj: Any) -> bool:
    """Return True when `obj` is an instance of a namedtuple class.

    Taken from https://stackoverflow.com/a/62692640/6388696
    """
    if not isinstance(obj, tuple):
        return False
    # Namedtuple instances are tuples that also expose `_asdict` and `_fields`.
    return all(hasattr(obj, attribute) for attribute in ("_asdict", "_fields"))
def is_namedtuple_type(obj: Type) -> bool:
    """Return True when `obj` is `NamedTuple` itself or a namedtuple class.

    Taken from https://stackoverflow.com/a/62692640/6388696
    """
    if obj is NamedTuple:
        return True
    if not (isclass(obj) and issubclass(obj, tuple)):
        return False
    return hasattr(obj, "_asdict") and hasattr(obj, "_fields")
================================================
FILE: sequoia/utils/generic_functions/_namedtuple_test.py
================================================
from typing import NamedTuple
import pytest
from sequoia.utils.generic_functions._namedtuple import is_namedtuple, is_namedtuple_type
class DummyTuple(NamedTuple):
    # Minimal namedtuple type used as a fixture by the tests in this file.
    a: int
    b: str
def test_is_namedtuple():
    """A NamedTuple instance is recognized by `is_namedtuple`."""
    bob = DummyTuple(1, "bob")
    assert is_namedtuple(bob)
def test_is_namedtuple_type():
    """Namedtuple classes (and `NamedTuple` itself) are recognized, builtin containers are not."""
    assert is_namedtuple_type(DummyTuple)
    assert is_namedtuple_type(NamedTuple)
    assert not is_namedtuple_type(tuple)
    assert not is_namedtuple_type(list)
    assert not is_namedtuple_type(dict)
@pytest.mark.xfail(reason="Not sure this is actually a good idea.")
def test_instance_check():
    """`isinstance` checks against NamedTuple (expected to fail for now)."""
    bob = DummyTuple(1, "bob")
    assert isinstance(bob, DummyTuple)
    assert isinstance(bob, NamedTuple)
    assert isinstance(bob, tuple)
@pytest.mark.xfail(reason="Not sure this is actually a good idea.")
def test_subclass_check():
    """`issubclass` checks against NamedTuple (expected to fail for now).

    This test exercises `issubclass`, so it carries its own name rather than
    sharing the name of the `isinstance` test (two module-level functions with
    the same name would leave only the last one visible to pytest).
    """
    assert issubclass(DummyTuple, NamedTuple)
    assert issubclass(DummyTuple, tuple)
    assert issubclass(DummyTuple, DummyTuple)
    assert not issubclass(list, DummyTuple)
    assert not issubclass(tuple, DummyTuple)
    assert not issubclass(NamedTuple, DummyTuple)
================================================
FILE: sequoia/utils/generic_functions/concatenate.py
================================================
""" Generic function for concatenating ndarrays/tensors/distributions/Mappings
etc.
Extremely similar to `stack.py`, but concatenates along the described axis.
"""
from collections.abc import Mapping
from functools import singledispatch
from typing import Any, Dict, List, Sequence, TypeVar, Union
import numpy as np
import torch
from continuum import TaskSet
from continuum.tasks import concat as _continuum_concat
from torch import Tensor
from torch.utils.data import ChainDataset, ConcatDataset, Dataset, IterableDataset
from sequoia.utils.categorical import Categorical
T = TypeVar("T")
# @overload
# def concatenate(first_item: List[T], **kwargs) -> Sequence[T]:
# ...
# @overload
# def concatenate(first_item: T, *others: T, **kwargs) -> Sequence[T]:
# ...
@singledispatch
def concatenate(first_item: Union[T, List[T]], *others: T, **kwargs) -> Union[Sequence[T], Any]:
    """Concatenate `first_item` and `others`.

    Can be called with several items, `concatenate(a, b, c)`, or with a single
    list/tuple of items, `concatenate([a, b, c])`.

    This is the fallback for item types without a registered handler: the items
    are gathered into an ndarray, and `kwargs` are passed to `np.asarray`.
    """
    if others:
        return np.asarray([first_item, *others], **kwargs)

    # Called like `concatenate(item_list)`: unpack the list and dispatch on its items.
    assert isinstance(first_item, (list, tuple))
    items = first_item
    if len(items) == 1:
        # Called like `concatenate([some_tensor])` -> returns `some_tensor`.
        return items[0]
    assert len(items) > 1
    head, *tail = items
    return concatenate(head, *tail, **kwargs)
@concatenate.register(type(None))
def _concatenate_none(first_item: None, *others: None, **kwargs) -> None:
    """Concatenate `None` values: the result is a single `None`.

    NOTE: Concatenating a list of 'None' values will produce a single None output
    rather than an ndarray of Nones.
    """
    # Mixing None with actual values is not supported by this handler.
    assert all(other is None for other in others)
    return None
@concatenate.register(np.ndarray)
def _concatenate_ndarrays(first_item: np.ndarray, *others: np.ndarray, **kwargs) -> np.ndarray:
    """Concatenate ndarrays; `kwargs` are forwarded to the numpy function used."""
    arrays = [first_item, *others]
    if first_item.shape:
        return np.concatenate(arrays, **kwargs)
    # can't concatenate 0-dimensional arrays, so we stack them instead:
    return np.stack(arrays, **kwargs)
@concatenate.register(Tensor)
def _concatenate_tensors(first_item: Tensor, *others: Tensor, **kwargs) -> Tensor:
    """Concatenate tensors; `kwargs` are forwarded to the torch function used."""
    tensors = [first_item, *others]
    if first_item.shape:
        return torch.cat(tensors, **kwargs)
    # can't concatenate 0-dimensional tensors, so we stack them instead.
    return torch.stack(tensors, **kwargs)
@concatenate.register(Mapping)
def _concatenate_dicts(first_item: Dict, *others: Dict, **kwargs) -> Dict:
    """Concatenate mappings key by key, using the keys of `first_item`.

    The result has the same type as `first_item`.
    """
    merged = {}
    for key in first_item.keys():
        merged[key] = concatenate(first_item[key], *(other[key] for other in others), **kwargs)
    return type(first_item)(**merged)
@concatenate.register(Categorical)
def _concatenate_distributions(
    first_item: Categorical, *others: Categorical, **kwargs
) -> Categorical:
    """Concatenate Categorical distributions by concatenating their logits.

    `kwargs` (e.g. `dim`) are forwarded to `torch.cat` as keyword arguments.
    """
    return Categorical(
        logits=torch.cat([first_item.logits, *(other.logits for other in others)], **kwargs)
    )
@concatenate.register
def _concatenate_tasksets(first_item: TaskSet, *others: TaskSet) -> TaskSet:
    """Concatenate continuum TaskSets using continuum's own `concat` function."""
    return _continuum_concat([first_item, *others])
@concatenate.register(Dataset)
def _concatenate_datasets(first_item: Dataset[T], *others: Dataset[T]) -> ConcatDataset[T]:
    """Concatenate datasets into a `ConcatDataset`."""
    return ConcatDataset([first_item, *others])
@concatenate.register
def _concatenate_iterable_datasets(
    first_item: IterableDataset, *others: IterableDataset
) -> ChainDataset:
    """Concatenate iterable datasets into a `ChainDataset`."""
    return ChainDataset([first_item, *others])
================================================
FILE: sequoia/utils/generic_functions/detach.py
================================================
from collections.abc import Mapping
from functools import singledispatch
from typing import Any, Dict, Sequence, TypeVar
import numpy as np
from sequoia.utils.generic_functions._namedtuple import is_namedtuple
from ..categorical import Categorical
T = TypeVar("T")
@singledispatch
def detach(value: T) -> T:
    """Detach `value` by calling its own `detach` method.

    This is the fallback for types without a registered handler.

    Raises:
        NotImplementedError: if `value` has no callable `detach` attribute.
    """
    detach_method = getattr(value, "detach", None)
    if callable(detach_method):
        return detach_method()
    raise NotImplementedError(f"Don't know how to detach value {value}!")
@detach.register(np.ndarray)
@detach.register(type(None))
@detach.register(str)
@detach.register(int)
@detach.register(bool)
@detach.register(float)
def no_op_detach(v: Any) -> Any:
    """Return `v` unchanged: values of these types are returned as-is."""
    return v
@detach.register(list)
@detach.register(tuple)
@detach.register(set)
def _detach_sequence(x: Sequence[T]) -> Sequence[T]:
    """Detach every item of a list/tuple/set, returning a container of the same type."""
    detached_items = [detach(item) for item in x]
    container_type = type(x)
    if is_namedtuple(x):
        # Namedtuple constructors take their fields as separate arguments.
        return container_type(*detached_items)
    return container_type(detached_items)
@detach.register(Mapping)
def _detach_dict(d: Dict[str, Any]) -> Dict[str, Any]:
    """Detaches all the keys and values in a mapping (recursing through `detach`).

    The result has the same type as `d`, built from keyword arguments.
    """
    return type(d)(**{detach(k): detach(v) for k, v in d.items()})
@detach.register
def _detach_categorical(v: Categorical) -> Categorical:
    """Return a new distribution of the same type built from the detached logits of `v`."""
    return type(v)(logits=v.logits.detach())
================================================
FILE: sequoia/utils/generic_functions/move.py
================================================
"""Defines a singledispatch function to move objects to a given device.
"""
from functools import singledispatch
from typing import Dict, Sequence, TypeVar, Union
import torch
from sequoia.utils.generic_functions._namedtuple import is_namedtuple
T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")
@singledispatch
def move(x: T, device: Union[str, torch.device]) -> T:
    """Move `x` to `device` through its `to` method, when it has one.

    Objects without a callable `to` attribute are returned unchanged, and so is
    `x` when `device` is falsy.

    NOTE: This works for Tensors or any collection of Tensors.
    """
    to_method = getattr(x, "to", None)
    if callable(to_method) and device:
        return to_method(device=device)
    return x
@move.register(dict)
def move_dict(x: Dict[K, V], device: Union[str, torch.device]) -> Dict[K, V]:
    """Move all the keys and values of a dict to `device`.

    The result has the same type as `x`, built from keyword arguments.
    """
    return type(x)(**{move(k, device): move(v, device) for k, v in x.items()})
@move.register(list)
@move.register(tuple)
@move.register(set)
def move_sequence(x: Sequence[T], device: Union[str, torch.device]) -> Sequence[T]:
    """Move every item of a list/tuple/set to `device`, keeping the container type."""
    moved_items = [move(item, device) for item in x]
    container_type = type(x)
    if is_namedtuple(x):
        # Namedtuple constructors take their fields as separate arguments.
        return container_type(*moved_items)
    return container_type(moved_items)
================================================
FILE: sequoia/utils/generic_functions/replace.py
================================================
""" Generic function for replacing items in an object. """
import dataclasses
from collections.abc import Sequence
from functools import singledispatch
from typing import Dict, Tuple, TypeVar
from gym import spaces
from sequoia.utils.generic_functions._namedtuple import is_namedtuple
T = TypeVar("T")
class Dataclass(type):
    """Used so we can do `isinstance(obj, Dataclass)`, or maybe even
    register dataclass handlers for singledispatch generic functions.

    NOTE(review): since this class derives from `type`, the two hooks below are
    invoked for classes that use `Dataclass` as their *metaclass*; it looks like
    a plain `isinstance(obj, Dataclass)` would not go through them - confirm.
    """

    def __instancecheck__(self, instance) -> bool:
        # Return true if instance should be considered a (direct or indirect)
        # instance of class. If defined, called to implement
        # isinstance(instance, class).
        return dataclasses.is_dataclass(instance)

    def __subclasscheck__(self, subclass) -> bool:
        # Return true if subclass should be considered a (direct or indirect)
        # subclass of class. If defined, called to implement
        # issubclass(subclass, class).
        return dataclasses.is_dataclass(subclass)
@singledispatch
def replace(obj: T, **items) -> T:
    """Replaces the given `items` (name -> new value) in `obj`.

    Returns the modified object, either in-place (same instance as obj) or new,
    depending on the handler registered for the type of `obj`.

    Raises:
        NotImplementedError: if no handler is registered for the type of `obj`.
    """
    raise NotImplementedError(
        f"TODO: Don't know how to set items '{items}' in obj {obj}, "
        f"(no handler registered for objects of type {type(obj)})."
    )
@replace.register(Dataclass)
def _replace_dataclass_attribute(obj: Dataclass, **items) -> Dataclass:
    """Handler for dataclasses: delegates to `dataclasses.replace`."""
    assert dataclasses.is_dataclass(obj)
    return dataclasses.replace(obj, **items)
@replace.register(dict)
def _replace_dict_item(obj: Dict, **items) -> Dict:
    """Handler for dicts: returns a copy of `obj` with the given items replaced.

    Only existing keys may be replaced; `obj` itself is left untouched.
    """
    assert isinstance(obj, dict)
    assert all(
        key in obj for key in items
    ), "replace should only be used to replace items, not to add new ones."
    # Work on a copy, so the input dict isn't modified.
    new_obj = obj.copy()
    new_obj.update(items)
    return new_obj
@replace.register(list)
@replace.register(tuple)
def _replace_sequence_items(obj: Sequence, **items) -> Tuple:
    """Handler for lists and tuples (including namedtuples).

    Namedtuples are updated by field name through `_replace`. For other
    sequences, a new sequence of the same type is built, taking `items[i]` for
    each index `i` present in `items`.
    """
    if is_namedtuple(obj):
        return obj._replace(**items)
    # NOTE(review): keyword names are strings while `i` is an integer index, so
    # `i in items` presumably never matches here - confirm the intended usage.
    return type(obj)(items[i] if i in items else val for i, val in enumerate(obj))
@replace.register
def _replace_dict_items(obj: spaces.Dict, **items) -> Dict:
    """Handler for Dict spaces.

    Builds a new space of the same type from `obj.spaces` with the given items
    replaced.
    """
    return type(obj)(replace(obj.spaces, **items))
================================================
FILE: sequoia/utils/generic_functions/replace_test.py
================================================
""" Tests for the `replace` generic function. """
================================================
FILE: sequoia/utils/generic_functions/singledispatchmethod.py
================================================
""" Little 'patch' that imports a backport of 'singledispatchmethod', if the
python version is < 3.8.
"""
import sys
if sys.version_info >= (3, 8):
    from functools import singledispatchmethod  # type: ignore
else:
    try:
        # Backport of `functools.singledispatchmethod` for python < 3.8.
        from singledispatchmethod import singledispatchmethod  # type: ignore
    except ImportError as e:
        print(f"Couldn't import singledispatchmethod: {e}")
        print(
            "Since you're running python version below 3.8, you need to "
            "install the backport for singledispatchmethod (which was added "
            "to functools in python 3.8), using the following command:\n"
            "> pip install singledispatchmethod"
        )
        # Exit with a non-zero status, since this is an error.
        sys.exit(1)
================================================
FILE: sequoia/utils/generic_functions/slicing.py
================================================
""" Extendable utility functions for getting and settings slices of arbitrarily
nested objects.
"""
from functools import singledispatch
from typing import Any, Dict, Sequence, Tuple, TypeVar
import numpy as np
from torch import Tensor
from ._namedtuple import is_namedtuple
K = TypeVar("K")
V = TypeVar("V")
T = TypeVar("T")
@singledispatch
def get_slice(value: T, indices: Sequence[int]) -> T:
    """Return the slice of `value` at the given indices (`value[indices]`).

    A `None` value is returned as-is.
    """
    return None if value is None else value[indices]
@get_slice.register(dict)
def _get_dict_slice(value: Dict[K, V], indices: Sequence[int]) -> Dict[K, V]:
    """Slice every value of a dict, returning a dict of the same type."""
    return type(value)((k, get_slice(v, indices)) for k, v in value.items())
@get_slice.register(tuple)
def _get_tuple_slice(value: Tuple[T, ...], indices: Sequence[int]) -> Tuple[T, ...]:
    """Slice every item of a tuple, returning a tuple of the same type."""
    sliced_items = [get_slice(item, indices) for item in value]
    # NOTE: we use type(value)( ... ) to create the output, in case a subclass
    # of tuple is being used (e.g. NamedTuples).
    tuple_type = type(value)
    if is_namedtuple(value):
        return tuple_type(*sliced_items)
    return tuple_type(sliced_items)
@singledispatch
def set_slice(target: Any, indices: Sequence[int], values: Sequence[Any]) -> None:
    """Sets `values` at positions `indices` in `target`.

    Modifies the `target` in-place.

    This is the fallback for types without a registered handler: it performs a
    plain item assignment.
    """
    target[indices] = values
from sequoia.utils.categorical import Categorical
@set_slice.register
def _set_slice_categorical(
    target: Categorical, indices: Sequence[int], values: Sequence[Any]
) -> None:
    """Set the logits of `target` at `indices` to the logits of `values`, in-place."""
    target.logits[indices] = values.logits
@set_slice.register(np.ndarray)
def _set_slice_ndarray(target: np.ndarray, indices: Sequence[int], values: Sequence[Any]) -> None:
    """Set `values` at `indices` in an ndarray, in-place.

    Tensor indices or values are first converted to ndarrays.
    """
    if isinstance(indices, Tensor):
        indices = indices.cpu().numpy()
    if isinstance(values, Tensor):
        values = values.cpu().numpy()
    target[indices] = values
@set_slice.register(Tensor)
def _set_slice_tensor(target: Tensor, indices: Sequence[int], values: Sequence[Any]) -> None:
    """Set `values` at `indices` in a Tensor, in-place."""
    target[indices] = values
@set_slice.register(dict)
def _set_dict_slice(
    target: Dict[K, Sequence[V]], indices: Sequence[int], values: Dict[K, Sequence[V]]
) -> None:
    """Set the slice of every entry of `target` from the matching entry of `values`."""
    for key in target:
        set_slice(target[key], indices, values[key])
@set_slice.register(tuple)
def _set_tuple_slice(target: Tuple[T, ...], indices: Sequence[int], values: Tuple[T, ...]) -> None:
    """Set the slice of every item of `target` from the item of `values` at the same position."""
    assert isinstance(values, tuple)
    assert len(target) == len(values)
    # The tuple itself can't be modified, so each of its items is updated in turn.
    for target_item, values_item in zip(target, values):
        set_slice(target_item, indices, values_item)
================================================
FILE: sequoia/utils/generic_functions/slicing_test.py
================================================
from typing import NamedTuple
import numpy as np
import pytest
from .slicing import get_slice, set_slice
class DummyTuple(NamedTuple):
    # Minimal namedtuple type with two array fields, used by the tests below.
    a: np.ndarray
    b: np.ndarray
@pytest.mark.parametrize(
    "source, indices, expected",
    [
        (np.arange(10), np.arange(5), np.arange(5)),
        (
            {"a": np.arange(10), "b": np.arange(10)},
            np.arange(5),
            {"a": np.arange(5), "b": np.arange(5)},
        ),
        (({"a": np.arange(10)}, np.arange(10) + 5), 3, ({"a": 3}, 8)),
        (  # Test with namedtuples.
            {
                "a": np.array([0, 1, 2]),
                "b": DummyTuple(a=np.zeros([3, 4]), b=np.ones([5, 4])),
            },
            np.arange(2),
            {"a": np.array([0, 1]), "b": DummyTuple(a=np.zeros([2, 4]), b=np.ones([2, 4]))},
        ),
    ],
)
def test_get_slice(source, indices, expected):
    """`get_slice` works on arrays as well as on nested dicts / tuples / namedtuples."""
    # Results are compared through their string representation.
    assert str(get_slice(source, indices)) == str(expected)
@pytest.mark.parametrize(
    "target, indices, values, result",
    [
        (
            np.arange(10, dtype=float),
            np.arange(5),
            np.zeros(5),
            np.concatenate([np.zeros(5), np.arange(5) + 5.0]),
        ),
        (
            {"a": np.arange(10, dtype=float), "b": np.zeros(10)},
            np.arange(10),
            {"a": np.ones(10), "b": np.ones(10)},
            {"a": np.ones(10), "b": np.ones(10)},
        ),
        (
            ({"a": np.arange(10)}, np.arange(10) + 5),
            0,
            ({"a": 3}, 8),
            (
                {"a": np.concatenate([np.array([3]), 1 + np.arange(9)])},
                np.concatenate([np.array([8]), 6 + np.arange(9)]),
            ),
        ),
        (  # Test with NamedTuples.
            {
                "a": np.array([0, 1, 2]),
                "b": DummyTuple(a=np.zeros(5), b=np.ones(5)),
            },
            np.arange(2),
            {"a": np.array([5, 7]), "b": DummyTuple(a=np.ones(2), b=np.zeros(2))},
            {
                "a": np.array([5, 7, 2]),
                "b": DummyTuple(
                    a=np.array([1.0, 1.0, 0.0, 0.0, 0.0]), b=np.array([0.0, 0.0, 1.0, 1.0, 1.0])
                ),
            },
        ),
    ],
)
def test_set_slice(target, indices, values, result):
    """`set_slice` modifies arrays and nested dicts / tuples / namedtuples in-place."""
    set_slice(target, indices, values)
    # Results are compared through their string representation.
    assert str(target) == str(result)
@pytest.mark.xfail(
    reason="Removed the 'concatenate' generic function, since "
    "there wasn't really a use for it anywhere."
)
@pytest.mark.parametrize(
    "a, b, kwargs, expected",
    [
        (np.array([0, 1, 2]), np.array([3, 4, 5, 6]), {}, np.arange(7)),
        (
            {
                "a": np.array([0, 1, 2]),
                "b": DummyTuple(a=np.zeros(3), b=np.ones(3)),
            },
            {
                "a": np.array([3, 4, 5]),
                "b": DummyTuple(a=np.zeros(4), b=np.ones(4)),
            },
            {},
            {
                "a": np.array([0, 1, 2, 3, 4, 5]),
                "b": DummyTuple(a=np.zeros(7), b=np.ones(7)),
            },
        ),
        (
            {
                "a": np.array([[0], [1], [2]]),  # [3, 1]
                "b": DummyTuple(a=np.zeros([1, 4]), b=np.ones([1, 4])),
            },
            {
                "a": np.array([[3], [4], [5], [6]]),  # shape [4, 1]
                "b": DummyTuple(a=np.zeros([2, 4]), b=np.ones([3, 4])),
            },
            {"axis": 0},
            {
                "a": np.array([[0], [1], [2], [3], [4], [5], [6]]),
                "b": DummyTuple(a=np.zeros([3, 4]), b=np.ones([4, 4])),
            },
        ),
    ],
)
def test_concat(a, b, kwargs, expected):
    """Concatenation of arrays and nested structures (expected to fail, see the xfail reason)."""
    # Imported inside the test, so that a failing import only affects this test.
    from .slicing import concatenate

    assert str(concatenate(a, b, **kwargs)) == str(expected)
================================================
FILE: sequoia/utils/generic_functions/stack.py
================================================
""" Generic function for stacking ndarrays/tensors/distributions/Mappings
etc.
"""
from collections.abc import Mapping
from functools import singledispatch
from typing import Any, Dict, List, TypeVar, Union
import numpy as np
import torch
from torch import Tensor
from sequoia.utils.categorical import Categorical
T = TypeVar("T")
# @overload
# def stack(first_item: List[T]) -> Sequence[T]:
# ...
# @overload
# def stack(first_item: T, *others: T) -> Sequence[T]:
# ...
@singledispatch
def stack(first_item: Union[T, List[T]], *others: T, **kwargs) -> Any:
    """Stack `first_item` and `others`.

    Can be called with several items, `stack(a, b, c)`, or with a single
    list/tuple of items, `stack([a, b, c])`.

    This is the fallback for item types without a registered handler: the items
    are stacked into an ndarray with `np.stack`. A `dim` keyword argument is
    passed to numpy as `axis`.
    """
    # note: We could also try to return a tensor, rather than an ndarray
    # but I'd rather keep it simple for now.
    if others:
        np_stack_kwargs = kwargs.copy()
        if "dim" in np_stack_kwargs:
            np_stack_kwargs["axis"] = np_stack_kwargs.pop("dim")
        return np.stack([first_item, *others], **np_stack_kwargs)

    # Called like `stack(item_list)`: unpack the list and dispatch on its items.
    if first_item is None:
        # Stacking a list of 'None' items returns None.
        return None
    assert isinstance(first_item, (list, tuple)), first_item
    return stack(first_item[0], *first_item[1:], **kwargs)
@stack.register(type(None))
def _stack_none(first_item: None, *others: None, **kwargs) -> Union[None, np.ndarray]:
    """Stack values starting with a `None`.

    When every value is None, a single None is returned, as it's easier to work
    with than an ndarray of Nones (`v is None` works). Otherwise, an ndarray
    holding all the values is returned.
    """
    has_actual_value = any(v is not None for v in others)
    if has_actual_value:
        return np.array([first_item, *others])
    return None
@stack.register(np.ndarray)
def _stack_ndarrays(first_item: np.ndarray, *others: np.ndarray, **kwargs) -> np.ndarray:
    """Stack ndarrays with `np.stack`.

    A `dim` keyword argument is passed to numpy as `axis`, like in the fallback
    handler of `stack`, so that the same keyword arguments can be used for
    structures mixing arrays with other types.
    """
    np_stack_kwargs = kwargs.copy()
    if "dim" in np_stack_kwargs:
        np_stack_kwargs["axis"] = np_stack_kwargs.pop("dim")
    return np.stack([first_item, *others], **np_stack_kwargs)
@stack.register(Tensor)
def _stack_tensors(first_item: Tensor, *others: Tensor, **kwargs) -> Tensor:
    """Stack tensors with `torch.stack`; `kwargs` are forwarded to it."""
    return torch.stack([first_item, *others], **kwargs)
@stack.register(Mapping)
def _stack_dicts(first_item: Dict, *others: Dict, **kwargs) -> Dict:
    """Stack mappings key by key, using the keys of `first_item`.

    The result has the same type as `first_item`.
    """
    stacked = {}
    for key in first_item.keys():
        stacked[key] = stack(first_item[key], *(other[key] for other in others), **kwargs)
    return type(first_item)(**stacked)
@stack.register(Categorical)
def _stack_distributions(first_item: Categorical, *others: Categorical, **kwargs) -> Categorical:
    """Stack Categorical distributions by stacking their logits with `torch.stack`."""
    return Categorical(
        logits=torch.stack([first_item.logits, *(other.logits for other in others)], **kwargs)
    )
================================================
FILE: sequoia/utils/generic_functions/to_from_tensor.py
================================================
from functools import singledispatch
from typing import Any, Dict, Mapping, Optional, Tuple, TypeVar, Union
import numpy as np
import torch
from gym import Space, spaces
from torch import Tensor
T = TypeVar("T")
@singledispatch
def from_tensor(space: Space, sample: Union[Tensor, Any]) -> Union[np.ndarray, Any]:
    """Converts a Tensor into a sample from the given space.

    This is the fallback for spaces without a registered handler: Tensors are
    converted to ndarrays, and any other value is returned as-is.
    """
    if isinstance(sample, Tensor):
        return sample.cpu().numpy()
    return sample
@from_tensor.register
def _(space: spaces.Discrete, sample: Tensor) -> int:
    """Convert a Tensor or ndarray holding a single value into an int.

    Any other kind of sample is returned as-is.

    Raises:
        ValueError: if `sample` is a Tensor whose value isn't an integer.
    """
    if isinstance(sample, Tensor):
        v = sample.item()
        int_v = int(v)
        if int_v != v:
            raise ValueError(f"Value {sample} isn't an integer, so it can't be from space {space}!")
        return int_v
    if isinstance(sample, np.ndarray):
        assert sample.size == 1, sample
        # Extract the scalar explicitly: calling `int` directly on an array
        # with dimensions is deprecated in recent versions of numpy.
        return int(sample.item())
    return sample
@from_tensor.register
def _(
    space: spaces.Dict, sample: Dict[str, Union[Tensor, Any]]
) -> Dict[str, Union[np.ndarray, Any]]:
    """Convert each entry of a dict sample, using the subspace with the same key."""
    return {key: from_tensor(space[key], value) for key, value in sample.items()}
from sequoia.utils.generic_functions._namedtuple import is_namedtuple
@from_tensor.register
def _(space: spaces.Tuple, sample: Tuple[Union[Tensor, Any]]) -> Tuple[Union[np.ndarray, Any]]:
    """Convert each item of a tuple sample, using the subspace at the same position.

    Namedtuple samples are returned as namedtuples of the same type.
    """
    if not isinstance(sample, tuple):
        # BUG: Sometimes instead of having a sample of Tuple(Discrete(2))
        # be `(1,)`, its `array([1])` instead.
        sample = tuple(sample)
    values_gen = (from_tensor(space[i], value) for i, value in enumerate(sample))
    if is_namedtuple(sample):
        return type(sample)(*values_gen)
    return tuple(values_gen)
@singledispatch
def to_tensor(
    space: Space, sample: Union[np.ndarray, Any], device: torch.device = None
) -> Optional[Union[Tensor, Any]]:
    """Converts a sample from the given space into a Tensor.

    This is the fallback for spaces without a registered handler. A `None`
    sample is returned as-is.
    """
    if sample is None:
        return sample
    return torch.as_tensor(sample, device=device)
@to_tensor.register
def _(
    space: spaces.MultiBinary, sample: np.ndarray, device: torch.device = None
) -> Tensor:
    """Convert a sample of a MultiBinary space into a boolean Tensor."""
    return torch.as_tensor(sample, device=device, dtype=torch.bool)
@to_tensor.register
def _(
    space: spaces.Tuple,
    sample: Tuple[Union[np.ndarray, Any], ...],
    device: torch.device = None,
) -> Tuple[Union[Tensor, Any], ...]:
    """Convert each item of a tuple sample, using the subspace at the same position.

    A `None` sample is only expected when every subspace is a `Sparse` space
    with a sparsity of 1.0, in which case `None` is returned.
    """
    if sample is None:
        assert all(isinstance(item_space, Sparse) for item_space in space.spaces)
        assert all(item_space.sparsity == 1.0 for item_space in space.spaces)
        # todo: What to do in this context?
        return None
    if any(v is None for v in sample):
        assert False, (space, sample, device)
    return tuple(to_tensor(subspace, sample[i], device) for i, subspace in enumerate(space.spaces))
from typing import NamedTuple
from sequoia.common.spaces.named_tuple import NamedTupleSpace
@to_tensor.register
def _(space: NamedTupleSpace, sample: NamedTuple, device: torch.device = None):
    """Convert each field of a namedtuple sample, returning an instance of `space.dtype`."""
    return space.dtype(
        **{
            key: to_tensor(space[i], sample[i], device=device)
            for i, key in enumerate(space._spaces.keys())
        }
    )
from sequoia.common.spaces.sparse import Sparse
@to_tensor.register(Sparse)
def sparse_sample_to_tensor(
    space: Sparse, sample: Union[Optional[Any], np.ndarray], device: torch.device = None
) -> Optional[Union[Tensor, np.ndarray]]:
    """Convert a sample from a `Sparse` space.

    - With a sparsity of 1.0, the result is None (or an array of Nones when the
      base space is a MultiDiscrete space).
    - With a sparsity of 0.0, the sample is converted using the base space.
    - Otherwise, only object arrays are handled, and they are returned as
      object arrays rather than Tensors.
    """
    if space.sparsity == 1.0:
        if isinstance(space.base, spaces.MultiDiscrete):
            assert all(v is None for v in sample)
            return np.array([None if v is None else v for v in sample])
        if sample is not None:
            # `np.object_` is used since the `np.object` alias no longer exists
            # in recent versions of numpy.
            assert isinstance(sample, np.ndarray) and sample.dtype == np.object_
            assert not sample.shape
        return None
    if space.sparsity == 0.0:
        # Do we need to convert dtypes here though?
        return to_tensor(space.base, sample, device)
    # 0 < sparsity < 1
    if isinstance(sample, np.ndarray) and sample.dtype == np.object_:
        return np.array([None if v is None else v for v in sample])
    assert False, (space, sample)
================================================
FILE: sequoia/utils/logging_utils.py
================================================
import inspect
import logging
from functools import wraps
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, TypeVar, Union
import torch.multiprocessing as mp
import tqdm
from torch import Tensor
from sequoia.utils.utils import unique_consecutive
# Configure logging when this module is imported: timestamped messages with the
# logger name and line number, at the INFO level.
logging.basicConfig(
    format="%(asctime)s,%(msecs)d %(levelname)-8s [%(name)s:%(lineno)d] %(message)s",
    datefmt="%Y-%m-%d:%H:%M:%S",
    level=logging.INFO,
)
# Only let errors through for the logger of simple_parsing.
logging.getLogger("simple_parsing").setLevel(logging.ERROR)
# The root logger: the loggers returned by `get_logger` are its children.
root_logger = logging.getLogger("")
T = TypeVar("T")
def pbar(dataloader: Iterable[T], description: str = "", *args, **kwargs) -> Iterable[T]:
    """Wrap `dataloader` in a tqdm progress bar.

    `dynamic_ncols` is enabled unless given explicitly. Extra arguments are
    passed to `tqdm.tqdm`. When `description` is non-empty, it is set as the
    description of the progress bar.
    """
    kwargs.setdefault("dynamic_ncols", True)
    progress_bar = tqdm.tqdm(dataloader, *args, **kwargs)
    if description:
        progress_bar.set_description(description)
    return progress_bar
def get_logger(name: str, level: int = None) -> logging.Logger:
    """Gets a logger for the given file, as a child of the root logger.

    When `name` is the path of an existing file, the path relative to the
    current working directory is used as the name of the logger.

    Args:
        name: Name of the logger, or path of a file (e.g. `__file__`).
        level: Logging level. When None, the level is DEBUG if one of the debug
            flags is present in the command-line arguments, and INFO otherwise.

    TODO: figure out if we should add handlers, etc.
    """
    from sys import argv

    try:
        p = Path(name)
        if p.exists():
            name = str(p.absolute().relative_to(Path.cwd()).as_posix())
    except Exception:
        # Best effort only: keep the name as-is if it can't be made relative.
        pass

    logger = root_logger.getChild(name)

    debug_flags: List[str] = ["-d", "--debug", "-vv", "-vvv", "--verbose"]
    if level is None and any(v in argv for v in debug_flags):
        level = logging.DEBUG
    if level is None:
        level = logging.INFO
    logger.setLevel(level)
    return logger
def log_calls(function: Callable, level=logging.INFO) -> Callable:
    """Decorates a function and logs the calls to it and the passed args.

    The logger used is named after the file and line number of the code that
    called `log_calls`, with the file path made relative to the current working
    directory.

    Args:
        function: The function to wrap.
        level: The level at which the calls are logged.

    Returns:
        The wrapped function.
    """
    callerframerecord = inspect.stack()[1]  # 0 represents this line
    # 1 represents line at caller
    frame = callerframerecord[0]
    info = inspect.getframeinfo(frame)
    p = Path(info.filename)
    name = str(p.absolute().relative_to(Path.cwd()).as_posix())
    logger = get_logger(f"{name}:{info.lineno}")

    @wraps(function)
    def _wrapped(*args, **kwargs):
        # The name of the current process is included in the message.
        process_name = mp.current_process().name
        logger.log(
            level,
            (
                f"Process {process_name} called {function.__name__} with "
                f"args={args} and kwargs={kwargs}."
            ),
        )
        return function(*args, **kwargs)

    return _wrapped
def get_new_file(file: Path) -> Path:
    """Returns a path that doesn't exist yet, based on `file`.

    When `file` already exists, `_{i}` suffixes (starting at 0) are added to
    its stem until the resulting path doesn't exist. Nothing is created on disk.

    Args:
        file (Path): A path.

    Returns:
        Path: a path that is new. Might have a new _{i} suffix.
    """
    if not file.exists():
        return file

    i = 0
    while True:
        candidate = file.with_name(f"{file.stem}_{i}{file.suffix}")
        if not candidate.exists():
            return candidate
        i += 1
def cleanup(
    message: Dict[str, Union[Dict, str, float, Any]],
    sep: str = "/",
    keys_to_remove: List[str] = None,
) -> Dict[str, Union[float, Tensor]]:
    """Cleanup a message dict before it is logged to wandb.

    The message is first flattened, using `sep` to join nested keys. Then, for
    each key:
    - entries whose key contains one of `keys_to_remove` are dropped;
    - the 'losses' and 'metrics' components are removed from the key;
    - consecutive repeated components of the key are merged into one.

    Args:
        message (Dict[str, Union[Dict, str, float, Any]]): The (nested) dict to clean up.
        sep (str, optional): Separator between the components of a key. Defaults to "/".
        keys_to_remove (List[str], optional): Entries whose key contains one of
            these strings are removed. Defaults to None (nothing is removed).

    Returns:
        Dict[str, Union[float, Tensor]]: Cleaned up dict.
    """
    from sequoia.utils.utils import flatten_dict

    # Flatten the log dictionary
    message = flatten_dict(message, separator=sep)
    flags = keys_to_remove or []
    things_to_remove: List[str] = [f"{sep}losses{sep}", f"{sep}metrics{sep}"]

    for original_key in list(message.keys()):
        value = message.pop(original_key)
        if any(flag in original_key for flag in flags):
            # This entry is dropped entirely.
            continue

        # Example input:
        # "Task_losses/Task1/losses/Test/losses/rotate/losses/270/metrics/270/accuracy"
        # Simplify the key, by getting rid of all the '/losses/' and '/metrics/' etc.
        new_key = original_key
        for thing in things_to_remove:
            while thing in new_key:
                new_key = new_key.replace(thing, sep)
        # --> "Task_losses/Task1/Test/rotate/270/270/accuracy"

        # Get rid of repetitive modifiers (ex: "/270/270" above)
        components = [part for part in new_key.split(sep) if not part.isspace()]
        new_key = sep.join(unique_consecutive(components))
        # Will become:
        # "Task_losses/Task1/Test/rotate/270/accuracy"
        message[new_key] = value
    return message
class TqdmLoggingHandler(logging.Handler):
    """Logging handler that writes its messages with `tqdm.tqdm.write`."""

    def emit(self, record):
        """Format `record` and write it out with `tqdm.tqdm.write`."""
        try:
            msg = self.format(record)
            tqdm.tqdm.write(msg)
            self.flush()
        except Exception:
            # KeyboardInterrupt and SystemExit aren't subclasses of Exception,
            # so they propagate to the caller, as before.
            self.handleError(record)
================================================
FILE: sequoia/utils/module_dict.py
================================================
""" Typed wrapper around `nn.ModuleDict`, that just adds a `get` method. """
from typing import Any, MutableMapping, TypeVar, Union
from torch import nn
M = TypeVar("M", bound=nn.Module)
T = TypeVar("T")
class ModuleDict(nn.ModuleDict, MutableMapping[str, M]):
    """A `nn.ModuleDict` with type annotations and a dict-like `get` method."""

    def get(self, key: str, default: Any = None) -> Union[M, Any]:
        """Returns the module at `self[key]` if present, else `default`.

        Args:
            key (str): a key.
            default (Union[M, nn.Module], optional): Default value to return.
                Defaults to None.

        Returns:
            Union[Optional[nn.Module], Optional[M]]: The nn.Module at that key.
        """
        if key in self:
            return self[key]
        return default
================================================
FILE: sequoia/utils/parseable.py
================================================
import dataclasses
import shlex
import sys
from argparse import Namespace
from dataclasses import is_dataclass
from typing import List, Optional, Tuple, Type, TypeVar, Union
from pytorch_lightning import LightningDataModule
from simple_parsing import ArgumentParser
from sequoia.utils.utils import camel_case
from .logging_utils import get_logger
logger = get_logger(__name__)
P = TypeVar("P", bound="Parseable")
class Parseable:
_argv: Optional[List[str]] = None
@classmethod
def add_argparse_args(cls, parser: ArgumentParser) -> None:
    """Add the command-line arguments for this class to the given parser.

    Override this if you don't use simple-parsing to add the args.

    Parameters
    ----------
    parser : ArgumentParser
        The ArgumentParser.

    Raises
    ------
    NotImplementedError
        If `cls` is neither a dataclass nor a LightningDataModule.
    """
    if is_dataclass(cls):
        # The arguments are stored in the namespace under the camel-cased class name.
        dest = camel_case(cls.__qualname__)
        parser.add_arguments(cls, dest=dest)
    elif issubclass(cls, LightningDataModule):
        # TODO: Test this case out (using a LightningDataModule as a Setting).
        super().add_argparse_args(parser)  # type: ignore
    else:
        raise NotImplementedError(
            f"Don't know how to add command-line arguments for class "
            f"{cls}, since it isn't a dataclass and doesn't override the "
            f"`add_argparse_args` method!\n"
            f"Either make class {cls} a dataclass and add command-line "
            f"arguments as fields, or add an implementation for the "
            f"`add_argparse_args` and `from_argparse_args` classmethods."
        )
@classmethod
def from_argparse_args(cls: Type[P], args: Namespace) -> P:
    """Extract the parsed command-line arguments from the namespace and
    return an instance of class `cls`.

    Override this if you don't use simple-parsing.

    Parameters
    ----------
    args : Namespace
        The namespace containing all the parsed command-line arguments.

    Returns
    -------
    cls
        An instance of the class `cls`.

    Raises
    ------
    NotImplementedError
        If `cls` isn't a dataclass.
    """
    if is_dataclass(cls):
        # Same destination as the one used in `add_argparse_args`.
        dest = camel_case(cls.__qualname__)
        return getattr(args, dest)
    # if issubclass(cls, LightningDataModule):
    #     # TODO: Test this case out (using a LightningDataModule as a Setting).
    #     return super()._from_argparse_args(args)  # type: ignore
    raise NotImplementedError(
        f"Don't know how to extract the command-line arguments for class "
        f"{cls} from the namespace, since {cls} isn't a dataclass and "
        f"doesn't override the `from_argparse_args` classmethod."
    )
@classmethod
def from_args(
cls: Type[P], argv: Union[str, List[str]] = None, reorder: bool = True, strict: bool = True
) -> P:
"""Parse an instance of this class from the command-line args.
Parameters
----------
cls : Type[P]
The class to instantiate. This only supports dataclasses by default.
For other classes, you'll have to implement this method yourself.
argv : Union[str, List[str]], optional
The command-line string or list of string arguments in the style of
sys.argv. Could also be the unused_args returned by
.from_known_args(), for example. By default None
reorder : bool, optional
Wether to attempt to re-order positional arguments. Only really
useful when using subparser actions. By default True.
strict : bool, optional
Wether to raise an error if there are extra arguments. By default
False
TODO: Might be a good idea to actually change this default to 'True'
to avoid potential subtle bugs in various places. This would however
make the code slightly more difficult to read, since we'd have to
pass some unused_args around. Also might be a problem when the same
argument e.g. batch_size (at some point) is in both the Setting and
the Method, because then the arg would be 'consumed', and not passed
to the second parser in the chain.
Returns
-------
P
The parsed instance of this class.
Raises
------
NotImplementedError
[description]
"""
# if not is_dataclass(cls):
# raise NotImplementedError(
# f"Don't know how to create an instance of class {cls} from the "
# f"command-line, as it isn't a dataclass. You'll have to "
# f"override the `from_args` or `from_known_args` classmethods."
# )
if isinstance(argv, str):
argv = shlex.split(argv)
instance, unused_args = cls.from_known_args(
argv=argv,
reorder=reorder,
strict=strict,
)
assert not (strict and unused_args), "an error should have been raised"
return instance
@classmethod
def from_known_args(
cls, argv: Union[str, List[str]] = None, reorder: bool = True, strict: bool = False
) -> Tuple[P, List[str]]:
# if not is_dataclass(cls):
# raise NotImplementedError(
# f"Don't know how to parse an instance of class {cls} from the "
# f"command-line, as it isn't a dataclass or doesn't have the "
# f"`add_arpargse_args` and `from_argparse_args` classmethods. "
# f"You'll have to override the `from_known_args` classmethod."
# )
if argv is None:
argv = sys.argv[1:]
logger.debug(f"parsing an instance of class {cls} from argv {argv}")
if isinstance(argv, str):
argv = shlex.split(argv)
parser = ArgumentParser(description=cls.__doc__, add_dest_to_option_strings=False)
cls.add_argparse_args(parser)
# TODO: Set temporarily on the class, so its accessible in the class constructor
cls_argv = cls._argv
cls._argv = argv
instance: P
if strict:
args = parser.parse_args(argv)
unused_args = []
else:
args, unused_args = parser.parse_known_args(argv, attempt_to_reorder=reorder)
if unused_args:
logger.debug(
RuntimeWarning(f"Unknown/unused args when parsing class {cls}: {unused_args}")
)
instance = cls.from_argparse_args(args)
# Save the argv that were used to create the instance on its `_argv`
# attribute.
instance._argv = argv
cls._argv = cls_argv
return instance, unused_args
def upgrade(self, target_type: Type[P]) -> P:
"""Upgrades the hparams `self` to the given `target_type`, filling in
any missing values by parsing them from the command-line.
If `self` was created from the command-line, then the same argv that
were used to create `self` will be used to create the new object.
Returns
-------
type(self).HParams
Hparams of the type `self.HParams`, with the original values
preserved and any new values parsed from the command-line.
"""
# NOTE: This (getting the wrong hparams class) could happen for
# instance when parsing a BaseMethod from the command-line, the
# default type of hparams on the method is BaseModel.HParams,
# whose `output_head` field doesn't have the right type exactly.
current_type = type(self)
current_hparams = dataclasses.asdict(self)
# NOTE: If a value is not at its current default, keep it.
default_hparams = target_type()
missing_fields = [
f.name
for f in dataclasses.fields(target_type)
if f.name not in current_hparams
or current_hparams[f.name] == getattr(current_type(), f.name, None)
or current_hparams[f.name] == getattr(default_hparams, f.name)
]
logger.warning(
RuntimeWarning(
f"Upgrading the hparams from type {current_type} to "
f"type {target_type}. This will try to fetch the values for "
f"the missing fields {missing_fields} from the command-line. "
)
)
# Get the missing values
if self._argv:
return target_type.from_args(argv=self._argv, strict=False)
hparams = target_type.from_args(argv=self._argv, strict=False)
for missing_field in missing_fields:
current_hparams[missing_field] = getattr(hparams, missing_field)
return target_type(**current_hparams)
# @classmethod
# def fields(cls) -> Dict[str, Field]:
# return {f.name: f for f in dataclasses.fields(cls)}
================================================
FILE: sequoia/utils/plotting.py
================================================
from dataclasses import dataclass
from typing import List
import matplotlib.pyplot as plt
def autolabel(axis, rects: List[plt.Rectangle], bar_height_scale: float = 1.0):
    """Write a percentage label just above every non-zero bar in `rects`.

    The labelled value is the bar's height divided by `bar_height_scale`,
    formatted as a percentage without decimals. Bars whose value is exactly
    zero are left unlabelled.

    Adapted from https://matplotlib.org/gallery/lines_bars_and_markers/barchart.html#sphx-glr-gallery-lines-bars-and-markers-barchart-py

    Args:
        axis: The object whose `annotate` method is used to add the labels.
        rects (List[plt.Rectangle]): The bars to label.
        bar_height_scale (float, optional): Divisor applied to each bar height
            to get the displayed value. Defaults to 1.0.
    """
    for bar in rects:
        bar_height = bar.get_height()
        bar_bottom = bar.get_y()
        shown_value = bar_height / bar_height_scale
        if shown_value == 0.0:
            continue
        # Horizontally centered on the bar, anchored at its top edge.
        anchor = (bar.get_x() + bar.get_width() / 2, bar_bottom + bar_height)
        axis.annotate(
            f"{shown_value:.0%}",
            xy=anchor,
            xytext=(0, 3),  # 3 points vertical offset
            textcoords="offset points",
            ha="center",
            va="bottom",
        )
def maximize_figure():
    """Try to maximize the window of the current matplotlib figure.

    The figure manager's API differs between GUI toolkits, so a few known
    variants are attempted in turn; the first one that doesn't raise wins.
    This is best-effort: if none of them works, a message is printed and the
    function returns normally.
    """
    fig_manager = plt.get_current_fig_manager()
    # Each attempt is wrapped in a lambda so that the attribute lookups (which
    # fail on the "wrong" toolkit) also happen inside the `try` below.
    attempts = (
        lambda: fig_manager.window.showMaximized(),
        lambda: fig_manager.window.state("zoomed"),  # works fine on Windows!
        lambda: fig_manager.frame.Maximize(True),
    )
    for attempt in attempts:
        try:
            attempt()
        except Exception:
            # Only swallow real errors: a bare `except:` would also eat
            # KeyboardInterrupt and SystemExit.
            continue
        return
    print("Couldn't maximize the figure.")
@dataclass
class PlotSectionLabel:
    """Labels the region of a plot between `start_step` and `stop_step` with `description`."""

    # Step at which the labelled region begins.
    start_step: int
    # Step at which the labelled region ends.
    stop_step: int
    # Text displayed in the middle of the region.
    description: str = ""

    @property
    def middle(self) -> float:
        """Midpoint between `start_step` and `stop_step`."""
        both_ends = self.start_step + self.stop_step
        return both_ends / 2

    @property
    def width(self) -> int:
        """Distance between `start_step` and `stop_step`."""
        return self.stop_step - self.start_step

    def annotate(self, ax: plt.Axes, height: float = -0.1):
        """Mark this region on the given axis.

        Draws a dotted gray vertical line at `start_step` and at `stop_step`,
        and writes the description, horizontally centered, in between.

        Args:
            ax (plt.Axes): The axis to annotate.
            height (float): The y position at which to place the text.
        """
        for boundary in (self.start_step, self.stop_step):
            ax.axvline(boundary, linestyle=":", color="gray")
        ax.text(self.middle, height, self.description, ha="center")
================================================
FILE: sequoia/utils/pretrained_utils.py
================================================
from typing import Callable, Optional, Tuple, Union
from torch import nn
from sequoia.utils.logging_utils import get_logger
logger = get_logger(__name__)
def get_pretrained_encoder(
    encoder_model: Callable,
    pretrained: bool = True,
    freeze_pretrained_weights: bool = False,
    new_hidden_size: int = None,
) -> Tuple[nn.Module, int]:
    """Creates an encoder from a model constructor and swaps out its classification head.

    The head is looked up on the "classifier" attribute, then on "fc". It is
    replaced with:
    - `nn.Linear(<hidden_size>, new_hidden_size)` when `new_hidden_size` is
      given, where `<hidden_size>` is the number of input features of the
      original head (or of the first `nn.Linear` inside it, if the head is an
      `nn.Sequential`). This new layer is created after the (optional)
      freezing, so it stays trainable;
    - an empty `nn.Sequential()` otherwise.

    Args:
        encoder_model (Callable): Which encoder model to use. Should usually be
            one of the models in the `torchvision.models` module.
        pretrained (bool, optional): Passed as `pretrained=...` to
            `encoder_model`. If the constructor rejects that keyword with a
            TypeError, it is called without arguments instead. Defaults to
            True.
        freeze_pretrained_weights (bool, optional): Whether to set
            `requires_grad = False` on all the encoder's parameters. Has no
            effect when `pretrained` is False. Defaults to False.
        new_hidden_size (int, optional): Output size of the new last layer.
            When None, the head is removed rather than replaced.

    Returns:
        Tuple[nn.Module, int]: the encoder with its head replaced, and its
        output size: `new_hidden_size` if given, else the number of input
        features of the original head.

    Raises:
        RuntimeError: If no head was found, or if no `nn.Linear` could be
            found in it to determine the hidden size.
    """
    model_name = encoder_model.__name__
    logger.debug(f"Using encoder model {model_name}")
    logger.debug(f"pretrained: {pretrained}")
    logger.debug(f"freezing the pretrained weights: {freeze_pretrained_weights}")

    try:
        encoder = encoder_model(pretrained=pretrained)
    except TypeError:
        # The constructor doesn't take a `pretrained` argument.
        encoder = encoder_model()

    if pretrained and freeze_pretrained_weights:
        # Fix the parameters of the model.
        for parameter in encoder.parameters():
            parameter.requires_grad = False

    replace_classifier = new_hidden_size is not None
    if not replace_classifier:
        # The 'new classifier' is still created below, but only to read the
        # hidden size of the encoder from it; it isn't added to the model.
        new_hidden_size = 1

    # We want to replace the last layer (the classification layer) with a
    # projection from their hidden space dimension to ours.
    classifier = None
    new_classifier = None
    for attr in ["classifier", "fc"]:
        if not hasattr(encoder, attr):
            continue
        classifier = getattr(encoder, attr)
        new_classifier = None
        if isinstance(classifier, nn.Linear):
            new_classifier = nn.Linear(
                in_features=classifier.in_features, out_features=new_hidden_size
            )
        elif isinstance(classifier, nn.Sequential):
            # If there is a classifier "block", get the number of features
            # from the first encountered dense layer.
            first_dense = next(
                (layer for layer in classifier.children() if isinstance(layer, nn.Linear)),
                None,
            )
            if first_dense is not None:
                new_classifier = nn.Linear(first_dense.in_features, new_hidden_size)
        # Only the first attribute that exists is considered. `attr` is used
        # again below, to know which attribute to overwrite.
        break

    if new_classifier is None:
        raise RuntimeError(
            f"Can't detect the hidden size of the model '{encoder_model.__name__}'!"
            f" (last layer is :{classifier}).\n"
        )

    if replace_classifier:
        logger.debug(
            f"Replacing the attribute '{attr}' of the "
            f"{encoder_model.__name__} model with a new classifier: "
            f"{new_classifier}"
        )
    else:
        new_hidden_size = new_classifier.in_features
        new_classifier = nn.Sequential()
    setattr(encoder, attr, new_classifier)
    return encoder, new_hidden_size
================================================
FILE: sequoia/utils/readme.py
================================================
import os
import textwrap
from contextlib import redirect_stdout
from inspect import getsourcefile
from io import StringIO
from pathlib import Path
from typing import TYPE_CHECKING, List, Type
from sequoia.settings import Setting
if TYPE_CHECKING:
from sequoia.settings import Setting
# NOTE: Update this if we move this `readme.py` somewhere else.
SEQUOIA_ROOT_DIR = Path(os.path.abspath(os.path.dirname(__file__))).parent.parent
def get_relative_path_to(something: Type) -> Path:
    """Attempts to give the relative path from the current working directory to the
    file where something is defined. If that's not possible (the file isn't
    located under the current directory), returns an absolute path instead.

    Args:
        something (Type): An object accepted by `inspect.getsourcefile`.

    Returns:
        Path: Path to the source file, relative to the cwd when possible.
    """
    source_file = Path(getsourcefile(something))
    try:
        return source_file.relative_to(Path.cwd())
    except ValueError:
        # `relative_to` raises ValueError when the file isn't inside the cwd.
        return source_file.absolute()
def get_tree_string(
    root_setting: Type["Setting"] = Setting,
    with_methods: bool = False,
    with_assumptions: bool = False,
    with_docstrings: bool = False,
) -> str:
    """Get a string representation of the tree of settings under `root_setting`.

    The goal is to return something like this:
    ```
    "Setting"
    ├── active
    │ └── rl
    ├── base
    └── passive
    └── cl
    └── task_incremental
    └── iid
    ```

    Args:
        root_setting (Type[Setting]): The setting at the root of the tree.
        with_methods (bool): Whether to list the applicable methods under each
            setting.
        with_assumptions (bool): Not supported yet.
        with_docstrings (bool): Whether to add the docstring of each setting.

    Returns:
        str: The multi-line tree string.

    Raises:
        NotImplementedError: If `with_assumptions` is True.
    """
    if with_assumptions:
        raise NotImplementedError(
            "TODO: display the assumptions for each setting into the tree string somehow."
        )
    setting: Type["Setting"] = root_setting
    message: List[str] = []
    source_file = get_relative_path_to(setting)
    message.append(f"{setting.get_name()} found in [{setting.__name__}]({source_file})")
    applicable_methods = setting.get_applicable_methods()
    children = setting.get_immediate_children()
    n_children = len(children)

    # Prefix for the lines that belong to this setting (docstring, methods):
    # keeps the vertical bar going when there are children below.
    bar = "│" if n_children else " "
    p = f"{bar} "
    if with_docstrings:
        # A setting without a docstring just gets no docstring lines.
        docstring = setting.__doc__ or ""
        message.extend(p + line for line in docstring.splitlines())
        message.append(p)
    if with_methods:
        message.append(f"{p} Applicable methods: ")
        for method in applicable_methods:
            source_file = get_relative_path_to(method)
            message.append(f"{p} * [{method.__name__}]({source_file})")
        message.append(f"{p} ")

    for i, child_setting in enumerate(children):
        is_last_child = i == n_children - 1
        # Recurse, with the same options, so that the whole tree (not just the
        # root) shows the methods / docstrings when they are requested.
        child_message = get_tree_string(
            child_setting,
            with_methods=with_methods,
            with_docstrings=with_docstrings,
        )
        for j, line in enumerate(child_message.splitlines()):
            if j == 0:
                # Last child uses different graphic
                first = "└──" if is_last_child else "├──"
            else:
                first = " " if is_last_child else "│ "
            message.append(first + line)

    first_line = f"─ {message[0]}\n"
    message_str = "\n".join(message[1:])
    message_str = textwrap.indent(message_str, " ")
    return first_line + message_str
def get_tree_string_markdown(
    root_setting: Type["Setting"] = Setting,
    with_methods: bool = False,
    with_docstring: bool = False,
):
    """Get a markdown representation of the tree of settings under `root_setting`.

    Each setting becomes a list item with a link to its source file, and its
    children are rendered recursively as an indented sub-list. The goal is to
    return something like this:
    - "Setting"
    - active
    - rl
    - base
    - passive
    - cl
    - task_incremental
    * iid

    Args:
        root_setting (Type[Setting]): The setting at the root of the tree.
        with_methods (bool): Whether to list the applicable methods under each
            setting.
        with_docstring (bool): Whether to add the (re-indented) docstring of
            each setting.

    Returns:
        str: The markdown text.
    """
    setting = root_setting
    tab = " "
    lines: List[str] = []

    source_file = get_relative_path_to(setting)
    lines.append(f"- ## [{setting.__name__}]({source_file})")
    applicable_methods = setting.get_applicable_methods()

    if with_docstring:
        lines.append("")
        raw_lines = setting.__doc__.splitlines()
        # The first line is always less indented than the rest, which looks
        # weird, so it is treated separately from the others, from which the
        # common indent is removed.
        summary = raw_lines[0].lstrip()
        details = textwrap.dedent("\n".join(raw_lines[1:]))
        # Re-indent the docstring, with all equal indentation now.
        reindented = textwrap.indent(summary + "\n" + details, tab)
        lines.extend(reindented.splitlines())
        lines.append("")

    if with_methods:
        lines.append("")
        lines.append("Applicable methods: ")
        for method in applicable_methods:
            source_file = get_relative_path_to(method)
            lines.append(f" * [{method.__name__}]({source_file})")
        lines.append("")

    for child_setting in setting.get_immediate_children():
        child_text = get_tree_string_markdown(
            child_setting, with_methods=with_methods, with_docstring=with_docstring
        )
        lines.append("")
        lines.extend(textwrap.indent(child_text, tab).splitlines())
        lines.append("")

    return "\n".join(lines)
def print_methods():
    """Print a markdown entry for every method in `sequoia.methods.all_methods`.

    Each entry has a heading linking to the method's source file, a link to
    its target setting, and the method's docstring indented with a tab.
    """
    from sequoia.methods import all_methods

    for method in all_methods:
        source_file = get_relative_path_to(method)
        target_setting: Type["Setting"] = method.target_setting
        setting_file = get_relative_path_to(target_setting)

        # Methods that aren't in the "methods" family get qualified with
        # their family name.
        family = method.get_family()
        if family != "methods":
            method_name = family + "." + method.__name__
        else:
            method_name = method.__name__

        print(f"- ## [{method_name}]({source_file}) ")
        print()
        print(f"\t - Target setting: [{target_setting.__name__}]({setting_file})")
        print()

        raw_lines = method.__doc__.splitlines()
        # The first line is always less indented than the rest, which looks
        # weird, so only the other lines get their common indent removed.
        summary = raw_lines[0].lstrip()
        details = textwrap.dedent("\n".join(raw_lines[1:]))
        print(textwrap.indent(summary + "\n" + details, "\t"))
def add_stuff_to_readme(readme_path=Path("README.md"), settings: bool = True, methods: bool = True):
    """Rewrite the README at `readme_path`, regenerating its generated sections.

    The existing lines up to (and including) the line that follows the first
    occurrence of the token line are written back, followed by the tree of
    available settings and/or the list of registered methods. Anything that
    was further down in the file is dropped.

    If the token line isn't found, a message is printed and the interpreter
    exits.

    Args:
        readme_path: Path of the README file to update.
        settings (bool): Whether to write the "Available Settings" section.
        methods (bool): Whether to write the "Registered Methods" section.
            At least one of `settings` and `methods` has to be True.
    """
    token = "\n"
    assert settings or methods

    with open(readme_path) as readme_file:
        contents = readme_file.read()
    with StringIO(contents) as buffer:
        lines: List[str] = buffer.readlines()

    if token not in lines:
        print("didn't find token!")
        exit()
    tree_index = lines.index(token) + 1
    kept_lines = lines[: tree_index + 1]

    # Everything printed inside this block ends up in the README.
    with open(readme_path, "w") as readme_file, redirect_stdout(readme_file):
        # Print the existing lines back:
        print(*kept_lines, sep="")
        if settings:
            print("\n\n## Available Settings:\n")
            print()
            print(get_tree_string_markdown(with_methods=False, with_docstring=True))
            print()
        if methods:
            print("\n\n## Registered Methods (so far):\n")
            print_methods()
            print()
if __name__ == "__main__":
    # Script entry point: regenerates the generated sections of the settings
    # README (settings tree only) and of the methods README (methods only).
    # NOTE(review): the README paths are relative, so this presumably has to be
    # run from the root of the repository -- TODO confirm.
    # print(get_tree_string())
    # print(get_tree_string_markdown(with_methods=False, with_docstring=True))
    add_stuff_to_readme(readme_path=Path("sequoia/settings/README.md"), methods=False)
    add_stuff_to_readme(readme_path=Path("sequoia/methods/README.md"), settings=False)
================================================
FILE: sequoia/utils/serialization.py
================================================
from dataclasses import dataclass, fields
from inspect import isfunction
from pathlib import Path
from typing import Any, Dict, Iterable, Tuple, Type, TypeVar, Union, get_type_hints
import torch
from simple_parsing.helpers import Serializable as SerializableBase
from simple_parsing.helpers.serialization import register_decoding_fn
from sequoia.utils.generic_functions import detach
from .generic_functions.detach import detach
from .generic_functions.move import move
from .logging_utils import get_logger
from .utils import dict_union
register_decoding_fn(torch.device, torch.device)
T = TypeVar("T")
logger = get_logger(__name__)
def cpu(x: Any) -> Any:
    """Return the result of moving `x` to the "cpu" device with `move`."""
    on_cpu = move(x, "cpu")
    return on_cpu
class Pickleable:
    """Mixin whose pickled state is a detached, on-CPU copy of the attributes."""

    def __getstate__(self):
        """Return the state to pickle.

        `vars(self)` is used in order to get all the attributes, not just the
        dataclass fields. The attributes are passed through `detach` and then
        `cpu` before being returned.
        """
        attributes = vars(self)
        detached = detach(attributes)
        return cpu(detached)

    def __setstate__(self, state: Dict):
        """Restore the attributes in `state` onto this object."""
        vars(self).update(state)
S = TypeVar("S", bound="Serializable")
@dataclass
class Serializable(SerializableBase, Pickleable, decode_into_subclasses=True):  # type: ignore
    """`Serializable` from simple-parsing, with a few extras.

    Adds a `save` that goes through a temporary file, helpers to detach / move
    the fields to a device, and `merge`.
    """

    def save(self, path: Union[str, Path], **kwargs) -> None:
        """Save this object at `path`, creating the parent directories if needed.

        The data is first written to a sibling "<stem>_temp<suffix>" file,
        which is then renamed to `path` (overwriting it if it exists), so that
        a failed write doesn't corrupt an existing save file.
        """
        destination = Path(path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        scratch_file = destination.with_name(destination.stem + "_temp" + destination.suffix)
        super().save(scratch_file, **kwargs)
        scratch_file.replace(destination)

    def detach(self: S) -> S:
        """Returns a new object of the same type, built from the detached fields.

        Fields whose metadata has a falsy "to_dict" entry are left out.
        """
        kept_values = {}
        for f in fields(self):
            if f.metadata.get("to_dict", True):
                kept_values[f.name] = getattr(self, f.name)
        return type(self)(**detach(kept_values))

    def to(self, device: Union[str, torch.device]):
        """Returns a new object with all the attributes 'moved' to `device`.

        NOTE: This doesn't implement anything related to the other args like
        memory format or dtype.
        TODO: Maybe add something to convert everything that is a Tensor or
        numpy array to a given dtype?
        """
        moved_values = {}
        for name, item in self.items():
            moved_values[name] = move(item, device)
        return type(self)(**moved_values)

    def items(self) -> Iterable[Tuple[str, Any]]:
        """Yields a (name, value) pair for each dataclass field."""
        yield from ((f.name, getattr(self, f.name)) for f in fields(self))

    def cpu(self):
        """Same as `self.to("cpu")`."""
        return self.to("cpu")

    def cuda(self, device: Union[str, torch.device] = None):
        """Same as `self.to(device)`, using "cuda" when no device is given."""
        target_device = device if device else "cuda"
        return self.to(target_device)

    def merge(self, other: "Serializable") -> "Serializable":
        """Overwrite values in `self` present in 'other' with the values from
        `other`.

        `other` can be a serializable object or a dictionary. Child elements
        are also merged recursively.

        Returns a new object, i.e. this doesn't modify `self` in-place.

        Raises a RuntimeError if `other` is neither of those.
        """
        own_values = self.to_dict()
        if isinstance(other, SerializableBase):
            other_values = other.to_dict()
        elif isinstance(other, dict):
            other_values = other
        else:
            raise RuntimeError(f"Can't merge self with {other}.")
        return type(self).from_dict(dict_union(own_values, other_values))
class decode:
    """Namespace for the `decode.register` decorator."""

    @staticmethod
    def register(fn_or_type: Type = None):
        """Decorator to be used to register a decoding function for a given type.

        This can be used in two different ways. The type can either be given
        explicitly, like so:

        ```python
        @decode.register(SomeType)
        def decode_some_type(v: str):
            return SomeType(v)  # return an instance of SomeType from a string.
        ```

        or be determined implicitly through the return type annotation, like so:

        ```
        @decode.register
        def decode_some_type(v: str) -> SomeType:
            (...)
        ```

        In the end, this just calls `register_decoding_fn(SomeType, decode_some_type)`,
        and the decorated function is returned as-is.

        Raises a RuntimeError when no type is given and the function has no
        return type annotation.
        """
        # When used as a bare `@decode.register`, the argument is the function
        # to register itself, rather than a type.
        used_without_call = isfunction(fn_or_type)
        explicit_type = None if used_without_call else fn_or_type

        def _wrapper(fn):
            if explicit_type is None:
                hints = get_type_hints(fn)
                if "return" not in hints:
                    raise RuntimeError(
                        f"Need to either explicitly pass a type to `register`, or use "
                        f"a return type annotation (e.g. `-> Foo:`) on the function!"
                    )
                type_ = hints["return"]
            else:
                type_ = explicit_type
            register_decoding_fn(type_, fn)
            return fn

        if used_without_call:
            return _wrapper(fn_or_type)
        return _wrapper
================================================
FILE: sequoia/utils/utils.py
================================================
""" Miscelaneous utility functions. """
import functools
import hashlib
import inspect
import itertools
import operator
import re
import warnings
from collections import defaultdict
from dataclasses import Field, fields
from functools import reduce
from inspect import getsourcefile, isclass
from itertools import filterfalse, groupby
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar, Union
from simple_parsing import field
from torch import Tensor, cuda
cuda_available = cuda.is_available()
gpus_available = cuda.device_count()
T = TypeVar("T")
K = TypeVar("K")
V = TypeVar("V")
Dataclass = TypeVar("Dataclass")
def field_dict(dataclass: Dataclass) -> Dict[str, Field]:
    """Returns a dict mapping the name of each dataclass field to its `Field`."""
    fields_by_name: Dict[str, Field] = {}
    for f in fields(dataclass):
        fields_by_name[f.name] = f
    return fields_by_name
def mean(values: Iterable[T]) -> T:
    """Returns the arithmetic mean of `values` (which may be a one-shot iterator)."""
    items = list(values)
    total = sum(items)
    return total / len(items)
def pairwise(iterable: Iterable[T]) -> Iterable[Tuple[T, T]]:
    """Iterates over the pairs of successive items: s -> (s0,s1), (s1,s2), (s2, s3), ..."""
    leading, trailing = itertools.tee(iterable)
    # Put the second iterator one item ahead of the first.
    next(trailing, None)
    return zip(leading, trailing)
def n_consecutive(items: Iterable[T], n: int = 2, yield_last_batch=True) -> Iterable[Tuple[T, ...]]:
    """Groups the items into consecutive tuples of `n` elements.

    If items are left over at the end (fewer than `n` of them), they are
    yielded as a final, shorter tuple, unless `yield_last_batch` is False.

    >>> list(n_consecutive("ABCDEFG", 3))
    [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]
    """
    pending: List[T] = []
    for item in items:
        pending.append(item)
        if len(pending) != n:
            continue
        yield tuple(pending)
        pending = []
    if yield_last_batch and pending:
        yield tuple(pending)
def fix_channels(x_batch: Tensor) -> Tensor:
    """Makes sure that the batch has a channels dimension at index 1.

    - A 3-dimensional batch gets a new dimension of size 1 at index 1.
    - Otherwise, if dimension 1 isn't the smallest of the non-batch
      dimensions, dimensions 1 and -1 are swapped.
    - Otherwise the batch is returned unchanged.

    NOTE(review): `transpose(1, -1)` swaps the two dimensions, so it also
    changes the order of the dimensions in between (a (B, H, W, C) batch
    becomes (B, C, W, H)) -- confirm that this is intended.
    """
    # TODO: Move this to data_utils.py
    if x_batch.dim() == 3:
        return x_batch.unsqueeze(1)
    smallest_dim = min(x_batch.shape[1:])
    if x_batch.shape[1] == smallest_dim:
        return x_batch
    return x_batch.transpose(1, -1)
def to_dict_of_lists(list_of_dicts: Iterable[Dict[str, Any]]) -> Dict[str, List[Tensor]]:
    """Turns an iterable of dicts into a dict of lists.

    Assumes that all dictionaries have the same keys as the first dictionary:
    after each dict is added, its keys are asserted to be equal to the keys
    seen so far.

    Args:
        list_of_dicts (Iterable[Dict[str, Any]]): An iterable of dicts.

    Returns:
        Dict[str, List[Tensor]]: A dict (a `defaultdict(list)`) that maps each
        key to the list of the values for that key, in order.
    """
    lists_by_key: Dict[str, List[Any]] = defaultdict(list)
    for index, mapping in enumerate(list_of_dicts):
        for key, value in mapping.items():
            lists_by_key[key].append(value)
        same_keys = mapping.keys() == lists_by_key.keys()
        assert same_keys, f"Dict {mapping} at index {index} does not contain all the keys!"
    return lists_by_key
def add_prefix(some_dict: Dict[str, T], prefix: str = "", sep=" ") -> Dict[str, T]:
    """Adds the given prefix to all the keys in the dictionary that don't already start with it.

    Parameters
    ----------
    - some_dict : Dict[str, T]
        Some dictionary.
    - prefix : str, optional, by default ""
        A string prefix to prepend. Trailing occurrences of `sep` are removed
        from it first. When empty, `some_dict` itself is returned.
    - sep : str, optional, by default " "
        A string separator to add between the `prefix` and the existing keys
        (which do not start with `prefix`).

    Returns
    -------
    Dict[str, T]
        A new dictionary (of the same type as `some_dict`) where all keys
        start with the prefix.

    Examples:
    -------
    >>> add_prefix({"a": 1}, prefix="bob", sep="")
    {'boba': 1}
    >>> add_prefix({"a": 1}, prefix="bob")
    {'bob a': 1}
    >>> add_prefix({"a": 1}, prefix="a")
    {'a': 1}
    >>> add_prefix({"a": 1}, prefix="a ")
    {'a': 1}
    >>> add_prefix({"a": 1}, prefix="a", sep="/")
    {'a': 1}
    >>> add_prefix({"k": 1}, prefix="bab", sep="ab")
    {'babk': 1}
    """
    if not prefix:
        return some_dict
    result: Dict[str, T] = type(some_dict)()
    if sep:
        # Remove the trailing separator(s) as whole substrings. `str.rstrip`
        # can't be used for this: it treats its argument as a *set of
        # characters*, so with a multi-character separator it could strip
        # characters that are part of the prefix itself.
        while prefix.endswith(sep):
            prefix = prefix[: -len(sep)]
    for key, value in some_dict.items():
        new_key = key if key.startswith(prefix) else (prefix + sep + key)
        result[new_key] = value
    return result
def loss_str(loss_tensor: Tensor) -> str:
    """Formats the value of a loss tensor (read with `.item()`) as a short string.

    Returns "0" for an exact zero, scientific notation with one decimal when
    the absolute value is below 1e-3 or above 1e3, and three decimals
    otherwise.
    """
    value = loss_tensor.item()
    if value == 0:
        return "0"
    magnitude = abs(value)
    format_spec = ".1e" if (magnitude < 1e-3 or magnitude > 1e3) else ".3f"
    return format(value, format_spec)
def set_seed(seed: int):
    """Seed the `random`, numpy and pytorch random number generators.

    The CUDA generators are seeded as well when CUDA is available.
    """
    import random

    import numpy
    import torch

    random.seed(seed)
    torch.manual_seed(seed)
    numpy.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def compute_identity(size: int = 16, **sample) -> str:
    """Compute a hash string out of the given keyword arguments.

    The entries are fed into a SHA-256 hash in sorted key order: the key,
    followed by `str(value)`, or by the identity of the value (computed
    recursively with the same `size`) when it is a dictionary.

    Parameters
    ----------
    size: int
        Number of (hexadecimal) characters to keep from the digest.
    **sample:
        Dictionary to compute the hash from.

    Returns
    -------
    str
        The first `size` characters of the hexadecimal digest.
    """
    digest = hashlib.sha256()
    for key in sorted(sample):
        value = sample[key]
        if isinstance(value, dict):
            value_text = compute_identity(size, **value)
        else:
            value_text = str(value)
        digest.update(key.encode("utf8"))
        digest.update(value_text.encode("utf8"))
    return digest.hexdigest()[:size]
def prod(iterable: Iterable[T]) -> T:
    """Multiplies together all the items of the iterable, starting from 1.

    This is to `*` what sum() is to `+`.

    >>> prod(range(1, 5))
    24
    """
    result = 1
    for factor in iterable:
        result = operator.mul(result, factor)
    return result
def common_fields(a, b) -> Iterable[Tuple[str, Tuple[Any, Any]]]:
    """Iterates over the dataclass fields that `a` and `b` have in common.

    Fields are matched by name, and are visited in the order of the fields of
    `a`. Only the attributes for the common fields are read.

    Args:
        a: A dataclass instance.
        b: Another dataclass instance.

    Yields:
        Tuple[str, Tuple[Any, Any]]: The name of the common field, and the
        values of that field on `a` and on `b`.
    """
    # Name lookup table, to avoid scanning all the fields of `b` for each
    # field of `a`.
    names_in_b = {field_b.name for field_b in fields(b)}
    for field_a in fields(a):
        name = field_a.name
        if name in names_in_b:
            yield name, (getattr(a, name), getattr(b, name))
def add_dicts(d1: Dict, d2: Dict, add_values=True) -> Dict:
    """Combines two (possibly nested) dicts into a new dict.

    The result starts as a shallow copy of `d1`. Then, for each entry of `d2`:
    - if the key isn't in `d1`, the value from `d2` is used;
    - if the value from `d2` is a dict, the two values are combined
      recursively;
    - otherwise, the result is `d1[key] + d2[key]` when `add_values` is True,
      and the value from `d2` when it is False.
    """
    combined = d1.copy()
    for key, new_value in d2.items():
        if key not in d1:
            combined[key] = new_value
            continue
        if isinstance(new_value, dict):
            combined[key] = add_dicts(d1[key], new_value, add_values=add_values)
            continue
        combined[key] = (d1[key] + new_value) if add_values else new_value
    return combined
def rsetattr(obj: Any, attr: str, val: Any) -> None:
    """Sets a (possibly nested) attribute, given its dotted path, e.g. "a.b.c".

    The object that owns the last attribute of the path is found using
    `rgetattr`.

    Taken from https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties
    """
    owner_path, _, last_name = attr.rpartition(".")
    owner = rgetattr(obj, owner_path) if owner_path else obj
    return setattr(owner, last_name, val)
# using wonder's beautiful simplification: https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects/31174427?noredirect=1#comment86638618_31174427
def rgetattr(obj: Any, attr: str, *args):
    """Gets a (possibly nested) attribute, given its dotted path, e.g. "a.b.c".

    An optional default can be passed as an extra positional argument. It is
    given to each `getattr` call along the path.

    Taken from https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-subobjects-chained-properties
    """
    current = obj
    for name in attr.split("."):
        current = getattr(current, name, *args)
    return current
def is_nonempty_dir(path: Path) -> bool:
    """Returns True if `path` is a directory that contains at least one entry."""
    if not path.is_dir():
        return False
    # `any` is enough here: the Path objects given by `iterdir` are truthy.
    return any(path.iterdir())
D = TypeVar("D", bound=Dict)
def flatten_dict(d: D, separator: str = "/") -> D:
    """Flattens a nested dict into a single level.

    The key of each nested value is the chain of keys that leads to it, joined
    with `separator`.

    Args:
        d (Dict): A nested dictionary
        separator (str, optional): Separator to use. Defaults to "/".

    Returns:
        Dict: A flattened dictionary, of the same type as `d`.
    """
    flat = type(d)()
    for key, value in d.items():
        if not isinstance(value, dict):
            flat[key] = value
            continue
        flat_child = flatten_dict(value, separator=separator)
        for child_key, child_value in flat_child.items():
            flat[f"{key}{separator}{child_key}"] = child_value
    return flat
def unique_consecutive(iterable: Iterable[T], key: Callable[[T], Any] = None) -> Iterable[T]:
    """Collapses each run of consecutive equal elements into its first element.

    The order is preserved, and only the element just seen is remembered.

    NOTE: If `key` is passed, it is only used to test for equality, the outputs of `key`
    for each sample won't be returned.

    >>> list(unique_consecutive('AAAABBBCCDAABBB'))
    ['A', 'B', 'C', 'D', 'A', 'B']
    >>> list(unique_consecutive('ABBCcAD', str.lower))
    ['A', 'B', 'C', 'A', 'D']

    Recipe taken from itertools docs: https://docs.python.org/3/library/itertools.html
    """
    # The groups given by groupby are never empty, so `next` always succeeds.
    return (next(run) for _, run in groupby(iterable, key))
def unique_consecutive_with_index(
    iterable: Iterable[T], key: Callable[[T], Any] = None
) -> Iterable[Tuple[int, T]]:
    """Like `unique_consecutive`, but also gives the index of each element.

    Yields a tuple with the index and the value of the first element of each
    run of consecutive equal elements.

    NOTE: If `key` is passed, it is only used to test for equality, the outputs of `key`
    for each sample won't be returned. If you want to save some compute, use a map as
    the input.

    >>> list(unique_consecutive_with_index('AAAABBBCCDAABBB'))
    [(0, 'A'), (4, 'B'), (7, 'C'), (9, 'D'), (10, 'A'), (12, 'B')]
    >>> list(unique_consecutive_with_index('ABBCcAD', str.lower))
    [(0, 'A'), (1, 'B'), (3, 'C'), (5, 'A'), (6, 'D')]
    """

    def _value_key(index_and_value):
        # Group on the value only (or on its key), never on the index.
        value = index_and_value[1]
        return value if key is None else key(value)

    for _, run in groupby(enumerate(iterable), _value_key):
        yield next(run)
def roundrobin(*iterables: Iterable[T]) -> Iterable[T]:
    """Yields one item from each iterable in turn, until they are all exhausted.

    roundrobin('ABC', 'D', 'EF') --> A D E B F C

    Same behaviour as the recipe from the itertools docs:
    https://docs.python.org/3/library/itertools.html
    """
    active = [iter(it) for it in iterables]
    while active:
        # Do one full round, keeping only the iterators that gave an item.
        survivors = []
        for iterator in active:
            try:
                item = next(iterator)
            except StopIteration:
                continue
            yield item
            survivors.append(iterator)
        active = survivors
def take(iterable: Iterable[T], n: Optional[int]) -> Iterable[T]:
    """Limits `iterable` to its first `n` elements.

    When `n` is None, there is no limit: `iterable` itself is returned.
    """
    if n is None:
        return iterable
    return itertools.islice(iterable, n)
def camel_case(name):
    """Converts a CamelCase name into lowercase words joined by underscores.

    NOTE: despite the name of this function, the output is in snake_case, e.g.
    "CamelCase" -> "camel_case". Runs of underscores are collapsed into one.
    """
    # Underscore before each capitalized word: "aWord" -> "a_Word".
    with_word_breaks = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
    # Underscore between a lowercase letter or digit and an uppercase letter.
    with_case_breaks = re.sub("([a-z0-9])([A-Z])", r"\1_\2", with_word_breaks)
    return re.sub("_{2,}", "_", with_case_breaks.lower())
def constant(v: T, **kwargs) -> T:
    """Creates a field that always has the value `v`.

    The field has `v` as its default and isn't part of `__init__`
    (`init=False`). `v` is also stored in the field's metadata, under the
    "constant" key, along with "decoding_fn" and "to_dict" functions which
    both ignore their argument and return `v`.

    Any other keyword arguments are passed on to `field`. If a `metadata`
    dict is given, the entries above are added to it in-place.
    """
    metadata = kwargs.setdefault("metadata", {})
    metadata.update(
        constant=v,
        decoding_fn=lambda _: v,
        to_dict=lambda _: v,
    )
    return field(default=v, init=False, **kwargs)
def flag(default: bool, *args, **kwargs):
    """Creates a boolean field with the given default and `nargs="?"`.

    Any other positional or keyword arguments are passed on to `field`.
    """
    return field(*args, default=default, nargs="?", **kwargs)
def dict_union(*dicts: Dict[K, V], recurse: bool = True, dict_factory=dict) -> Dict[K, V]:
    """Simple dict union until we use python 3.9

    Values from later dictionaries take precedence over those of earlier ones.

    If `recurse` is True, also does the union of nested dictionaries: when *all*
    the values found for a key are dictionaries, they are merged recursively.
    Otherwise (mixed or non-dict values, or `recurse=False`), the value from the
    last dictionary containing that key is used as-is.

    The result (and every merged nested dictionary) is created with
    `dict_factory`.

    NOTE: The returned dictionary has keys sorted alphabetically.

    >>> a = dict(a=1, b=2, c=3)
    >>> b = dict(c=5, d=6, e=7)
    >>> dict_union(a, b)
    {'a': 1, 'b': 2, 'c': 5, 'd': 6, 'e': 7}
    >>> a = dict(a=1, b=dict(c=2, d=3))
    >>> b = dict(a=2, b=dict(c=3, e=6))
    >>> dict_union(a, b)
    {'a': 2, 'b': {'c': 3, 'd': 3, 'e': 6}}
    >>> dict_union(dict(a=1), dict(a=dict(b=2)))
    {'a': {'b': 2}}
    """
    result: Dict = dict_factory()
    if not dicts:
        return result

    all_keys = set()
    all_keys.update(*dicts)

    for key in sorted(all_keys):
        # Every key comes from at least one dict, so `values` is never empty.
        values = [d[key] for d in dicts if key in d]
        if recurse and all(isinstance(value, dict) for value in values):
            result[key] = dict_union(*values, recurse=True, dict_factory=dict_factory)
        else:
            # The last dictionary that defines the key wins, whatever the type
            # of its value.
            result[key] = values[-1]
    return result
K = TypeVar("K")
V = TypeVar("V")
M = TypeVar("M")


def zip_dicts(*dicts: Dict[K, V], missing: M = None) -> Iterable[Tuple[K, Tuple[Union[M, V], ...]]]:
    """Iterate over the union of the keys of all the given dictionaries.

    Yields `(key, values)` pairs, where `values` is a tuple holding, for each
    dictionary (in the order given), its value for `key` if present, else
    `missing`. The order in which keys are produced is unspecified.

    >>> sorted(zip_dicts({"a": 1}, {"a": 2, "b": 3}))
    [('a', (1, 2)), ('b', (None, 3))]
    """
    every_key = set(itertools.chain.from_iterable(dicts))
    for current in every_key:
        row = tuple(mapping.get(current, missing) for mapping in dicts)
        yield (current, row)
def dict_intersection(*dicts: Dict[K, V]) -> Iterable[Tuple[K, Tuple[V, ...]]]:
    """Gives back an iterator over the keys and values common to all dicts.

    Yields `(key, values)` pairs for every key present in *all* the given
    dictionaries, where `values` holds the value from each dictionary, in the
    order the dictionaries were given. The order of the keys is unspecified.

    Yields nothing when called without any dictionaries.

    >>> dict(dict_intersection({"a": 1, "b": 2}, {"b": 3, "c": 4}))
    {'b': (2, 3)}
    >>> list(dict_intersection())
    []
    """
    if not dicts:
        # Nothing to intersect: avoid indexing into an empty sequence below.
        return
    # Snapshot the inputs, so that anything exposing `.items()` is supported.
    snapshots = [dict(d.items()) for d in dicts]
    common_keys = set(snapshots[0])
    for snapshot in snapshots[1:]:
        common_keys.intersection_update(snapshot)
    for key in common_keys:
        yield (key, tuple(snapshot[key] for snapshot in snapshots))
def try_get(d: Dict[K, V], *keys: K, default: V = None) -> Optional[V]:
    """Return the value of the first of `keys` that can be looked up in `d`.

    The keys are tried in the order given. If every lookup raises a `KeyError`
    (or if no keys are given), `default` is returned instead.

    >>> try_get({"a": 1}, "b", "a")
    1
    """
    for candidate in keys:
        try:
            found = d[candidate]
        except KeyError:
            continue
        return found
    return default
def remove_suffix(s: str, suffix: str) -> str:
    """Remove the suffix from string s if present.

    Only a `suffix` located at the very end of `s` is removed; occurrences
    elsewhere in the string are left alone. An empty `suffix` leaves `s`
    unchanged.

    Doing this manually until we start using python 3.9.

    >>> remove_suffix("bob.com", ".com")
    'bob'
    >>> remove_suffix("Henrietta", "match")
    'Henrietta'
    >>> remove_suffix("bob.com.au", ".com")
    'bob.com.au'
    """
    # The `suffix` check guards against `s[:-0]`, which would be the empty string.
    if suffix and s.endswith(suffix):
        return s[: -len(suffix)]
    return s
def remove_prefix(s: str, prefix: str) -> str:
    """Strip `prefix` from the start of `s`, when `s` starts with it.

    Returns `s` unchanged otherwise.
    Hand-written stand-in, to be used until python 3.9 is the minimum version.

    >>> remove_prefix("bob.com", "bo")
    'b.com'
    >>> remove_prefix("Henrietta", "match")
    'Henrietta'
    """
    if s.startswith(prefix):
        return s[len(prefix) :]
    return s
def get_all_subclasses_of(cls: Type[T]) -> Iterable[Type[T]]:
    """Yield the classes in this module's global namespace that derive from `cls`.

    Only names bound at module level (definitions and imports) are inspected.
    Since `issubclass` is used, `cls` itself is yielded too if it is among them.
    """
    for candidate in globals().values():
        if isclass(candidate) and issubclass(candidate, cls):
            yield candidate
def get_all_concrete_subclasses_of(cls: Type[T]) -> Iterable[Type[T]]:
    """Yield the classes given by `get_all_subclasses_of(cls)` that aren't abstract.

    A class is considered abstract when `inspect.isabstract` says so.
    """
    for candidate in get_all_subclasses_of(cls):
        if not inspect.isabstract(candidate):
            yield candidate
def get_path_to_source_file(cls: Type) -> Path:
    """Locate the source file in which `cls` is defined.

    The path is given relative to the current working directory when the file
    lives under it, and as an absolute path otherwise.
    """
    working_dir = Path.cwd()
    located = getsourcefile(cls)
    assert isinstance(located, str), f"can't locate source file for {cls}?"
    absolute_path = Path(located).absolute()
    try:
        relative_path = absolute_path.relative_to(working_dir)
    except ValueError:
        # The file isn't under the working directory, for instance when sequoia
        # is installed in site_packages (not with `pip install -e .`): fall back
        # to the absolute path.
        return absolute_path
    return relative_path
def constant_property(fixed_value: T) -> T:
    """Create a property that always evaluates to `fixed_value`.

    Reading the property gives back `fixed_value`. Assigning to it is only
    tolerated when the assigned value is equal to `fixed_value` (in which case
    nothing happens), or when it is a `property` object (see below).

    Raises:
        RuntimeError: (in the setter) when attempting to set any other value.
    """

    def getter(_) -> T:
        return fixed_value

    def setter(_, value: Any):
        if isinstance(value, property):
            # This happens in the __init__ that is generated by dataclasses, so we
            # do nothing here.
            return
        if value != fixed_value:
            raise RuntimeError(f"This attribute is fixed at value {fixed_value}.")

    return property(fget=getter, fset=setter)
def deprecated_property(old_name: str, new_name: str):
    """Create a property standing in for a deprecated attribute.

    Reads and writes of the property are redirected to the attribute called
    `new_name` on the same object, and both emit a `DeprecationWarning`
    mentioning `old_name` and `new_name`.
    """
    notice = f"'{old_name}' property is deprecated, use '{new_name}' instead."

    def getter(self):
        warnings.warn(DeprecationWarning(notice), category=DeprecationWarning, stacklevel=2)
        return getattr(self, new_name)

    def setter(self, value: Any):
        warnings.warn(DeprecationWarning(notice), category=DeprecationWarning, stacklevel=2)
        # A `property` object is what gets assigned in the __init__ that is
        # generated by dataclasses: it is not a real value, so it is dropped.
        if not isinstance(value, property):
            setattr(self, new_name, value)

    return property(
        fget=getter,
        fset=setter,
        doc=f"Deprecated property, Please use '{new_name}' instead.",
    )
if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module's docstrings.
    from doctest import testmod

    testmod()
================================================
FILE: setup.cfg
================================================
[versioneer]
VCS=git
style=pep440-post
versionfile_source=sequoia/_version.py
versionfile_build=sequoia/_version.py
tag_prefix=v
parentdir_prefix=sequoia-
[metadata]
license_file=LICENSE
================================================
FILE: setup.py
================================================
import os
from typing import Dict, List, Union
from setuptools import find_packages, setup
import versioneer
# Read the install requirements from the `requirements.txt` file located next to
# this script.
with open(os.path.join(os.path.dirname(__file__), "requirements.txt"), "r") as file:
    lines = [ln.strip() for ln in file]
# NOTE: `include` has to be a sequence of patterns. A bare string gets unpacked
# into single-character patterns, and its lone "*" then matches every package,
# which silently turns the filter into a no-op.
packages_to_export = find_packages(
    where=".", exclude=["tests*", "examples*"], include=["sequoia*"]
)
# Blank lines and comment lines are not requirements.
required_packages = [line for line in lines if line and not line.startswith("#")]
# Optional dependencies, grouped by feature. Each entry is either a single
# requirement string or a list of requirement strings.
extras_require: Dict[str, Union[str, List[str]]] = {
    "monsterkong": [
        "meta_monsterkong @ git+https://github.com/lebrice/MetaMonsterkong.git#egg=meta_monsterkong"
    ],
    "atari": ["gym[atari] @ git+https://www.github.com/lebrice/gym@easier_custom_spaces#egg=gym"],
    "hpo": ["orion>=0.1.15", "orion.algo.skopt>=0.1.6"],
    "avalanche": [
        "gdown",  # BUG: Avalanche needs this to download cub200 dataset.
        "avalanche @ git+https://github.com/ContinualAI/avalanche.git@83b3cb9a92b75a59c1b9d31fc6f0dce9436e5fc5#egg=avalanche-lib",
    ],
    # NOTE: Removing this for now, because it has very strict requirements, and includes
    # a lot of copy-pasted code, and doesn't really add anything compared to metaworld.
    # This isn't right.
    # "mtenv": [
    #     "mtenv @ git+https://github.com/facebookresearch/mtenv.git@main#egg='mtenv[metaworld]'"
    # ],
    "ctrl": "ctrl-benchmark==0.0.4",
    "mujoco": [
        "mujoco_py",
    ],
    "metaworld": [
        "metaworld @ git+https://github.com/rlworkgroup/metaworld.git@29fe5d6d95cf9ad86f63eac38db8c0aef3837994#egg=metaworld"
    ],
    "sb3": "stable-baselines3==1.2.0",
}


def _merge_extras(excluded=()):
    """Return the sorted, de-duplicated requirements of all extras not in `excluded`.

    Sorting keeps the generated lists identical from one run to the next (the
    iteration order of a set of strings is not stable across interpreter runs).
    """
    merged = set()
    for extra_name, extra_requirements in extras_require.items():
        if extra_name in excluded:
            continue
        if isinstance(extra_requirements, list):
            merged.update(extra_requirements)
        else:
            merged.add(extra_requirements)
    return sorted(merged)


# Add-up all the optional requirements, and then remove any duplicates.
extras_require["all"] = _merge_extras()
# Same thing, but without the extras that depend on mujoco. The "all" entry is
# skipped as well, since it contains the requirements of every extra.
extras_require["no_mujoco"] = _merge_extras(excluded=("all", "mujoco", "metaworld"))
# Register the `sequoia` distribution. The version and the build commands come
# from versioneer, the install requirements from `requirements.txt`, and the
# optional dependency groups from `extras_require` (all computed above).
setup(
    name="sequoia",
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    description="The Research Tree - A playground for research at the intersection of Continual, Reinforcement, and Self-Supervised Learning.",
    url="https://github.com/lebrice/Sequoia",
    author="Fabrice Normandin",
    author_email="fabrice.normandin@gmail.com",
    license="GPLv3",
    packages=packages_to_export,
    extras_require=extras_require,
    install_requires=required_packages,
    python_requires=">=3.7",
    tests_require=["pytest"],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
    ],
    entry_points={
        # Installs a `sequoia` command that calls `sequoia.main:main`.
        "console_scripts": [
            "sequoia = sequoia.main:main",
            # TODO: The `sequoia_sweep` entry-point below is currently disabled,
            # while we redesign the command-line API
            # (See https://github.com/lebrice/Sequoia/issues/47)
            # "sequoia_sweep = sequoia.experiments.hpo_sweep:main",
        ],
    },
)
================================================
FILE: versioneer.py
================================================
# Version: 0.19
"""The Versioneer - like a rocketeer, but for versions.
The Versioneer
==============
* like a rocketeer, but for versions!
* https://github.com/python-versioneer/python-versioneer
* Brian Warner
* License: Public Domain
* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3
* [![Latest Version][pypi-image]][pypi-url]
* [![Build Status][travis-image]][travis-url]
This is a tool for managing a recorded version number in distutils-based
python projects. The goal is to remove the tedious and error-prone "update
the embedded version string" step from your release process. Making a new
release should be as easy as recording a new tag in your version-control
system, and maybe making new tarballs.
## Quick Install
* `pip install versioneer` to somewhere in your $PATH
* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md))
* run `versioneer install` in your source tree, commit the results
* Verify version information with `python setup.py version`
## Version Identifiers
Source trees come from a variety of places:
* a version-control system checkout (mostly used by developers)
* a nightly tarball, produced by build automation
* a snapshot tarball, produced by a web-based VCS browser, like github's
"tarball from tag" feature
* a release tarball, produced by "setup.py sdist", distributed through PyPI
Within each source tree, the version identifier (either a string or a number,
this tool is format-agnostic) can come from a variety of places:
* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
about recent "tags" and an absolute revision-id
* the name of the directory into which the tarball was unpacked
* an expanded VCS keyword ($Id$, etc)
* a `_version.py` created by some earlier build step
For released software, the version identifier is closely related to a VCS
tag. Some projects use tag names that include more than just the version
string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
needs to strip the tag prefix to extract the version identifier. For
unreleased software (between tags), the version identifier should provide
enough information to help developers recreate the same tree, while also
giving them an idea of roughly how old the tree is (after version 1.2, before
version 1.3). Many VCS systems can report a description that captures this,
for example `git describe --tags --dirty --always` reports things like
"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
uncommitted changes).
The version identifier is used for multiple purposes:
* to allow the module to self-identify its version: `myproject.__version__`
* to choose a name and prefix for a 'setup.py sdist' tarball
## Theory of Operation
Versioneer works by adding a special `_version.py` file into your source
tree, where your `__init__.py` can import it. This `_version.py` knows how to
dynamically ask the VCS tool for version information at import time.
`_version.py` also contains `$Revision$` markers, and the installation
process marks `_version.py` to have this marker rewritten with a tag name
during the `git archive` command. As a result, generated tarballs will
contain enough information to get the proper version.
To allow `setup.py` to compute a version too, a `versioneer.py` is added to
the top level of your source tree, next to `setup.py` and the `setup.cfg`
that configures it. This overrides several distutils/setuptools commands to
compute the version when invoked, and changes `setup.py build` and `setup.py
sdist` to replace `_version.py` with a small static file that contains just
the generated version data.
## Installation
See [INSTALL.md](./INSTALL.md) for detailed installation instructions.
## Version-String Flavors
Code which uses Versioneer can learn about its version string at runtime by
importing `_version` from your main `__init__.py` file and running the
`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
import the top-level `versioneer.py` and run `get_versions()`.
Both functions return a dictionary with different flavors of version
information:
* `['version']`: A condensed version string, rendered using the selected
style. This is the most commonly used value for the project's version
string. The default "pep440" style yields strings like `0.11`,
`0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
below for alternative styles.
* `['full-revisionid']`: detailed revision identifier. For Git, this is the
full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".
* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
commit date in ISO 8601 format. This will be None if the date is not
available.
* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
this is only accurate if run in a VCS checkout, otherwise it is likely to
be False or None
* `['error']`: if the version string could not be computed, this will be set
to a string describing the problem, otherwise it will be None. It may be
useful to throw an exception in setup.py if this is set, to avoid e.g.
creating tarballs with a version string of "unknown".
Some variants are more useful than others. Including `full-revisionid` in a
bug report should allow developers to reconstruct the exact code being tested
(or indicate the presence of local changes that should be shared with the
developers). `version` is suitable for display in an "about" box or a CLI
`--version` output: it can be easily compared against release notes and lists
of bugs fixed in various releases.
The installer adds the following text to your `__init__.py` to place a basic
version in `YOURPROJECT.__version__`:
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
## Styles
The setup.cfg `style=` configuration controls how the VCS information is
rendered into a version string.
The default style, "pep440", produces a PEP440-compliant string, equal to the
un-prefixed tag name for actual releases, and containing an additional "local
version" section with more detail for in-between builds. For Git, this is
TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
that this commit is two revisions ("+2") beyond the "0.11" tag. For released
software (exactly equal to a known tag), the identifier will only contain the
stripped tag, e.g. "0.11".
Other styles are available. See [details.md](details.md) in the Versioneer
source tree for descriptions.
## Debugging
Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
to return a version of "0+unknown". To investigate the problem, run `setup.py
version`, which will run the version-lookup code in a verbose mode, and will
display the full contents of `get_versions()` (including the `error` string,
which may help identify what went wrong).
## Known Limitations
Some situations are known to cause problems for Versioneer. This details the
most significant ones. More can be found on Github
[issues page](https://github.com/python-versioneer/python-versioneer/issues).
### Subprojects
Versioneer has limited support for source trees in which `setup.py` is not in
the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
two common reasons why `setup.py` might not be in the root:
* Source trees which contain multiple subprojects, such as
[Buildbot](https://github.com/buildbot/buildbot), which contains both
"master" and "slave" subprojects, each with their own `setup.py`,
`setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
distributions (and upload multiple independently-installable tarballs).
* Source trees whose main purpose is to contain a C library, but which also
provide bindings to Python (and perhaps other languages) in subdirectories.
Versioneer will look for `.git` in parent directories, and most operations
should get the right version string. However `pip` and `setuptools` have bugs
and implementation details which frequently cause `pip install .` from a
subproject directory to fail to find a correct version string (so it usually
defaults to `0+unknown`).
`pip install --editable .` should work correctly. `setup.py install` might
work too.
Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
some later version.
[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
this issue. The discussion in
[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
issue from the Versioneer side in more detail.
[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
pip to let Versioneer work correctly.
Versioneer-0.16 and earlier only looked for a `.git` directory next to the
`setup.cfg`, so subprojects were completely unsupported with those releases.
### Editable installs with setuptools <= 18.5
`setup.py develop` and `pip install --editable .` allow you to install a
project into a virtualenv once, then continue editing the source code (and
test) without re-installing after every change.
"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
convenient way to specify executable scripts that should be installed along
with the python package.
These both work as expected when using modern setuptools. When using
setuptools-18.5 or earlier, however, certain operations will cause
`pkg_resources.DistributionNotFound` errors when running the entrypoint
script, which must be resolved by re-installing the package. This happens
when the install happens with one version, then the egg_info data is
regenerated while a different version is checked out. Many setup.py commands
cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
a different virtualenv), so this can be surprising.
[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
this one, but upgrading to a newer version of setuptools should probably
resolve it.
## Updating Versioneer
To upgrade your project to a new release of Versioneer, do the following:
* install the new Versioneer (`pip install -U versioneer` or equivalent)
* edit `setup.cfg`, if necessary, to include any new configuration settings
indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
* re-run `versioneer install` in your source tree, to replace
`SRC/_version.py`
* commit any changed files
## Future Directions
This tool is designed to make it easily extended to other version-control
systems: all VCS-specific components are in separate directories like
src/git/ . The top-level `versioneer.py` script is assembled from these
components by running make-versioneer.py . In the future, make-versioneer.py
will take a VCS name as an argument, and will construct a version of
`versioneer.py` that is specific to the given VCS. It might also take the
configuration arguments that are currently provided manually during
installation by editing setup.py . Alternatively, it might go the other
direction and include code from all supported VCS systems, reducing the
number of intermediate scripts.
## Similar projects
* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
dependency
* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
versioneer
## License
To make Versioneer easier to embed, all its code is dedicated to the public
domain. The `_version.py` that it creates is also in the public domain.
Specifically, both are released under the Creative Commons "Public Domain
Dedication" license (CC0-1.0), as described in
https://creativecommons.org/publicdomain/zero/1.0/ .
[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
[pypi-url]: https://pypi.python.org/pypi/versioneer/
[travis-image]:
https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
"""
import configparser
import errno
import json
import os
import re
import subprocess
import sys
class VersioneerConfig:
    """Container for Versioneer configuration parameters.

    Instances are plain attribute holders: the class declares no fields itself.
    `get_config_from_root` sets the `VCS`, `style`, `versionfile_source`,
    `versionfile_build`, `tag_prefix`, `parentdir_prefix` and `verbose`
    attributes on the instance it returns.
    """
def get_root():
    """Get the project root directory.

    We require that all commands are run from the project root, i.e. the
    directory that contains setup.py, setup.cfg, and versioneer.py .

    The current working directory is tried first, then the directory that
    contains sys.argv[0]. VersioneerBadRootError is raised when neither of them
    contains a setup.py or a versioneer.py file.
    """

    def marker_files(directory):
        # Paths of the two files that identify a project root.
        return (
            os.path.join(directory, "setup.py"),
            os.path.join(directory, "versioneer.py"),
        )

    root = os.path.realpath(os.path.abspath(os.getcwd()))
    setup_py, versioneer_py = marker_files(root)
    if not os.path.exists(setup_py) and not os.path.exists(versioneer_py):
        # allow 'python path/to/setup.py COMMAND'
        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
        setup_py, versioneer_py = marker_files(root)
    if not os.path.exists(setup_py) and not os.path.exists(versioneer_py):
        raise VersioneerBadRootError(
            "Versioneer was unable to run the project root directory. "
            "Versioneer requires setup.py to be executed from "
            "its immediate directory (like 'python setup.py COMMAND'), "
            "or in a way that lets it use sys.argv[0] to find the root "
            "(like 'python path/to/setup.py COMMAND')."
        )
    try:
        # Certain runtime workflows (setup.py install/develop in a setuptools
        # tree) execute all dependencies in a single python process, so
        # "versioneer" may be imported multiple times, and python's shared
        # module-import table will cache the first one. So we can't use
        # os.path.dirname(__file__), as that will find whichever
        # versioneer.py was first imported, even in later projects.
        this_file = os.path.realpath(os.path.abspath(__file__))
        this_stem = os.path.normcase(os.path.splitext(this_file)[0])
        expected_stem = os.path.normcase(os.path.splitext(versioneer_py)[0])
        if this_stem != expected_stem:
            print(
                "Warning: build in %s is using versioneer.py from %s"
                % (os.path.dirname(this_file), versioneer_py)
            )
    except NameError:
        # `__file__` is not defined: skip the sanity check above.
        pass
    return root
def get_config_from_root(root):
    """Read the project setup.cfg file to determine Versioneer config.

    `root` is the directory that contains setup.cfg. Returns a
    VersioneerConfig populated from the [versioneer] section of that file.
    """
    # This might raise EnvironmentError (if setup.cfg is missing), or
    # configparser.NoSectionError (if it lacks a [versioneer] section), or
    # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
    # the top of versioneer.py for instructions on writing your setup.cfg .
    parser = configparser.ConfigParser()
    with open(os.path.join(root, "setup.cfg"), "r") as config_file:
        parser.read_file(config_file)
    vcs = parser.get("versioneer", "VCS")  # mandatory

    def optional(name):
        # Optional settings are None when absent from the [versioneer] section.
        if not parser.has_option("versioneer", name):
            return None
        return parser.get("versioneer", name)

    cfg = VersioneerConfig()
    cfg.VCS = vcs
    cfg.style = optional("style") or ""
    cfg.versionfile_source = optional("versionfile_source")
    cfg.versionfile_build = optional("versionfile_build")
    tag_prefix = optional("tag_prefix")
    # A pair of quotes in setup.cfg stands for an empty prefix.
    cfg.tag_prefix = "" if tag_prefix in ("''", '""') else tag_prefix
    cfg.parentdir_prefix = optional("parentdir_prefix")
    cfg.verbose = optional("verbose")
    return cfg
class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario.

    NOTE(review): presumably raised by a version-lookup strategy so that the
    caller can fall back to another one - confirm against the callers.
    """
# these dictionaries contain VCS-specific tools:
# - LONG_VERSION_PY maps a VCS name to its `_version.py` template,
# - HANDLERS maps a VCS name to a {method name: handler function} dictionary.
LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS.

    The decorated function is recorded in HANDLERS[vcs][method] and returned
    unchanged.
    """

    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f

    return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
    """Call the given command(s).

    `commands` is a list of alternative executables: they are tried in order
    until one of them can be started with the arguments `args`.

    Returns a `(stdout, returncode)` pair:
    - `(None, None)` when no executable could be started,
    - `(None, returncode)` when the process exited with a non-zero code,
    - `(stripped decoded output, 0)` otherwise.

    With `hide_stderr`, the standard error of the process is captured (and
    discarded) rather than inherited. With `verbose`, failures are printed.
    """
    assert isinstance(commands, list)
    stderr_target = subprocess.PIPE if hide_stderr else None
    process = None
    for executable in commands:
        argv = [executable] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(
                argv,
                cwd=cwd,
                env=env,
                stdout=subprocess.PIPE,
                stderr=stderr_target,
            )
        except EnvironmentError as error:
            if error.errno == errno.ENOENT:
                # This executable doesn't exist: try the next alternative.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(error)
            return None, None
        break
    if process is None:
        # None of the alternatives could be found.
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode
LONG_VERSION_PY[
"git"
] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.19 (https://github.com/python-versioneer/python-versioneer)
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
def get_keywords():
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
def get_config():
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
cfg = VersioneerConfig()
cfg.VCS = "git"
cfg.style = "%(STYLE)s"
cfg.tag_prefix = "%(TAG_PREFIX)s"
cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s"
cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s"
cfg.verbose = False
return cfg
class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY = {}
HANDLERS = {}
def register_vcs_handler(vcs, method): # decorator
"""Create decorator to mark a method as the handler of a VCS."""
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
for c in commands:
try:
dispcmd = str([c] + args)
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None))
break
except EnvironmentError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
if verbose:
print("unable to run %%s" %% dispcmd)
print(e)
return None, None
else:
if verbose:
print("unable to find command, tried %%s" %% (commands,))
return None, None
stdout = p.communicate()[0].strip().decode()
if p.returncode != 0:
if verbose:
print("unable to run %%s (error)" %% dispcmd)
print("stdout was %%s" %% stdout)
return None, p.returncode
return stdout, p.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose):
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
the project name and a version string. We will also support searching up
two directory levels for an appropriately named parent directory
"""
rootdirs = []
for i in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
else:
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %%s but none started with prefix %%s" %%
(str(rootdirs), parentdir_prefix))
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords = {}
try:
f = open(versionfile_abs, "r")
for line in f.readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
f.close()
except EnvironmentError:
pass
return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
"""Get version information from git keywords."""
if not keywords:
raise NotThisMethod("no keywords at all, weird")
date = keywords.get("date")
if date is not None:
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
# git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %%d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
if verbose:
print("discarding '%%s', no digits" %% ",".join(refs - tags))
if verbose:
print("likely tags: %%s" %% ",".join(sorted(tags)))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
if verbose:
print("picking %%s" %% r)
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None,
"date": date}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
return {"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
GITS = ["git"]
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=True)
if rc != 0:
if verbose:
print("Directory %%s not under git control" %% root)
raise NotThisMethod("'git rev-parse --git-dir' returned error")
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
"--always", "--long",
"--match", "%%s*" %% tag_prefix],
cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
pieces = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
# look for -dirty suffix
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[:git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%%s'"
%% describe_out)
return pieces
# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
fmt = "tag '%%s' doesn't start with prefix '%%s'"
print(fmt %% (full_tag, tag_prefix))
pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'"
%% (full_tag, tag_prefix))
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
# commit: short hex revision ID
pieces["short"] = mo.group(3)
else:
# HEX: no tags
pieces["closest-tag"] = None
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
cwd=root)
pieces["distance"] = int(count_out) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"],
cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
def plus_or_dot(pieces):
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
def render_pep440(pieces):
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_pre(pieces):
"""TAG[.post0.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post0.devDISTANCE
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += ".post0.dev%%d" %% pieces["distance"]
else:
# exception #1
rendered = "0.post0.dev%%d" %% pieces["distance"]
return rendered
def render_pep440_post(pieces):
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
(a dirty tree will appear "older" than the corresponding clean one),
but you shouldn't be releasing software with -dirty anyways.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%%d" %% pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%%s" %% pieces["short"]
else:
# exception #1
rendered = "0.post%%d" %% pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += "+g%%s" %% pieces["short"]
return rendered
def render_pep440_old(pieces):
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%%d" %% pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
else:
# exception #1
rendered = "0.post%%d" %% pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
return rendered
def render_git_describe(pieces):
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render_git_describe_long(pieces):
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
The distance/hash is unconditional.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None}
if not style or style == "default":
style = "pep440" # the default
if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
rendered = render_git_describe(pieces)
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
raise ValueError("unknown style '%%s'" %% style)
return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None,
"date": pieces.get("date")}
def get_versions():
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
# py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
# case we can only use expanded keywords.
cfg = get_config()
verbose = cfg.verbose
try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
verbose)
except NotThisMethod:
pass
try:
root = os.path.realpath(__file__)
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None}
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
return render(pieces, cfg.style)
except NotThisMethod:
pass
try:
if cfg.parentdir_prefix:
return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
except NotThisMethod:
pass
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to compute version", "date": None}
'''
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    The code embedded in _version.py can just fetch the value of these
    keywords. When used from setup.py, we don't want to import _version.py,
    so we do it with a regexp instead. This function is not used from
    _version.py.

    Args:
        versionfile_abs: path of the _version.py file to scan.

    Returns:
        A dict holding whichever of the keys "refnames", "full" and "date"
        could be extracted; empty when the file cannot be read.
    """
    # assignment prefix looked for in the file -> key in the returned dict
    wanted = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    keywords = {}
    try:
        # 'with' closes the handle even when reading raises something other
        # than EnvironmentError (e.g. a decoding error), which previously
        # leaked the open file.
        with open(versionfile_abs, "r") as f:
            lines = f.readlines()
    except EnvironmentError:
        # best effort: an unreadable file simply yields no keywords
        return keywords
    for line in lines:
        stripped = line.strip()
        for prefix, key in wanted:
            if stripped.startswith(prefix):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
                    keywords[key] = mo.group(1)
    return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    Args:
        keywords: dict of keyword values with the keys "refnames", "full"
            and optionally "date".
        tag_prefix: prefix a ref must start with to be used as the version
            tag; it is stripped from the ref to obtain the version.
        verbose: if true, print progress messages.

    Returns:
        A dict with the keys "version", "full-revisionid", "dirty", "error"
        and "date".

    Raises:
        NotThisMethod: if there are no keywords, the "refnames" keyword is
            missing, or the keywords are still unexpanded.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords:
        # A partial keyword set used to crash with KeyError below; report it
        # as "method not applicable" so callers can try the next method.
        raise NotThisMethod("no refnames keyword, not a git-archive tarball")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r"\d", r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {
                "version": r,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False,
                "error": None,
                "date": date,
            }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {
        "version": "0+unknown",
        "full-revisionid": keywords["full"].strip(),
        "dirty": False,
        "error": "no suitable tags",
        "date": None,
    }
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Args:
        tag_prefix: prefix that version tags are expected to start with.
        root: directory in which the git commands are run.
        verbose: if true, print diagnostic messages.
        run_command: callable used to run git; it is unpacked here as an
            (output, return code) pair, with output None on failure.

    Returns:
        A "pieces" dict. It always has "long", "short", "error" and "dirty".
        When "error" is None it also has "closest-tag", "distance" and
        "date".

    Raises:
        NotThisMethod: if root is not under git control or one of the git
            commands produced no output.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    _, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(
        GITS,
        ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s*" % tag_prefix],
        cwd=root,
    )
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[: git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
        if count_out is None:
            # same treatment as the describe/rev-parse failures above,
            # instead of crashing in int(None)
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)
    if date_out is None:
        # previously an AttributeError on None.strip()
        raise NotThisMethod("'git show' failed")
    date = date_out.strip()
    # Use only the last line.  Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
def do_vcs_install(manifest_in, versionfile_source, ipy):
    """Git-specific installation logic for Versioneer.

    For Git, this means creating/changing .gitattributes to mark _version.py
    for export-subst keyword substitution.

    Args:
        manifest_in: path of MANIFEST.in, added to the git index.
        versionfile_source: path of _version.py; marked export-subst in
            .gitattributes and added to the git index.
        ipy: path of the package __init__.py to add as well, or a false
            value to skip it.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]
    files = [manifest_in, versionfile_source]
    if ipy:
        files.append(ipy)
    try:
        me = __file__
        # point at the source file rather than a compiled one
        if me.endswith((".pyc", ".pyo")):
            me = os.path.splitext(me)[0] + ".py"
        versioneer_file = os.path.relpath(me)
    except NameError:
        versioneer_file = "versioneer.py"
    files.append(versioneer_file)
    present = False
    try:
        # 'with' closes the handle even if reading fails part-way
        with open(".gitattributes", "r") as f:
            lines = f.readlines()
    except EnvironmentError:
        # no readable .gitattributes yet: the entry gets appended below
        lines = []
    for line in lines:
        if line.strip().startswith(versionfile_source):
            if "export-subst" in line.strip().split()[1:]:
                present = True
    if not present:
        with open(".gitattributes", "a+") as f:
            f.write("%s export-subst\n" % versionfile_source)
        files.append(".gitattributes")
    run_command(GITS, ["add", "--"] + files)
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of an enclosing directory.

    Source tarballs conventionally unpack into a directory whose name is the
    project name followed by a version string. Starting at root, up to three
    directory levels are inspected (root itself and two parents).

    Returns a version dict whose "version" is the directory name with
    parentdir_prefix removed. Raises NotThisMethod when none of the
    inspected directory names starts with parentdir_prefix.
    """
    tried = []
    candidate = root
    for _ in range(3):
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            found = {"version": name[len(parentdir_prefix):]}
            found["full-revisionid"] = None
            found["dirty"] = False
            found["error"] = None
            found["date"] = None
            return found
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        message = "Tried directories %s but none started with prefix %s" % (
            str(tried),
            parentdir_prefix,
        )
        print(message)
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
# Template for the short, pre-computed _version.py. write_to_version_file()
# fills the single %s with the JSON-encoded version dict, and
# versions_from_file() reads it back by matching the "version_json = '''"
# opening line and the "END VERSION_JSON" marker, so those lines must stay in
# sync with the regexps there.
SHORT_VERSION_PY = """
# This file was generated by 'versioneer.py' (0.19) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.
import json
version_json = '''
%s
''' # END VERSION_JSON
def get_versions():
return json.loads(version_json)
"""
def versions_from_file(filename):
    """Load the version dict stored in a generated _version.py, if present.

    The file is searched for the version_json block (first with "\\n", then
    with "\\r\\n" after the opening quotes) and the enclosed JSON is decoded.
    Raises NotThisMethod when the file cannot be read or holds no such block.
    """
    try:
        with open(filename) as handle:
            text = handle.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")

    patterns = (
        r"version_json = '''\n(.*)''' # END VERSION_JSON",
        r"version_json = '''\r\n(.*)''' # END VERSION_JSON",
    )
    match = None
    for pattern in patterns:
        match = re.search(pattern, text, re.M | re.S)
        if match:
            break
    if not match:
        raise NotThisMethod("no version_json in _version.py")
    return json.loads(match.group(1))
def write_to_version_file(filename, versions):
    """Replace filename with a short _version.py holding the given versions.

    The existing file is deleted first (so it must exist), then recreated
    from SHORT_VERSION_PY with the JSON-encoded versions dict filled in.
    """
    os.unlink(filename)
    payload = json.dumps(
        versions,
        sort_keys=True,
        indent=1,
        separators=(",", ": "),
    )
    with open(filename, "w") as out:
        out.write(SHORT_VERSION_PY % payload)

    report = "set %s to '%s'" % (filename, versions["version"])
    print(report)
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    Args:
        pieces: version pieces dict; only "closest-tag" is consulted.

    Returns:
        "." when the closest tag already contains a "+", otherwise "+".
    """
    # "closest-tag" may be present with the value None (untagged checkout);
    # dict.get's default does not cover that case, so normalise it here to
    # avoid `"+" in None` raising TypeError.
    closest_tag = pieces.get("closest-tag") or ""
    if "+" in closest_tag:
        return "."
    return "+"
def render_pep440(pieces):
    """Render pieces as a PEP 440 version with a local version identifier.

    Result: TAG[+DISTANCE.gHEX[.dirty]]. A tagged build that has been
    dirtied renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        version = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
        return version

    if not (pieces["distance"] or pieces["dirty"]):
        # exactly on a tag with a clean tree: the tag is the version
        return tag

    local = plus_or_dot(pieces) + "%d.g%s" % (pieces["distance"], pieces["short"])
    if pieces["dirty"]:
        local += ".dirty"
    return tag + local
def render_pep440_pre(pieces):
    """Render pieces as TAG[.post0.devDISTANCE]; dirtiness is not shown.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post0.dev%d" % pieces["distance"]
    if pieces["distance"]:
        return tag + ".post0.dev%d" % pieces["distance"]
    return tag
def render_pep440_post(pieces):
    """Render pieces as TAG[.postDISTANCE[.dev0]+gHEX].

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # clean build of the tagged commit
            return tag
        parts = [tag, ".post%d" % pieces["distance"]]
        if pieces["dirty"]:
            parts.append(".dev0")
        parts.append(plus_or_dot(pieces))
        parts.append("g%s" % pieces["short"])
        return "".join(parts)

    # exception #1
    parts = ["0.post%d" % pieces["distance"]]
    if pieces["dirty"]:
        parts.append(".dev0")
    parts.append("+g%s" % pieces["short"])
    return "".join(parts)
def render_pep440_old(pieces):
    """Render pieces as TAG[.postDISTANCE[.dev0]].

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # clean build of the tagged commit
            return tag
        version = tag + ".post%d" % pieces["distance"]
    else:
        # exception #1
        version = "0.post%d" % pieces["distance"]
    if pieces["dirty"]:
        version += ".dev0"
    return version
def render_git_describe(pieces):
    """Render pieces as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag
    return described + "-dirty" if pieces["dirty"] else described
def render_git_describe_long(pieces):
    """Render pieces as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]
    suffix = "-dirty" if pieces["dirty"] else ""
    return described + suffix
def render(pieces, style):
    """Turn version pieces into a version dict using the requested style.

    When pieces carries an error, an "unknown" version dict reporting that
    error is returned and style is ignored. An empty style or "default"
    selects "pep440". Raises ValueError for an unrecognised style.
    """
    error = pieces["error"]
    if error:
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": error,
            "date": None,
        }

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    renderer = renderers.get(style)
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderer(pieces)

    return {
        "version": rendered,
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
        "date": pieces.get("date"),
    }
class VersioneerBadRootError(Exception):
    """Raised when the project root directory is unknown or lacks key files."""
def get_versions(verbose=False):
    """Get the project version from whatever source is available.

    The sources are tried in this order, and the first one that works wins:
    expanded VCS keywords in _version.py, a generated (short) _version.py,
    the VCS itself (e.g. 'git describe'), and finally the name of the parent
    directory. This is meant to work for developers using a source checkout,
    for users of a tarball created by 'setup.py sdist', and for users of a
    tarball/zipball created by 'git archive' or github's download-from-tag
    feature or the equivalent in other VCSes.

    Returns a dict with the keys 'version', 'full-revisionid', 'dirty',
    'error' and 'date'. If every source fails, the version is "0+unknown".
    """
    # see the discussion in cmdclass.py:get_cmdclass()
    sys.modules.pop("versioneer", None)

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # 1: keywords expanded by the VCS inside _version.py
    read_keywords = handlers.get("get_keywords")
    parse_keywords = handlers.get("keywords")
    if read_keywords and parse_keywords:
        try:
            found = parse_keywords(read_keywords(versionfile_abs), cfg.tag_prefix, verbose)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from expanded keyword %s" % found)
            return found

    # 2: a pre-generated short _version.py
    try:
        found = versions_from_file(versionfile_abs)
    except NotThisMethod:
        pass
    else:
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, found))
        return found

    # 3: ask the VCS directly
    read_vcs = handlers.get("pieces_from_vcs")
    if read_vcs:
        try:
            found = render(read_vcs(cfg.tag_prefix, root, verbose), cfg.style)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from VCS %s" % found)
            return found

    # 4: the name of the enclosing directory
    if cfg.parentdir_prefix:
        try:
            found = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from parentdir %s" % found)
            return found

    if verbose:
        print("unable to compute version")

    return {
        "version": "0+unknown",
        "full-revisionid": None,
        "dirty": None,
        "error": "unable to compute version",
        "date": None,
    }
def get_version():
    """Return only the short version string of this project."""
    versions = get_versions()
    return versions["version"]
def get_cmdclass(cmdclass=None):
"""Get the custom setuptools/distutils subclasses used by Versioneer.
If the package uses a different cmdclass (e.g. one from numpy), it
should be provided as an argument.
Args:
cmdclass: optional dict of existing command classes. It is copied, not
modified; its "build_py" and "sdist" entries, when present, are used
as the base classes for the overrides defined here.
Returns:
A dict of command classes with the entries "version", "build_ext" and
"sdist", plus "build_exe" when cx_Freeze has been imported and "py2exe"
when py2exe has been imported. "build_py" is included unless cx_Freeze
has been imported.
"""
if "versioneer" in sys.modules:
del sys.modules["versioneer"]
# this fixes the "python setup.py develop" case (also 'install' and
# 'easy_install .'), in which subdependencies of the main project are
# built (using setup.py bdist_egg) in the same python process. Assume
# a main project A and a dependency B, which use different versions
# of Versioneer. A's setup.py imports A's Versioneer, leaving it in
# sys.modules by the time B's setup.py is executed, causing B to run
# with the wrong versioneer. Setuptools wraps the sub-dep builds in a
# sandbox that restores sys.modules to its pre-build state, so the
# parent is protected against the child's "import versioneer". By
# removing ourselves from sys.modules here, before the child build
# happens, we protect the child from the parent's versioneer too.
# Also see https://github.com/python-versioneer/python-versioneer/issues/52
# work on a copy so the caller's dict is left untouched
cmds = {} if cmdclass is None else cmdclass.copy()
# we add "version" to both distutils and setuptools
# NOTE(review): distutils is deprecated (PEP 632) and no longer part of the
# standard library from Python 3.12 on -- confirm this import still works in
# the environments this project supports.
from distutils.core import Command
class cmd_version(Command):
description = "report generated version string"
user_options = []
boolean_options = []
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
vers = get_versions(verbose=True)
print("Version: %s" % vers["version"])
print(" full-revisionid: %s" % vers.get("full-revisionid"))
print(" dirty: %s" % vers.get("dirty"))
print(" date: %s" % vers.get("date"))
if vers["error"]:
print(" error: %s" % vers["error"])
cmds["version"] = cmd_version
# we override "build_py" in both distutils and setuptools
#
# most invocation pathways end up running build_py:
# distutils/build -> build_py
# distutils/install -> distutils/build ->..
# setuptools/bdist_wheel -> distutils/install ->..
# setuptools/bdist_egg -> distutils/install_lib -> build_py
# setuptools/install -> bdist_egg ->..
# setuptools/develop -> ?
# pip install:
# copies source tree to a tempdir before running egg_info/etc
# if .git isn't copied too, 'git describe' will fail
# then does setup.py bdist_wheel, or sometimes setup.py install
# setup.py egg_info -> ?
# we override different "build_py" commands for both environments
# a caller-supplied build_py takes precedence as the base class
if "build_py" in cmds:
_build_py = cmds["build_py"]
elif "setuptools" in sys.modules:
from setuptools.command.build_py import build_py as _build_py
else:
from distutils.command.build_py import build_py as _build_py
class cmd_build_py(_build_py):
def run(self):
root = get_root()
cfg = get_config_from_root(root)
# the versions are computed before the base build runs
versions = get_versions()
_build_py.run(self)
# now locate _version.py in the new build/ directory and replace
# it with an updated value
if cfg.versionfile_build:
target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, versions)
cmds["build_py"] = cmd_build_py
# unlike build_py and sdist, a caller-supplied build_ext is not consulted
if "setuptools" in sys.modules:
from setuptools.command.build_ext import build_ext as _build_ext
else:
from distutils.command.build_ext import build_ext as _build_ext
class cmd_build_ext(_build_ext):
def run(self):
root = get_root()
cfg = get_config_from_root(root)
versions = get_versions()
_build_ext.run(self)
if self.inplace:
# build_ext --inplace will only build extensions in
# build/lib<..> dir with no _version.py to write to.
# As in place builds will already have a _version.py
# in the module dir, we do not need to write one.
return
# now locate _version.py in the new build/ directory and replace
# it with an updated value
target_versionfile = os.path.join(self.build_lib, cfg.versionfile_source)
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, versions)
cmds["build_ext"] = cmd_build_ext
if "cx_Freeze" in sys.modules: # cx_freeze enabled?
from cx_Freeze.dist import build_exe as _build_exe
# nczeczulin reports that py2exe won't like the pep440-style string
# as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
# setup(console=[{
# "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION
# "product_version": versioneer.get_version(),
# ...
class cmd_build_exe(_build_exe):
def run(self):
root = get_root()
cfg = get_config_from_root(root)
versions = get_versions()
# temporarily swap the source-tree _version.py for the short,
# pre-computed form while the executable is built
target_versionfile = cfg.versionfile_source
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, versions)
_build_exe.run(self)
# then put the long, VCS-aware template back
# NOTE(review): this restore is skipped if _build_exe.run() raises.
os.unlink(target_versionfile)
with open(cfg.versionfile_source, "w") as f:
LONG = LONG_VERSION_PY[cfg.VCS]
f.write(
LONG
% {
"DOLLAR": "$",
"STYLE": cfg.style,
"TAG_PREFIX": cfg.tag_prefix,
"PARENTDIR_PREFIX": cfg.parentdir_prefix,
"VERSIONFILE_SOURCE": cfg.versionfile_source,
}
)
cmds["build_exe"] = cmd_build_exe
# with cx_Freeze the build_py override (caller-supplied or ours) is dropped
# from the result; presumably build_exe's own rewrite makes it redundant --
# TODO confirm
del cmds["build_py"]
if "py2exe" in sys.modules: # py2exe enabled?
from py2exe.distutils_buildexe import py2exe as _py2exe
class cmd_py2exe(_py2exe):
def run(self):
root = get_root()
cfg = get_config_from_root(root)
versions = get_versions()
# same temporary swap / restore sequence as cmd_build_exe
target_versionfile = cfg.versionfile_source
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, versions)
_py2exe.run(self)
os.unlink(target_versionfile)
with open(cfg.versionfile_source, "w") as f:
LONG = LONG_VERSION_PY[cfg.VCS]
f.write(
LONG
% {
"DOLLAR": "$",
"STYLE": cfg.style,
"TAG_PREFIX": cfg.tag_prefix,
"PARENTDIR_PREFIX": cfg.parentdir_prefix,
"VERSIONFILE_SOURCE": cfg.versionfile_source,
}
)
cmds["py2exe"] = cmd_py2exe
# we override different "sdist" commands for both environments
if "sdist" in cmds:
_sdist = cmds["sdist"]
elif "setuptools" in sys.modules:
from setuptools.command.sdist import sdist as _sdist
else:
from distutils.command.sdist import sdist as _sdist
class cmd_sdist(_sdist):
def run(self):
versions = get_versions()
# stash the result on the command for make_release_tree() below
self._versioneer_generated_versions = versions
# unless we update this, the command will keep using the old
# version
self.distribution.metadata.version = versions["version"]
return _sdist.run(self)
def make_release_tree(self, base_dir, files):
root = get_root()
cfg = get_config_from_root(root)
_sdist.make_release_tree(self, base_dir, files)
# now locate _version.py in the new base_dir directory
# (remembering that it may be a hardlink) and replace it with an
# updated value
target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, self._versioneer_generated_versions)
cmds["sdist"] = cmd_sdist
return cmds
# Help text that do_setup() prints to stderr when the Versioneer
# configuration cannot be loaded from setup.cfg.
CONFIG_ERROR = """
setup.cfg is missing the necessary Versioneer configuration. You need
a section like:
[versioneer]
VCS = git
style = pep440
versionfile_source = src/myproject/_version.py
versionfile_build = myproject/_version.py
tag_prefix =
parentdir_prefix = myproject-
You will also need to edit your setup.py to use the results:
import versioneer
setup(version=versioneer.get_version(),
cmdclass=versioneer.get_cmdclass(), ...)
Please read the docstring in ./versioneer.py for configuration instructions,
edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
"""
# Commented-out [versioneer] section that do_setup() appends to setup.cfg
# when the file or the section is missing.
SAMPLE_CONFIG = """
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
[versioneer]
#VCS = git
#style = pep440
#versionfile_source =
#versionfile_build =
#tag_prefix =
#parentdir_prefix =
"""
# Code that do_setup() appends to the package's __init__.py (unless it is
# already there) so the package exposes __version__.
INIT_PY_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""
def do_setup():
    """Do main VCS-independent setup function for installing Versioneer.

    Performs, in order:

    1. Load the Versioneer configuration with ``get_config_from_root``. If
       that fails, ``CONFIG_ERROR`` is printed to stderr and the function
       stops; for ``EnvironmentError`` / ``configparser.NoSectionError`` the
       ``SAMPLE_CONFIG`` template is appended to ``setup.cfg`` first.
    2. Render the ``LONG_VERSION_PY`` template for the configured VCS and
       write it to ``cfg.versionfile_source``.
    3. Append ``INIT_PY_SNIPPET`` to the ``__init__.py`` next to the version
       file, if that file exists and does not already contain the snippet.
    4. Make sure ``MANIFEST.in`` has simple ``include`` entries for
       ``versioneer.py`` and ``cfg.versionfile_source``.
    5. Call ``do_vcs_install`` for the VCS-specific changes.

    Returns:
        int: 1 when the configuration could not be loaded, 0 otherwise.

    Raises:
        KeyError: if ``cfg.VCS`` has no entry in ``LONG_VERSION_PY``. The
            existing version file is left untouched in that case.
    """
    root = get_root()
    try:
        cfg = get_config_from_root(root)
    except (EnvironmentError, configparser.NoSectionError, configparser.NoOptionError) as e:
        # Only a missing file/section gets the template appended; a missing
        # option means a section already exists, so just explain the problem.
        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
            print("Adding sample versioneer config to setup.cfg", file=sys.stderr)
            with open(os.path.join(root, "setup.cfg"), "a") as f:
                f.write(SAMPLE_CONFIG)
        print(CONFIG_ERROR, file=sys.stderr)
        return 1

    # NOTE(review): versionfile_source (and the __init__.py derived from it)
    # is opened relative to the current working directory, while setup.cfg
    # and MANIFEST.in are resolved against `root` -- presumably the script is
    # meant to be run from the project root; confirm.
    print(" creating %s" % cfg.versionfile_source)
    # Look up and render the template BEFORE opening the target: mode "w"
    # truncates immediately, so failing afterwards (e.g. an unknown VCS)
    # would leave an emptied version file behind.
    LONG = LONG_VERSION_PY[cfg.VCS]
    rendered = LONG % {
        "DOLLAR": "$",
        "STYLE": cfg.style,
        "TAG_PREFIX": cfg.tag_prefix,
        "PARENTDIR_PREFIX": cfg.parentdir_prefix,
        "VERSIONFILE_SOURCE": cfg.versionfile_source,
    }
    with open(cfg.versionfile_source, "w") as f:
        f.write(rendered)

    ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py")
    if os.path.exists(ipy):
        try:
            with open(ipy, "r") as f:
                old = f.read()
        except EnvironmentError:
            # Best effort: an unreadable file is treated as not containing
            # the snippet, so the append below is still attempted.
            old = ""
        if INIT_PY_SNIPPET not in old:
            print(" appending to %s" % ipy)
            with open(ipy, "a") as f:
                f.write(INIT_PY_SNIPPET)
        else:
            print(" %s unmodified" % ipy)
    else:
        print(" %s doesn't exist, ok" % ipy)
        # Tell do_vcs_install that there is no __init__.py to deal with.
        ipy = None

    # Make sure both the top-level "versioneer.py" and versionfile_source
    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
    # they'll be copied into source distributions. Pip won't be able to
    # install the package without this.
    manifest_in = os.path.join(root, "MANIFEST.in")
    simple_includes = set()
    try:
        with open(manifest_in, "r") as f:
            for line in f:
                if line.startswith("include "):
                    simple_includes.update(line.split()[1:])
    except EnvironmentError:
        # A missing/unreadable MANIFEST.in simply means nothing is included
        # yet; the appends below will create the file.
        pass
    # That doesn't cover everything MANIFEST.in can do
    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
    # it might give some false negatives. Appending redundant 'include'
    # lines is safe, though.
    if "versioneer.py" not in simple_includes:
        print(" appending 'versioneer.py' to MANIFEST.in")
        with open(manifest_in, "a") as f:
            f.write("include versioneer.py\n")
    else:
        print(" 'versioneer.py' already in MANIFEST.in")
    if cfg.versionfile_source not in simple_includes:
        print(" appending versionfile_source ('%s') to MANIFEST.in" % cfg.versionfile_source)
        with open(manifest_in, "a") as f:
            f.write("include %s\n" % cfg.versionfile_source)
    else:
        print(" versionfile_source already in MANIFEST.in")

    # Make VCS-specific changes. For git, this means creating/changing
    # .gitattributes to mark _version.py for export-subst keyword
    # substitution.
    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
    return 0
def scan_setup_py():
    """Validate the contents of setup.py against Versioneer's expectations.

    Reads ``setup.py`` from the current working directory and looks for two
    kinds of problems, printing advice for each one found:

    * one of the three expected Versioneer hooks (the import, the
      ``get_version()`` call, the ``get_cmdclass()`` call) is absent;
    * old-style ``versioneer.VCS`` / ``versioneer.versionfile_source``
      references are present.

    Returns:
        int: the number of problem kinds detected (0, 1 or 2).
    """
    expected_hooks = (
        "import versioneer",
        "versioneer.get_cmdclass()",
        "versioneer.get_version()",
    )
    legacy_setters = ("versioneer.VCS", "versioneer.versionfile_source")

    with open("setup.py", "r") as handle:
        text = handle.read()

    # None of the markers contains a newline, so searching the whole text
    # gives the same answer as testing every line individually.
    all_hooks_present = all(marker in text for marker in expected_hooks)
    uses_setters = any(marker in text for marker in legacy_setters)

    problems = 0
    if not all_hooks_present:
        for message in (
            "",
            "Your setup.py appears to be missing some important items",
            "(but I might be wrong). Please make sure it has something",
            "roughly like the following:",
            "",
            " import versioneer",
            " setup( version=versioneer.get_version(),",
            " cmdclass=versioneer.get_cmdclass(), ...)",
            "",
        ):
            print(message)
        problems += 1
    if uses_setters:
        for message in (
            "You should remove lines like 'versioneer.VCS = ' and",
            "'versioneer.versionfile_source = ' . This configuration",
            "now lives in setup.cfg, and should be removed from setup.py",
            "",
        ):
            print(message)
        problems += 1
    return problems
if __name__ == "__main__":
    # Command-line entry point. "setup" is the only sub-command acted upon;
    # any other argument falls through and the script ends normally.
    if len(sys.argv) < 2:
        # Previously a bare invocation crashed with an IndexError traceback
        # from sys.argv[1]. sys.exit() with a string prints it to stderr and
        # exits with status 1, so the failure status is unchanged.
        sys.exit("usage: python versioneer.py setup")
    cmd = sys.argv[1]
    if cmd == "setup":
        # Run both steps even if the first reports a problem, so the user
        # sees every diagnostic in one pass.
        errors = do_setup()
        errors += scan_setup_py()
        if errors:
            sys.exit(1)